// Naive Bayes algorithm (贝叶斯算法)
// Labelled training corpus. Each post is a list of already-segmented words
// plus a label: isTrash === 1 marks an advertisement, isTrash === 0 a normal post.
let pointWordsList = [
  { words: ['周六', '公司', '一起', '聚餐', '时间'], isTrash: 0 },
  { words: ['优惠', '返利', '打折', '优惠', '金融', '理财'], isTrash: 1 },
  { words: ['喜欢', '机器学习', '一起', '研究', '欢迎', '贝叶斯', '算法', '公式'], isTrash: 0 },
  { words: ['公司', '发票', '税点', '优惠', '增值税', '打折'], isTrash: 1 },
  { words: ['北京', '今天', '雾霾', '不宜', '外出', '时间', '在家', '讨论', '学习'], isTrash: 0 },
  { words: ['招聘', '兼职', '日薪', '保险', '返利'], isTrash: 1 },
];

// Formula (naive independence assumption between words A1, A2, A3):
//   P(C1|A1A2A3) = P(A1A2A3|C1) * P(C1) / P(A1A2A3)
//                = P(A1|C1) * P(A2|C1) * P(A3|C1) * P(C1) / (P(A1) * P(A2) * P(A3))

/**
 * Selects the posts labelled as advertisements.
 *
 * @param {Array<{words: string[], isTrash: number}>} data - labelled posts
 * @returns {Array<{words: string[], isTrash: number}>} posts whose isTrash is 1
 */
function getTashPostData(data) {
  return data.filter((item) => item.isTrash === 1);
}

/**
 * Computes the prior P(C1): the fraction of posts labelled as advertisements.
 *
 * @param {Array<{words: string[], isTrash: number}>} data - labelled posts
 * @returns {number} advertisement count divided by total count (NaN for empty data)
 */
function getPC1fromDataResource(data) {
  return getTashPostData(data).length / data.length;
}

/**
 * Computes both class priors.
 *
 * @param {Array<{words: string[], isTrash: number}>} data - labelled posts
 * @returns {{PC1: number, PC2: number}} PC1 is the advertisement prior, PC2 is 1 - PC1
 */
function getSomeHelpfulData(data) {
  const PC1 = getPC1fromDataResource(data);
  return { PC1, PC2: 1 - PC1 };
}

/**
 * Removes duplicates from an array, keeping first-occurrence order.
 *
 * @param {Array} arr - input values
 * @returns {Array} a new array with unique values
 */
function filterArray(arr) {
  return [...new Set(arr)];
}

// Words of the new post to classify.
let newWords = ['周六', '机器学习', '贝叶斯', '公司'];

/**
 * Estimates P(word | class) for every word seen in posts of the given class.
 * A word counts at most once per post, so prob is the fraction of posts of
 * that class that contain the word.
 *
 * The input posts are not modified.
 *
 * @param {Array<{words: string[], isTrash: number}>} data - labelled posts
 * @param {number} flag - class label to train on (compared with isTrash via ===)
 * @returns {Array<{word: string, prob: number}>} one entry per distinct word,
 *   in first-occurrence order; empty when no post has the given label
 */
function training(data, flag) {
  const checkList = data.filter((item) => item.isTrash === flag);

  // Map keeps insertion order, so the output order matches first occurrence.
  const postCountByWord = new Map();
  for (const post of checkList) {
    // De-duplicate on a copy so the caller's word lists stay untouched.
    for (const word of filterArray(post.words)) {
      postCountByWord.set(word, (postCountByWord.get(word) || 0) + 1);
    }
  }

  return Array.from(postCountByWord, ([word, num]) => ({
    word,
    prob: num / checkList.length,
  }));
}

let C1wordsProbList = training(pointWordsList, 1);
let C2wordsProbList = training(pointWordsList, 0);

/**
 * Looks up the class-conditional probability of each new word. Words that the
 * class has never seen get the smoothed fallback 1 / (classSize + 1) so that a
 * single unseen word does not zero out the whole product.
 *
 * @param {string[]} newwords - words of the post being classified
 * @param {Array<{word: string, prob: number}>} probList - output of training()
 * @param {number} [classSize] - number of training posts in the class that
 *   probList was built from; defaults to the number of advertisement posts in
 *   pointWordsList, which is what every call used before this parameter existed
 * @returns {Array<{word: string, prob: number}>} matched entries from probList
 *   (same object references) or fallback entries, in newwords order
 */
function setNewWordsProbList(
  newwords,
  probList,
  classSize = getTashPostData(pointWordsList).length
) {
  const result = [];
  for (const word of newwords) {
    const matches = probList.filter((entry) => entry.word === word);
    if (matches.length > 0) {
      result.push(...matches);
    } else {
      result.push({ word, prob: 1 / (classSize + 1) });
    }
  }
  return result;
}

// Each class is smoothed with its own post count.
let newC1wordsProbList = setNewWordsProbList(
  newWords,
  C1wordsProbList,
  getTashPostData(pointWordsList).length
);
let newC2wordsProbList = setNewWordsProbList(
  newWords,
  C2wordsProbList,
  pointWordsList.filter((item) => item.isTrash === 0).length
);
let { PC1, PC2 } = getSomeHelpfulData(pointWordsList);

/**
 * Computes the ratio P(C1 | words) / P(C2 | words). The shared evidence term
 * P(words) cancels, so only likelihood products and priors are needed.
 *
 * @param {Array<{prob: number}>} newC1wordsProbList - per-word P(word | C1)
 * @param {Array<{prob: number}>} newC2wordsProbList - per-word P(word | C2)
 * @param {number} PC1 - prior of class C1
 * @param {number} PC2 - prior of class C2
 * @returns {number} the ratio; a value above 1 favours C1
 */
function checkNewPost(newC1wordsProbList, newC2wordsProbList, PC1, PC2) {
  // Seeding with 1 makes an empty word list yield a neutral likelihood
  // instead of throwing.
  const likelihood = (probList) => probList.reduce((acc, { prob }) => acc * prob, 1);
  return (likelihood(newC1wordsProbList) * PC1) / (likelihood(newC2wordsProbList) * PC2);
}

let result = checkNewPost(newC1wordsProbList, newC2wordsProbList, PC1, PC2);
if (result > 1) {
  console.log('广告邮件');
} else {
  console.log('正常邮件');
}
console.log(result);