转:TopN推荐系统——推荐的实现与推荐效果的评价指标
转自:用户推荐系统_python 代码-豆瓣
书籍:项亮的《推荐系统实践》
import random
import math class UserBasedCF:
def __init__(self,train = None,test = None):
self.trainfile = train
self.testfile = test
self.readData() def readData(self,train = None,test = None):
self.trainfile = train or self.trainfile
self.testfile = test or self.testfile
self.traindata = {}
self.testdata = {}
for line in open(self.trainfile):
userid,itemid,record,_ = line.split()
self.traindata.setdefault(userid,{})
self.traindata[userid][itemid]=record
for line in open(self.testfile):
userid,itemid,record,_ = line.split()
self.testdata.setdefault(userid,{})
self.testdata[userid][itemid]=record def userSimilarityBest(self,train = None):
train = train or self.traindata
self.userSimBest = dict()
item_users = dict()
for u,item in train.items():
for i in item.keys():
item_users.setdefault(i,set())
item_users[i].add(u)
user_item_count = dict()
count = dict()
for item,users in item_users.items():
for u in users:
user_item_count.setdefault(u,0)
user_item_count[u] += 1
for v in users:
if u == v:continue
count.setdefault(u,{})
count[u].setdefault(v,0)
count[u][v] += 1
for u ,related_users in count.items():
self.userSimBest.setdefault(u,dict())
for v, cuv in related_users.items():
self.userSimBest[u][v] = cuv / math.sqrt(user_item_count[u] * user_item_count[v] * 1.0) def recommend(self,user,train = None,k = 8,nitem = 40):
train = train or self.traindata
rank = dict()
interacted_items = train.get(user,{})
for v ,wuv in sorted(self.userSimBest[user].items(),key = lambda x : x[1],reverse = True)[0:k]:#获取与user相似度最高的k个用户
for i , rvi in train[v].items():
if i in interacted_items:
continue #只选择user没有评分过的物品进行推荐
rank.setdefault(i,0)#设置初始值,以便做下面的累加运算
rank[i] += wuv #书中为rank[i] +=rvi*wuv
return dict(sorted(rank.items(),key = lambda x :x[1],reverse = True)[0:nitem])#用sorted方法对推荐的物品进行排序,预计评分高的排在前面,再取其中nitem个,nitem为每个用户推荐的物品数量 def recallAndPrecision(self,train = None,test = None,k = 8,nitem = 10):
train = train or self.traindata
test = test or self.testdata
hit = 0
recall = 0
precision = 0
for user in train.keys():
tu = test.get(user,{})#如果测试集中没有这个用户,则将tu初始化为空,避免test[user]报错
rank = self.recommend(user, train = train,k = k,nitem = nitem)
for item,_ in rank.items():
if item in tu:
hit += 1
recall += len(tu)
precision += nitem
return (hit / (recall * 1.0),hit / (precision * 1.0)) def coverage(self,train = None,test = None,k = 8,nitem = 10):
train = train or self.traindata
test = test or self.testdata
recommend_items = set()
all_items = set()
for user in train.keys():
for item in train[user].keys():
all_items.add(item)
rank = self.recommend(user, train, k = k, nitem = nitem)
for item,_ in rank.items():
recommend_items.add(item)
return len(recommend_items) / (len(all_items) * 1.0) def popularity(self,train = None,test = None,k = 8,nitem = 10):
train = train or self.traindata
test = test or self.testdata
item_popularity = dict()
for user ,items in train.items():
for item in items.keys():
item_popularity.setdefault(item,0)
item_popularity[item] += 1
ret = 0
n = 0
for user in train.keys():
rank = self.recommend(user, train, k = k, nitem = nitem)
for item ,_ in rank.items():
ret += math.log(1+item_popularity[item])
n += 1
return ret / (n * 1.0) def testUserBasedCF():
train = 'u1.base'
test = 'u1.test'
cf = UserBasedCF(train,test)
cf.userSimilarityBest()
print("%3s%20s%20s%20s%20s" % ('K',"precision",'recall','coverage','popularity'))
for k in [5,10,20,40,80,160]:
recall,precision = cf.recallAndPrecision( k = k)
coverage = cf.coverage(k = k)
popularity = cf.popularity(k = k)
print("%3d%19.3f%%%19.3f%%%19.3f%%%20.3f" % (k,precision * 100,recall * 100,coverage * 100,popularity)) if __name__ == "__main__":
testUserBasedCF() 基于项目的推荐系统,IBCF: '''
Created on 2013-10-10 @author: Administrator
'''
import random
import math class KNN:
def __init__(self,train = None,test = None):
self.trainfile = train
self.testfile = test
self.readData() def readData(self,train = None,test = None):
self.trainfile = train or self.trainfile
self.testfile = test or self.testfile
self.traindata = {}
self.testdata = {}
for line in open(self.trainfile):
userid,itemid,record,_ = line.split()
self.traindata.setdefault(userid,{})
self.traindata[userid][itemid]=record
for line in open(self.testfile):
userid,itemid,record,_ = line.split()
self.testdata.setdefault(userid,{})
self.testdata[userid][itemid]=record def ItemSim(self,train = None):
train = train or self.traindata
ItemSimcount = dict()
Item_count = dict()
for _,items in train.items():
for itemidi in items.keys():
Item_count.setdefault(itemidi,0)
Item_count[itemidi] += 1
for itemidj in items.keys():
if itemidi == itemidj:
continue
ItemSimcount.setdefault(itemidi,{})
ItemSimcount[itemidi].setdefault(itemidj,0)
ItemSimcount[itemidi][itemidj] +=1
self.ItemSimlist = dict()
for itemidi, related_item in ItemSimcount.items():
self.ItemSimlist.setdefault(itemidi,{})
for itemidj,wij in related_item.items():
self.ItemSimlist[itemidi].setdefault(itemidj,0)
self.ItemSimlist[itemidi][itemidj] = wij/math.sqrt(Item_count[itemidi]*Item_count[itemidj]*1.0) def recommend(self,user,train = None,k = 5,nitem = 10):
train = train or self.traindata
recommendlist = dict()
User_Itemlist = train.get(user,{})
for i,ri in User_Itemlist.items():
for j,wij in sorted(self.ItemSimlist[i].items(),key = lambda x:x[1],reverse = True)[0:k]:
if j in User_Itemlist:
continue
recommendlist.setdefault(j,0)
recommendlist[j] += float(ri)*wij
return dict(sorted(recommendlist.items(),key = lambda x :x[1],reverse = True)[0:nitem]) def recallAndPrecision(self,train = None,test = None,k = 5,nitem = 10):
train = train or self.traindata
test = test or self.testdata
hit = 0
recall = 0
precision = 0
for user in train.keys():
tu = test.get(user,{})
rank = self.recommend(user, train = train,k = k,nitem = nitem)
for item,_ in rank.items():
if item in tu:
hit += 1
recall += len(tu)
precision += nitem
return (hit / (recall * 1.0),hit / (precision * 1.0)) def coverage(self,train = None,test = None,k = 5,nitem = 10):
train = train or self.traindata
test = test or self.testdata
recommend_items = set()
all_items = set()
for user in train.keys():
for item in train[user].keys():
all_items.add(item)
rank = self.recommend(user, train, k = k, nitem = nitem)
for item,_ in rank.items():
recommend_items.add(item)
return len(recommend_items) / (len(all_items) * 1.0) def popularity(self,train = None,test = None,k = 5,nitem = 10):
train = train or self.traindata
test = test or self.testdata
item_popularity = dict()
for user ,items in train.items():
for item in items.keys():
item_popularity.setdefault(item,0)
item_popularity[item] += 1
ret = 0
n = 0
for user in train.keys():
rank = self.recommend(user, train, k = k, nitem = nitem)
for item ,_ in rank.items():
if item in item_popularity:
ret += math.log(1+item_popularity[item])
n += 1
return ret / (n * 1.0) def testKNNCF():
train = 'u1.base'
test = 'u1.test'
cf = KNN(train,test)
cf.ItemSim()
print("%3s%20s%20s%20s%20s" % ('K',"precision",'recall','coverage','popularity'))
for k in [5,10,20,40,80,160]:
recall,precision = cf.recallAndPrecision( k = k)
coverage = cf.coverage(k = k)
popularity = cf.popularity(k = k)
print("%3d%19.3f%%%19.3f%%%19.3f%%%20.3f" % (k,precision * 100,recall * 100,coverage * 100,popularity)) if __name__ == "__main__":
testKNNCF()
转:TopN推荐系统——推荐的实现与推荐效果的评价指标的更多相关文章
- 新闻推荐系统:基于内容的推荐算法(Recommender System:Content-based Recommendation)
https://blog.csdn.net/qq_32690999/article/details/77434381 因为开发了一个新闻推荐系统的模块,在推荐算法这一块涉及到了基于内容的推荐算法(Co ...
- 推荐系统之基于图的推荐:基于随机游走的PersonalRank算法
转自http://blog.csdn.net/sinat_33741547/article/details/53002524 一 基本概念 基于图的模型是推荐系统中相当重要的一种方法,以下内容的基本思 ...
- linux学习书籍推荐
引用地址:http://www.cnblogs.com/notepi/archive/2013/06/15/3137103.html Linux 学习书目推荐 Linux基础 1.<Linux与 ...
- 推荐算法之用户推荐(UserCF)和物品推荐(ItemCF)对比
一.定义 UserCF:推荐那些和他有共同兴趣爱好的用户喜欢的物品 ItemCF:推荐那些和他之前喜欢的物品类似的物品 根据用户推荐重点是反应和用户兴趣相似的小群体的热点,根据物品推荐着重与用户过去的 ...
- python书籍推荐:python编码推荐(高清完整pdf)
目录INF-qa Python 编码规范................................................................................ ...
- 如何用ABP框架快速完成项目(11) - ABP只要加人即可马上加快项目进展- 全栈篇(2) - 不推荐模块组件化, 推荐微服务
一个人写代码不需要担心会和别人的代码冲突, 不需要做代码合并, 不需要担心自己的代码被覆盖. 但是多个人一起写代码就需要担心这些问题. 解决这些问题的方法很多, 比如用AzureDevOps(TF ...
- SparkML之推荐引擎(二)---推荐模型评估
本文内容和代码是接着上篇文章来写的,推荐先看一下哈~ 我们上一篇文章是写了电影推荐的实现,但是推荐内容是否合理呢,这就需要我们对模型进行评估 针对推荐模型,这里根据 均方差 和 K值平均准确率 来对模 ...
- JS 浮点型计算的精度问题 推荐的js 库 推荐的类库 Numeral.js 和 accounting.js
推荐的类库 Numeral.js 和 accounting.js 文章来自 http://www.css88.com/archives/7324#more-7324
- mac软件推荐及chrome插件推荐
通用软件 Alfred (超级好用的效率工具) 用mac这个软件一定要装,用习惯之后加上电脑本身的快捷键.效率提升的飞起. Alfred我常使用的功能有: 搜索chrome的书签 我搜索的书签大概分为 ...
随机推荐
- java enum类
1.可以在enum中添加变量和方法 先来看一段代码示例: ? 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 ...
- JaxWsProxyFactoryBean 与 JaxWsDynamicClientFactory
1. JaxWsProxyFactoryBean 简介:调用方式采用了和RMI类似的机制,即客户端直接调用服务器端提供的服务接口(interface),CXF通过运行时代理生成远程服务的代理对象 ...
- HDU 2007
/*杭电ACM ID:2007*/ #define _CRT_SECURE_NO_WARNINGS #include <stdio.h> int main() { int in1, in2 ...
- 【C语言入门教程】2.8 C 语言的预处理命令
预处理命令是在程序编译阶段进行执行的命令,用于编译与特定环境相关的可执行文件.预处理命令扩展了 C 语言,本节将选择其中一些常用的预处理命令进行讲解. 2.8.1 宏替换命令 宏替换命令的作用类似于对 ...
- indexPathForCell returns nil since ios7
-(UITableViewCell*)GetCellFromTableView:(UITableView*)tableView Sender:(id)sender { CGPoint pos = [s ...
- javascript 正则表达式使用
切记:js 正则表达式无需用双引号,正则表达式不是字符串. 参考网址:http://www.w3school.com.cn/jsref/jsref_obj_regexp.asp 个人用于查找字条串匹配 ...
- 【转】phpcms基础内容
<?php 思路: 一.目前在企业中使用比较多的cms内容管理有如下几种: 1.dedecms 2.phpcms 二.我们选择学习v9版本的phpcms,主要有以下几点原因: 1.基于MVC模式 ...
- Codeforces 271 Div 2 A Keyboard
题目链接:http://codeforces.com/contest/474/problem/A 解题报告:一个矩形的键盘,上面只有规定的字符,现在按的时候总是会向某个方向按偏,也就是输入一串字符后, ...
- Oracle 恢复被删除的数据,解决误操作删除数据
在删除数据的时候不小心,把delete语句执行错了,把别的表给delete,而且还执行了commit!真汗.......数据是相当的重要........废话少说了!赶快找方法吧: 第一种: 1.打开F ...
- HTTP头部详解
因为之后的HTTP头注入要学习这些所以就看了.觉得很不错,算是学习前的科普. 《HTTP头部详解》转载自:http://www.cnblogs.com/lcamry/p/5763040.h ...