基于评分的商品top-N推荐系统

import io  # needed because of weird encoding of u.item file

import os

from surprise import KNNBaseline

from surprise import Dataset

from surprise import get_dataset_dir

from surprise import Reader

from surprise import dump

def read_item_names(item_file_path, split_flag='\t'):
    """Build lookup tables between raw item ids and item names.

    Every line of the file is split on ``split_flag``. The first field is
    used as the raw item id and the second field, with surrounding
    whitespace stripped, as the item name. Lines that contain fewer than
    two fields (for example blank lines or a trailing empty line) are
    skipped instead of raising ``IndexError``.

    Args:
        item_file_path: Path of the UTF-8 encoded item file.
        split_flag: Field separator used in the file (a tab by default).

    Returns:
        A ``(rid_to_name, name_to_rid)`` tuple of dicts mapping raw id to
        name and name to raw id. When an id or a name occurs more than
        once, the last occurrence wins.
    """
    rid_to_name = {}
    name_to_rid = {}

    with io.open(item_file_path, 'r', encoding='utf8') as f:
        for line in f:
            fields = line.split(split_flag)
            if len(fields) < 2:
                # No name column on this line; nothing to map.
                continue
            raw_id = fields[0]
            name = fields[1].strip()
            rid_to_name[raw_id] = name
            name_to_rid[name] = raw_id

    return rid_to_name, name_to_rid

# File in the user's home directory that train_data() dumps the fitted model
# to and get_neighbors() loads it back from.
save_path = os.path.expanduser(r'~/dump_file')

def train_data(user_item_score_path, split_flag='\t', user_based=False,
               rating_scale=(1, 5)):
    """Fit a KNNBaseline model on a rating file and dump it to ``save_path``.

    The rating file is parsed with the line format
    ``user item rating timestamp``, the full data set is used as the
    training set, and similarities are computed with the
    ``pearson_baseline`` measure.

    Args:
        user_item_score_path: Path of the rating file; ``~`` is expanded.
        split_flag: Field separator used in the rating file (tab by default).
        user_based: Passed through as the ``user_based`` similarity option;
            ``False`` computes similarities between items, ``True`` between
            users.
        rating_scale: ``(lowest, highest)`` rating passed to the ``Reader``.
            The default matches the value the ``Reader`` used when this
            argument was not exposed; pass e.g. ``(1, 10)`` for wider scales.

    Returns:
        The fitted ``KNNBaseline`` instance (the same object that is dumped).
    """
    file_path = os.path.expanduser(user_item_score_path)

    reader = Reader(line_format='user item rating timestamp', sep=split_flag,
                    rating_scale=rating_scale)
    data = Dataset.load_from_file(file_path, reader=reader)

    # Train on every available rating; no hold-out split is made here.
    trainset = data.build_full_trainset()

    sim_options = {'name': 'pearson_baseline', 'user_based': user_based}
    algo = KNNBaseline(sim_options=sim_options)
    algo.fit(trainset)

    # Persist the fitted model so it can be reloaded without retraining.
    dump.dump(save_path, algo=algo)

    return algo

def get_neighbors(item_name, item_file_path, kk=10):
    """Yield the names of the nearest neighbours of ``item_name``.

    The model is loaded from ``save_path`` and the id/name mappings are read
    from ``item_file_path`` with the default separator. The item name is
    stripped, translated to its raw id and then to the model's inner id
    before the neighbours are queried.

    Args:
        item_name: Name of the item to look up; surrounding whitespace is
            ignored.
        item_file_path: Path of the item file understood by
            ``read_item_names``.
        kk: Number of neighbours requested from the model.

    Returns:
        A lazy generator of neighbour names; it can be consumed only once.

    Raises:
        KeyError: If ``item_name`` is not present in the item file.
    """
    # Load the previously dumped model; the first element of the loaded
    # pair is not needed here.
    _, model = dump.load(save_path)

    # Mappings raw id <-> item name.
    names_by_rid, rids_by_name = read_item_names(item_file_path)

    trainset = model.trainset

    # name -> raw id -> inner id of the query item.
    query_inner_id = trainset.to_inner_iid(rids_by_name[item_name.strip()])

    neighbor_inner_ids = model.get_neighbors(query_inner_id, k=kk)

    # inner id -> raw id -> name, evaluated lazily as the caller iterates.
    return (names_by_rid[trainset.to_raw_iid(inner_id)]
            for inner_id in neighbor_inner_ids)

# Train on the shuffled rating file and dump the fitted model.
u_i_path = r'C:\Users\FELIX\Desktop\surprise库源码分析\uuu.txt'
train_data(u_i_path)

# Print the ten nearest neighbours of one item, one name per line.
i_path = r'C:\Users\FELIX\Desktop\surprise库源码分析\uitems.txt'
nei_items = get_neighbors('uitems685', i_path, kk=10)
for nei in nei_items:
    print(nei)

如果没有数据的话，可以随机生成测试数据：

# Generate synthetic ratings: 1000 users rate random items out of 5000
# with scores from 1 to 10. Four passes are made over the users, and in
# each pass a user rates between 0 and 99 randomly chosen items.
import random

# Open the output once in write mode: reopening it in append mode for every
# single line is very slow, and appending makes reruns pile up old data.
with open('uu.txt', 'w', encoding='utf8') as f:
    for n in range(4):
        for i in range(1000):
            t = random.randrange(100)
            for j in range(t):
                item = random.randrange(5000)
                # randint is inclusive on both ends, giving the 1-10 range.
                goal = random.randint(1, 10)
                # The trailing tab leaves the timestamp column empty.
                f.write('{}\t{}\t{}\t\n'.format(i, item, goal))

# Shuffle the generated rating lines and write them to uuu.txt.
with open('uu.txt', 'r', encoding='utf8') as f:
    data = f.readlines()

# random.shuffle reorders the list in place and returns None, so its result
# must not be assigned.
random.shuffle(data)

# Write mode (not append) so that rerunning the script does not duplicate
# ratings in the output file.
with open('uuu.txt', 'w', encoding='utf8') as f2:
    f2.writelines(data)

# Generate the item file: one "<id>\tuitems<id>" line for each of 5000 items.
with open('uitems.txt', 'w', encoding='utf8') as f:
    for i in range(5000):
        s = '{0}\tuitems{0}\n'.format(i)
        f.write(s)

基于评分的商品top-N推荐系统的更多相关文章

【新鲜出炉的个人项目】基于 Flink 的商品推荐系统
FlinkCommodityRecommendationSystem Recs FlinkCommodityRecommendationSystem(基于 Flink 的商品推荐系统) 1. 前言系 ...
文献综述八：基于JAVA的商品网站的研究
一.基本信息标题:基于JAVA的商品网站的研究时间:2015 出版源:信息技术文件分类:对java语言的研究二.研究背景本文主要介绍了系统的分析,设计和开发的全部过程. 三.具体内容文献的 ...
文献综述三：基于JSP的商品信息管理系统设计与开发
一.基本信息标题:基于JSP的商品信息管理系统设计与开发时间:2015 出版源:Computer Knowledge and Technology 文件分类:jsp技术的系统开发二.研究背景通 ...
基于卷积神经网络CNN的电影推荐系统
本项目使用文本卷积神经网络,并使用MovieLens数据集完成电影推荐的任务. 推荐系统在日常的网络应用中无处不在,比如网上购物.网上买书.新闻app.社交网络.音乐网站.电影网站等等等等,有人的地方 ...
Python基于机器学习方法实现的电影推荐系统
推荐算法在互联网行业的应用非常广泛,今日头条.美团点评等都有个性化推荐,推荐算法抽象来讲,是一种对于内容满意度的拟合函数,涉及到用户特征和内容特征,作为模型训练所需维度的两大来源,而点击率,页面停留时 ...
【Machine Learning】决策树案例：基于python的商品购买能力预测系统
决策树在商品购买能力预测案例中的算法实现作者:白宁超 2016年12月24日22:05:42 摘要:随着机器学习和深度学习的热潮,各种图书层出不穷.然而多数是基础理论知识介绍,缺乏实现的深入理解.本 ...
基于神经网络的embeddding来构建推荐系统
在之前的博客中,我主要介绍了embedding用于处理类别特征的应用,其实,在学术界和工业界上,embedding的应用还有很多,比如在推荐系统中的应用.本篇博客就介绍了如何利用embedding来构 ...
基于neighborhood models(item-based) 的个性化推荐系统
文章主要介绍的是koren 08年发的论文[1], 2.2neighborhood models部分内容(其余部分会陆续补充上来). koren论文中用到netflix 数据集, 过于大, 在普通的 ...
【转】基于 Kylin 的推荐系统效果评价系统
OLAP(联机分析处理)是数据仓库的主要应用之一,通过设计维度.度量,我们可以构建星型模型或雪花模型,生成数据多维立方体Cube,基于Cube可以做钻取.切片.旋转等多维分析操作.早在十年前,SQL ...

随机推荐

AtCoder Grand Contest 034
A:如果C在D左侧,显然先让B到达终点再让A走即可,否则先判断一下A是否可以在某处超过B.也就是先判断一下起点与终点之间是否有连续的障碍,若有则无解:然后若C在D左侧输出Yes,否则判断B和D之间是否 ...
ASCII,UTF-8,Unicode字符串相互转换
#include<string> #include<windows.h> #include<vector> using namespace std; //utf8 ...
.NET Standards
.net的创始者们在一开始的时候,就意识到了他们的编程技术可以用在不通的操作系统和不同类型的cpu上.他们改进了20世纪90年代编程语言实现技术.最主要的一条是,不同的编程语言对应统一个运行时,及CL ...
.net通过网络路径下载文件至本地
获取网络文件,通过流保存文件,由于上一版存在数据丢失情况,稍微调整了以下. //网络路径文件 string pathUrl = "http://localhost:805/春风吹.mp3&q ...
数据结构与算法--递归(recursion)
递归的概念简单的说: 递归就是方法自己调用自己,每次调用时传入不同的变量.递归有助于编程者解决复杂的问题,同时可以让代码变得简洁. 递归调用机制我列举两个小案例,来帮助大家理解递归 1.打印问题 ...
css 盒子取值
盒子:当我们设置一个标签宽高时,默认设置的是盒子里面content大小. 内容盒:content 填充盒:content+padding(overflow截取的区域) 边框盒:content+padd ...
JavaIO模型--装饰者模式
JavaIO体现出装饰者的设计模式今天在学SparkRDD之前,听了一堂复习JavaIO的课,觉得讲得不错 Java的IO一直让我觉得一层一层的很麻烦,刚接触的时候,理不太清楚只知道要分解为输入输 ...
JAVA笔记整理（二），下载安装JDK
Windows平台 1.登录Oracle官方网站(http://www.oracle.com/index.html),找到下载 2.选择要下载的版本,点击JDK DOWNLOAD 3.下载文件,先勾选 ...
Django的结构
一.Django的结构二.静态文件的配置 STATIC_URL = '/static/' # HTML中使用的静态文件夹前缀 STATICFILES_DIRS = [ os.path.join(BA ...
天兔 -Lepus 慢查询分析平台配置
想要实现慢查询查询分析,需要在被监控端安装percona-toolkit工具. 1.被监控端安装软件包 yum -y install perl-IO-Socket-SSL yum -y insta ...

基于评分的商品top-N推荐系统

基于评分的商品top-N推荐系统的更多相关文章

随机推荐

热门专题