RecommenderFilterSalaryResult
package org.andy.mymahout.recommendation.job; import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set; import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;

/**
 * Prints job recommendations for every user in the data model, once with the
 * "userCityBlock" recommender and once with the "itemLoglikelihood"
 * recommender, after rescoring with a salary-based {@code JobRescorer}.
 *
 * <p>For each user a baseline salary is computed as 0.8 times the average
 * salary of the jobs returned by {@code DataModel.getItemIDsFromUser}. The ids
 * of all jobs paying less than that baseline are handed to {@code JobRescorer}.
 * NOTE(review): presumably the rescorer excludes those ids from the result;
 * confirm against {@code JobRescorer}.
 *
 * <p>The job file is read as comma-separated text where column 0 is the job id
 * and column 2 is the salary.
 */
public class RecommenderFilterSalaryResult {

    static final int NEIGHBORHOOD_NUM = 2;
    static final int RECOMMENDER_NUM = 3;

    /** File the preference-less data model is built from. */
    private static final String PV_FILE = "datafile/job/pv.csv";
    /** Job file: column 0 is the job id, column 2 is the salary. */
    private static final String JOB_FILE = "datafile/job/job.csv";
    /** Fraction of a user's average viewed salary used as the baseline. */
    private static final double SALARY_RATIO = 0.8;

    /**
     * Builds the data model and both recommenders, then prints the rescored
     * recommendations of each recommender for every user.
     *
     * @param args unused
     * @throws TasteException if Mahout fails to build or query a recommender
     * @throws IOException if one of the data files cannot be read
     */
    public static void main(String[] args) throws TasteException, IOException {
        DataModel dataModel = RecommendFactory.buildDataModelNoPref(PV_FILE);
        RecommenderBuilder rb1 = RecommenderEvaluator.userCityBlock(dataModel);
        RecommenderBuilder rb2 = RecommenderEvaluator.itemLoglikelihood(dataModel);

        Map<Long, Double> averSalary = getAverSalary(JOB_FILE, dataModel);

        LongPrimitiveIterator iter = dataModel.getUserIDs();
        while (iter.hasNext()) {
            long uid = iter.nextLong();
            System.out.print("userCityBlock =>");
            filterSalary(uid, rb1, dataModel, averSalary);
            System.out.print("itemLoglikelihood=>");
            filterSalary(uid, rb2, dataModel, averSalary);
        }
    }

    /**
     * Recommends up to {@link #RECOMMENDER_NUM} jobs for one user, rescored by
     * a {@code JobRescorer} built from the jobs paying less than the user's
     * baseline salary, and prints them via {@code RecommendFactory.showItems}.
     *
     * @param uid user to recommend for
     * @param recommenderBuilder builder of the recommender to query
     * @param dataModel data model the recommender is built on
     * @param averSalary baseline salary per user id, see {@link #getAverSalary}
     * @throws TasteException if the recommender cannot be built or queried
     * @throws IOException if the job file cannot be read
     */
    public static void filterSalary(long uid, RecommenderBuilder recommenderBuilder, DataModel dataModel,
            Map<Long, Double> averSalary) throws TasteException, IOException {
        Set<Long> jobids = getSalaryJobID(uid, JOB_FILE, averSalary);
        IDRescorer rescorer = new JobRescorer(jobids);
        List<RecommendedItem> list =
                recommenderBuilder.buildRecommender(dataModel).recommend(uid, RECOMMENDER_NUM, rescorer);
        RecommendFactory.showItems(uid, list, false);
    }

    /**
     * Collects the ids of all jobs whose salary is strictly below the baseline
     * salary of the given user.
     *
     * @param uid user whose baseline is looked up in {@code averSalary}
     * @param file path of the job file (column 0: job id, column 2: salary)
     * @param averSalary baseline salary per user id
     * @return ids of the jobs paying less than the user's baseline; empty when
     *         the user has no baseline entry
     * @throws IOException if the file cannot be read
     */
    public static Set<Long> getSalaryJobID(long uid, String file, Map<Long, Double> averSalary)
            throws IOException {
        Set<Long> jobids = new HashSet<Long>();

        // Look the baseline up once; a user without one has nothing to compare
        // against (the old code threw a NullPointerException on unboxing here).
        Double baseline = averSalary.get(uid);
        if (baseline == null) {
            return jobids;
        }
        double threshold = baseline.doubleValue();

        BufferedReader br = new BufferedReader(new FileReader(new File(file)));
        try {
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                if (line.trim().isEmpty()) {
                    continue; // tolerate blank lines, e.g. a trailing newline
                }
                String[] cols = line.split(",");
                if (Double.parseDouble(cols[2]) < threshold) {
                    jobids.add(Long.parseLong(cols[0]));
                }
            }
        } finally {
            // Close the reader even when reading or parsing fails.
            br.close();
        }
        return jobids;
    }

    /**
     * Computes each user's baseline comparison salary:
     * average(salary of the jobs the user viewed) * 0.8.
     *
     * @param file path of the job file (column 0: job id, column 2: salary)
     * @param dataModel model supplying the user ids and each user's item ids
     * @return baseline salary per user id; 0.0 for a user none of whose items
     *         has a salary in the job file
     * @throws NumberFormatException if a job id or salary is not numeric
     * @throws IOException if the file cannot be read
     * @throws TasteException if the data model cannot be queried
     */
    public static Map<Long, Double> getAverSalary(String file, DataModel dataModel)
            throws NumberFormatException, IOException, TasteException {
        Map<Long, Double> salaries = new HashMap<Long, Double>();
        BufferedReader br = new BufferedReader(new FileReader(new File(file)));
        try {
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                if (line.trim().isEmpty()) {
                    continue; // tolerate blank lines, e.g. a trailing newline
                }
                String[] cols = line.split(",");
                salaries.put(Long.parseLong(cols[0]), Double.valueOf(cols[2]));
            }
        } finally {
            // Close the reader even when reading or parsing fails.
            br.close();
        }

        Map<Long, Double> averSalaries = new HashMap<Long, Double>();
        LongPrimitiveIterator iter = dataModel.getUserIDs();
        while (iter.hasNext()) {
            long uid = iter.nextLong();
            FastIDSet items = dataModel.getItemIDsFromUser(uid);
            LongPrimitiveIterator itemsIter = items.iterator();
            double sum = 0;
            int count = 0;
            while (itemsIter.hasNext()) {
                Double salary = salaries.get(itemsIter.nextLong());
                if (salary == null) {
                    // Item is missing from the job file: leave it out of the
                    // average instead of failing on auto-unboxing null.
                    continue;
                }
                sum += salary.doubleValue();
                count++;
            }
            double aver = 0.0;
            if (count > 0) {
                aver = SALARY_RATIO * sum / count;
            }
            averSalaries.put(uid, aver);
        }
        return averSalaries;
    }
}
RecommenderFilterSalaryResult的更多相关文章
- 【转】用Mahout构建职位推荐引擎
原博文出自于: http://blog.fens.me/hadoop-mahout-recommend-job/ 感谢! 用Mahout构建职位推荐引擎 Hadoop家族系列文章,主要介绍Hadoop ...
随机推荐
- Linux mint
最近一直在配置vim, 今天终于配的差不多了,拿出来晒晒,^_^ . 附上一段Linux Mint 的简介(来自Wiki). Linux Mint是一种基于Ubuntu开发出的Linux操作系统.由L ...
- Video Brightness Enhancement
Tone Mapping原是摄影学中的一个术语,因为打印相片所能表现的亮度范围不足以表现现实世界中的亮度域,而如果简单的将真实世界的整个亮度域线性压缩到照片所能表现的亮度域内,则会在明暗两端同时丢失很 ...
- 【leetcode】Balanced Binary Tree
Given a binary tree, determine if it is height-balanced. For this problem, a height-balanced binary ...
- python编写脚本应用实例
这里主要记录工作中应用python编写脚本的实例.由于shell脚本操作数据库(增.删.改.查)并不是十分直观方便,故这里采用python监控mysql状态,然后将状态保存到数据库中,供前台页面进行调 ...
- react项目中antd组件库的使用需要注意的问题
antd是蚂蚁金服推出的ui组件库,给我们在react项目开发中提供了大大的便利.但在使用的过程中,或多或少的会遇到一些问题,毕竟,用的是别人的东西,就得遵守别人的规则嘛!官方文档:https://a ...
- CV2图像操作
一.读入图像使用函数cv2.imread(filepath,flags)读入一副图片filepath:要读入图片的完整路径flags:读入图片的标志 cv2.IMREAD_COLOR:默认参数,读入一 ...
- BZOJ 3296 [USACO2011 Open] Learning Languages:并查集
题目链接:http://www.lydsy.com/JudgeOnline/problem.php?id=3296 题意: 农夫约翰的N(2 <= N <= 10,000)头奶牛,编号为1 ...
- Linux-awk command
简介 awk是一个强大的文本分析工具,相对于grep的查找,sed的编辑,awk在其对数据分析并生成报告时,显得尤为强大.简单来说awk就是把文件逐行的读入,以空格为默认分隔符将每行切片,切开的部分再 ...
- 数学建模--matlab基础知识
虽然python也能做数据分析,不过参加数学建模,咱还是用专业的 1. Matlab-入门篇:Hello world! 程序员入门第一式: disp(‘hello world!’) 2. 基本运算 先 ...
- 【遍历二叉树】09判断二叉树是否关于自己镜像对称【Symmetric Tree】
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 给定一个二叉树,判断是否他自己的镜 ...