RecommenderFilterSalaryResult
package org.andy.mymahout.recommendation.job; import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set; import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem; public class RecommenderFilterSalaryResult { final static int NEIGHBORHOOD_NUM = 2;
final static int RECOMMENDER_NUM = 3; public static void main(String[] args) throws TasteException, IOException {
String file = "datafile/job/pv.csv";
DataModel dataModel = RecommendFactory.buildDataModelNoPref(file);
RecommenderBuilder rb1 = RecommenderEvaluator.userCityBlock(dataModel);
RecommenderBuilder rb2 = RecommenderEvaluator.itemLoglikelihood(dataModel); Map<Long, Double> averSalary = getAverSalary("datafile/job/job.csv", dataModel); LongPrimitiveIterator iter = dataModel.getUserIDs();
while (iter.hasNext()) {
long uid = iter.nextLong();
System.out.print("userCityBlock =>");
filterSalary(uid, rb1, dataModel, averSalary);
System.out.print("itemLoglikelihood=>");
filterSalary(uid, rb2, dataModel, averSalary);
}
} public static void filterSalary(long uid, RecommenderBuilder recommenderBuilder, DataModel dataModel, Map<Long, Double> averSalary) throws TasteException, IOException {
Set<Long> jobids = getSalaryJobID(uid, "datafile/job/job.csv", averSalary);
IDRescorer rescorer = new JobRescorer(jobids);
List<RecommendedItem> list = recommenderBuilder.buildRecommender(dataModel).recommend(uid, RECOMMENDER_NUM, rescorer);
RecommendFactory.showItems(uid, list, false);
} public static Set<Long> getSalaryJobID(long uid, String file, Map<Long, Double> averSalary) throws IOException {
BufferedReader br = new BufferedReader(new FileReader(new File(file)));
Set<Long> jobids = new HashSet<Long>();
String s = null;
while ((s = br.readLine()) != null) {
String[] cols = s.split(",");
double salary = Double.valueOf(cols[2]);
if (salary < averSalary.get(uid)) {
jobids.add(Long.parseLong(cols[0]));
}
}
br.close();
return jobids;
} // 获取每个用户的基准比较工资:aver(浏览过的工资)*0.8
public static Map<Long, Double> getAverSalary(String file, DataModel dataModel)
throws NumberFormatException, IOException, TasteException{
Map<Long, Double> salaries = new HashMap<Long, Double>();
BufferedReader br = new BufferedReader(new FileReader(new File(file)));
String s = null;
while ((s = br.readLine()) != null) {
String[] cols = s.split(",");
salaries.put(Long.parseLong(cols[0]), Double.valueOf(cols[2]));
}
br.close(); Map<Long, Double> averSalaries = new HashMap<Long, Double>();
LongPrimitiveIterator iter = dataModel.getUserIDs();
while (iter.hasNext()) {
long uid = iter.nextLong();
FastIDSet items = dataModel.getItemIDsFromUser(uid);
LongPrimitiveIterator itemsIter = items.iterator();
double sum = 0;
int count = 0;
double aver = 0.0;
while (itemsIter.hasNext()) {
long item = itemsIter.nextLong();
double salary = salaries.get(item);
sum += salary;
count ++;
}
if(count > 0) aver = 0.8*sum/count;
averSalaries.put(uid, aver);
}
return averSalaries;
} }
RecommenderFilterSalaryResult的更多相关文章
- 转】用Mahout构建职位推荐引擎
原博文出自于: http://blog.fens.me/hadoop-mahout-recommend-job/ 感谢! 用Mahout构建职位推荐引擎 Hadoop家族系列文章,主要介绍Hadoop ...
随机推荐
- ubuntu14.04 在自带python2.7上安装python3.3.5 可以用但是有问题
一开始写的时候并没有发现这么安装有问题,后来发现问题也不想删了,当个教训,如果想安装从python自带版本换别的版本的话就别接着看了,这么安装有问题.需要进行配置,但是我还不会.其实下面只是差了一步配 ...
- import与import static
import ......className (静态导入) 功能: 导入一个类 import static ......className.* 功能:导入这个类里的静态方法,是JDK1.5中的新特性, ...
- hd acm2025
问题:平面上有n条折线,问这些折线最多能将平面分割成多少块? 思路:像这种平面被线段分割成几部分的问题,80%用递推解决,因为n条线段与(n-1)条线段能建立联系. 你可以作图观察一下,会发现新增 ...
- hd acm1013
Problem Description(数根) The digital root of a positive integer is found by summing the digits of the ...
- Vim 标签定义
一.单个文件: m+标记字符 打上标记,如在开头行按ms(start),标记开头: 如需返回到自己的标记点,按`+标记字符就行: 二.多个文件: m+大写标记字符 如果删除了标签的行,同时也删除了标签 ...
- <再看TCP/IP第一卷>关于网络层及协议细节---ICMP协议几个要注意的地方
在TCP/IP协议族中,ICMP协议是一个介于网络层和传输层中间的一个协议,许多材料都会认为ICMP是网络层的一个部分,但是ICMP协议的报头是被包裹在IP协议之中的,而UDP协议又可以被ICMP协议 ...
- OJ 之 FATE
- FATE Crawling in process... Crawling failed Time Limit:100 ...
- C#实现对外部程序的调用操作
测试工具,首先也是一个C#的程序,它的主要目的是: 1:获取上文应用程序的窗口句柄,继而获取TextBox句柄及Button句柄: 2:为TextBox随机填入一些字符: 3:模拟点击Button: ...
- Hibernate技术
Hibernate中3个重要的类: 配置类(configuration) 负责管理Hibernate的配置信息,包含数据库连接URL.数据库用户.数据库密麻麻.数据库驱动等. 会话工厂类(Sessio ...
- 目标检测 — one-stage检测(二)
one-stage检测算法,其不需要region proposal阶段,直接产生物体的类别概率和位置坐标值,经过单次检测即可直接得到最终的检测结果,因此有着更快的检测速度,比较典型的算法如YOLO,S ...