package org.andy.mymahout.recommendation.job;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set; import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;

/**
 * Demo driver that prints job recommendations for every user, passing a
 * salary-based rescorer to the recommender.
 *
 * <p>For each user a baseline salary is computed as 0.8 times the average
 * salary of the jobs the user has in the data model. The IDs of all jobs whose
 * salary is below that baseline are handed to a {@code JobRescorer}.
 * NOTE(review): {@code JobRescorer} is defined outside this file; presumably it
 * filters those job IDs out of the recommendations - confirm.
 *
 * <p>The job file is read as comma-separated text with the job ID in column 0
 * and the salary in column 2.
 */
public class RecommenderFilterSalaryResult {

    /** Not referenced inside this class; kept because other classes may use it. */
    static final int NEIGHBORHOOD_NUM = 2;

    /** Number of recommendations requested per user. */
    static final int RECOMMENDER_NUM = 3;

    /** Path of the job file read for salaries. */
    private static final String JOB_FILE = "datafile/job/job.csv";

    /** Fraction of a user's average viewed salary that is used as the baseline. */
    private static final double BASELINE_FACTOR = 0.8;

    /**
     * Builds the data model and two recommender builders, then prints the
     * rescored recommendations of both builders for every user in the model.
     *
     * @param args unused
     * @throws TasteException if the Mahout model or a recommender fails
     * @throws IOException    if a data file cannot be read
     */
    public static void main(String[] args) throws TasteException, IOException {
        String file = "datafile/job/pv.csv";
        DataModel dataModel = RecommendFactory.buildDataModelNoPref(file);
        RecommenderBuilder rb1 = RecommenderEvaluator.userCityBlock(dataModel);
        RecommenderBuilder rb2 = RecommenderEvaluator.itemLoglikelihood(dataModel);

        // Baselines are computed once up front and shared by all users.
        Map<Long, Double> averSalary = getAverSalary(JOB_FILE, dataModel);

        LongPrimitiveIterator iter = dataModel.getUserIDs();
        while (iter.hasNext()) {
            long uid = iter.nextLong();
            System.out.print("userCityBlock =>");
            filterSalary(uid, rb1, dataModel, averSalary);
            System.out.print("itemLoglikelihood=>");
            filterSalary(uid, rb2, dataModel, averSalary);
        }
    }

    /**
     * Recommends up to {@link #RECOMMENDER_NUM} jobs for one user using a
     * rescorer built from the user's below-baseline job IDs, and prints them
     * through {@code RecommendFactory.showItems}.
     *
     * @param uid                user to recommend for
     * @param recommenderBuilder builder used to create the recommender
     * @param dataModel          model the recommender is built on
     * @param averSalary         per-user baseline salaries, keyed by user ID
     * @throws TasteException if building the recommender or recommending fails
     * @throws IOException    if the job file cannot be read
     */
    public static void filterSalary(long uid, RecommenderBuilder recommenderBuilder, DataModel dataModel,
            Map<Long, Double> averSalary) throws TasteException, IOException {
        Set<Long> jobids = getSalaryJobID(uid, JOB_FILE, averSalary);
        IDRescorer rescorer = new JobRescorer(jobids);
        List<RecommendedItem> list =
                recommenderBuilder.buildRecommender(dataModel).recommend(uid, RECOMMENDER_NUM, rescorer);
        RecommendFactory.showItems(uid, list, false);
    }

    /**
     * Collects the IDs of all jobs in {@code file} whose salary is strictly
     * below the given user's baseline salary.
     *
     * @param uid        user whose baseline is applied
     * @param file       path of the job file (job ID in column 0, salary in column 2)
     * @param averSalary per-user baseline salaries, keyed by user ID
     * @return the matching job IDs; empty if the user has no baseline entry
     * @throws IOException if the file cannot be opened or read
     */
    public static Set<Long> getSalaryJobID(long uid, String file, Map<Long, Double> averSalary)
            throws IOException {
        Set<Long> jobids = new HashSet<Long>();

        // Look the baseline up once; a missing entry previously caused an
        // unboxing NullPointerException on the first line read.
        Double baseline = averSalary.get(uid);
        if (baseline == null) {
            return jobids;
        }
        double threshold = baseline.doubleValue();

        BufferedReader br = new BufferedReader(new FileReader(new File(file)));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue; // tolerate blank lines, e.g. a trailing newline
                }
                String[] cols = line.split(",");
                double salary = Double.parseDouble(cols[2].trim());
                if (salary < threshold) {
                    jobids.add(Long.parseLong(cols[0].trim()));
                }
            }
        } finally {
            // Close even when reading or parsing fails.
            br.close();
        }
        return jobids;
    }

    /**
     * Computes each user's baseline comparison salary:
     * average(salary of the jobs the user viewed) * 0.8.
     *
     * <p>Items of a user that have no salary in {@code file} are ignored. A user
     * with no usable items gets a baseline of {@code 0.0}.
     *
     * @param file      path of the job file (job ID in column 0, salary in column 2)
     * @param dataModel model supplying the users and their item IDs
     * @return baseline salary per user ID, one entry for every user in the model
     * @throws NumberFormatException if an ID or salary column is not numeric
     * @throws IOException           if the file cannot be opened or read
     * @throws TasteException        if the data model cannot be queried
     */
    public static Map<Long, Double> getAverSalary(String file, DataModel dataModel)
            throws NumberFormatException, IOException, TasteException {
        // Job ID -> salary, loaded once from the job file.
        Map<Long, Double> salaries = new HashMap<Long, Double>();
        BufferedReader br = new BufferedReader(new FileReader(new File(file)));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue; // tolerate blank lines
                }
                String[] cols = line.split(",");
                salaries.put(Long.parseLong(cols[0].trim()), Double.parseDouble(cols[2].trim()));
            }
        } finally {
            // Close even when reading or parsing fails.
            br.close();
        }

        Map<Long, Double> averSalaries = new HashMap<Long, Double>();
        LongPrimitiveIterator iter = dataModel.getUserIDs();
        while (iter.hasNext()) {
            long uid = iter.nextLong();
            FastIDSet items = dataModel.getItemIDsFromUser(uid);
            LongPrimitiveIterator itemsIter = items.iterator();
            double sum = 0;
            int count = 0;
            while (itemsIter.hasNext()) {
                long item = itemsIter.nextLong();
                Double salary = salaries.get(item);
                if (salary == null) {
                    // Item is not in the job file: skip it instead of failing
                    // with an unboxing NullPointerException.
                    continue;
                }
                sum += salary.doubleValue();
                count++;
            }
            double aver = 0.0;
            if (count > 0) {
                aver = BASELINE_FACTOR * sum / count;
            }
            averSalaries.put(uid, aver);
        }
        return averSalaries;
    }
}

RecommenderFilterSalaryResult的更多相关文章

  1. 【转】用Mahout构建职位推荐引擎

    原博文出自于: http://blog.fens.me/hadoop-mahout-recommend-job/ 感谢! 用Mahout构建职位推荐引擎 Hadoop家族系列文章,主要介绍Hadoop ...

随机推荐

  1. 【八】MongoDB管理之分片集群实践

    MongoDB中集群有三种:主从复制.副本集.分片集群.目前副本集已经替代主从复制架构,成为官方建议采用的架构,而分片集群相较于前两种,更加复杂. 下面是生产环境中常用的分片集群架构: 我们知道,分片 ...

  2. CSS3滑块菜单

    在线演示 本地下载

  3. Spring Cloud之Feign客户端重构思想

    应该重构接口信息(重点) toov5-parent  存放共同依赖信息 toov5-api       api的只有接口没有实现 toov5-api-member toov5-api-order to ...

  4. Android 基础-3.0 数据存储方式

    Android几种数据存储方式 文件存储 SharedPreference存储 Json解析 SQLite数据库存储 文件存储 文件存储是Android中最基本的一种存储方式,和Java中实现I/O的 ...

  5. DELPHI中四种EXCEL访问技术实现

    一.引言 EXCEL在处理中文报表时功能非常强大,EXCEL报表访问也是信息系统开发中的一个重要内容,本文总结以往开发中所用到的几种EXCEL文件访问方法,在实际工作中也得到了很好的验证,本文列举了其 ...

  6. Python基础之元组操作

    元组的常用操作包括但不限于以下操作: 元组的索引,计数等 这里将对列表的内置操作方法进行总结归纳,重点是以示例的方式进行展示. 使用type获取创建对象的类 type(tuple) 使用dir获取类的 ...

  7. 浏览器访问web站点原理图

    启动tomcat,在浏览器中输入http://localhost:8080/web_kevin/hello.html,发生的事情如下: 1.浏览器解析主机名,即解析localhost.浏览器首先会到本 ...

  8. BEC listen and translation exercise 34

    In a busy classroom filled with nearly 20 children, Sabriye Tenberken lectures her pupils to always ...

  9. 音频压缩(Speex使用&Opus简介)--转

    博客地址:http://blog.csdn.net/kevindgk GitHub地址:https://github.com/KevinDGK/MyAudioDemo 一简介 二局域网语音配置 三Sp ...

  10. Gym 101142G : Gangsters in Central City(DFS序+LCA+set)

    题意:现在有一棵树,1号节点是水源,叶子节点是村庄,现在有些怪兽会占领一些村庄(即只占领叶子节点),现在要割去一些边,使得怪兽到不了水源.给出怪兽占领和离开的情况,现在要割每次回答最小的割,使得怪兽不 ...