hadoop2.2.0 MapReduce求和并排序
JavaBean必须实现WritableComparable接口,并实现该接口的序列化、反序列化和比较方法
package com.my.hadoop.mapreduce.sort;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Per-account income/expense record that can be used as a MapReduce key or value.
 *
 * <p>Serialization order is: account (UTF), income, expences, surplus. {@link #write}
 * and {@link #readFields} must stay in that order.
 *
 * <p>Natural ordering: income descending, then expences ascending. Account and
 * surplus are used only as final tie-breaks so that {@link #compareTo} is a proper
 * total order (it returns 0 for equal beans) and agrees with {@link #equals}.
 */
public class InfoBean implements WritableComparable<InfoBean> {

    private String account;
    private double income;
    private double expences;
    private double surplus;

    /**
     * Sets account, income and expences in one call and derives
     * {@code surplus = income - expences}.
     *
     * @param account  account identifier
     * @param income   income amount
     * @param expences expense amount
     */
    public void set(String account, double income, double expences) {
        this.account = account;
        this.income = income;
        this.expences = expences;
        this.surplus = income - expences;
    }

    /**
     * Returns {@code income<TAB>expences<TAB>surplus}. The account is deliberately
     * not included. SortStep's mapper in this file splits lines on tabs and reads
     * account, income and expences from the first three fields, so this format
     * should be kept stable.
     */
    @Override
    public String toString() {
        return income + "\t" + expences + "\t" + surplus;
    }

    /**
     * Restores all fields from {@code in}, in the same order {@link #write} emits them.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.account = in.readUTF();
        this.income = in.readDouble();
        this.expences = in.readDouble();
        this.surplus = in.readDouble();
    }

    /**
     * Serializes all fields to {@code out}.
     *
     * <p>A {@code null} account is written as the empty string, because
     * {@link DataOutput#writeUTF(String)} throws a NullPointerException on
     * {@code null}; a bean whose account was never set can therefore still be
     * serialized.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.account == null ? "" : this.account);
        out.writeDouble(this.income);
        out.writeDouble(this.expences);
        out.writeDouble(this.surplus);
    }

    /**
     * Orders beans by income descending, then expences ascending; account and
     * surplus (both ascending) break any remaining ties.
     *
     * <p>Uses {@link Double#compare(double, double)} rather than {@code ==} / {@code >}
     * so that the result is 0 for equal beans, antisymmetric, and well defined for NaN.
     *
     * @param o the bean to compare against
     * @return negative, zero or positive as this bean sorts before, equal to or after {@code o}
     */
    @Override
    public int compareTo(InfoBean o) {
        // Arguments are swapped on purpose: higher income sorts first.
        int byIncome = Double.compare(o.getIncome(), this.income);
        if (byIncome != 0) {
            return byIncome;
        }
        int byExpences = Double.compare(this.expences, o.getExpences());
        if (byExpences != 0) {
            return byExpences;
        }
        int byAccount = compareAccounts(this.account, o.getAccount());
        if (byAccount != 0) {
            return byAccount;
        }
        return Double.compare(this.surplus, o.getSurplus());
    }

    /** Null-safe string comparison; {@code null} sorts before any non-null value. */
    private static int compareAccounts(String left, String right) {
        if (left == null) {
            return right == null ? 0 : -1;
        }
        if (right == null) {
            return 1;
        }
        return left.compareTo(right);
    }

    /** Two beans are equal exactly when {@link #compareTo} returns 0. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        return compareTo((InfoBean) obj) == 0;
    }

    /** Content-based hash over the same four fields that {@link #equals} compares. */
    @Override
    public int hashCode() {
        int result = account == null ? 0 : account.hashCode();
        result = 31 * result + hashOf(income);
        result = 31 * result + hashOf(expences);
        result = 31 * result + hashOf(surplus);
        return result;
    }

    /** Hash of a double derived from its {@link Double#doubleToLongBits} representation. */
    private static int hashOf(double value) {
        long bits = Double.doubleToLongBits(value);
        return (int) (bits ^ (bits >>> 32));
    }

    public String getAccount() {
        return account;
    }

    public void setAccount(String account) {
        this.account = account;
    }

    public double getIncome() {
        return income;
    }

    public void setIncome(double income) {
        this.income = income;
    }

    public double getExpences() {
        return expences;
    }

    public void setExpences(double expences) {
        this.expences = expences;
    }

    public double getSurplus() {
        return surplus;
    }

    /** Overrides the derived surplus; {@link #set} recomputes it from income and expences. */
    public void setSurplus(double surplus) {
        this.surplus = surplus;
    }
}
先求和
package com.my.hadoop.mapreduce.sort;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * First job of the pipeline: sums income and expences per account.
 *
 * <p>Input lines are tab-separated with account, income and expences in the first
 * three fields. The output is one record per account, keyed by the account, with an
 * {@link InfoBean} holding the summed values.
 */
public class SumStep {

    /** Parses each input line into an (account, InfoBean) pair. */
    public static class SumMap extends Mapper<LongWritable, Text, Text, InfoBean> {
        // Reused across map() calls to avoid allocating per record.
        private Text k = new Text();
        private InfoBean v = new InfoBean();

        /**
         * Emits the account as key and a bean with that line's income/expences as value.
         *
         * @throws java.io.IOException if the line has fewer than three tab-separated
         *         fields or a numeric field cannot be parsed; the message includes the
         *         offending line so the bad record can be located
         */
        @Override
        public void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
            String line = value.toString();
            String[] fields = line.split("\t");
            if (fields.length < 3) {
                throw new java.io.IOException("Malformed record (input key " + key
                        + "): expected at least 3 tab-separated fields but found "
                        + fields.length + " in \"" + line + "\"");
            }
            String account = fields[0];
            double in;
            double out;
            try {
                in = Double.parseDouble(fields[1]);
                out = Double.parseDouble(fields[2]);
            } catch (NumberFormatException e) {
                throw new java.io.IOException("Malformed record (input key " + key
                        + "): non-numeric income/expences in \"" + line + "\"", e);
            }
            k.set(account);
            v.set(account, in, out);
            context.write(k, v);
        }
    }

    /** Adds up all beans that share an account. */
    public static class SumReduce extends Reducer<Text, InfoBean, Text, InfoBean> {
        // Reused across reduce() calls.
        private InfoBean v = new InfoBean();

        /**
         * Writes one record per account containing the summed income and expences.
         * The bean's own account field is set to the empty string; the account is
         * carried by the output key.
         */
        @Override
        public void reduce(Text key, Iterable<InfoBean> value, Context context) throws java.io.IOException, InterruptedException {
            double in_sum = 0;
            double out_sum = 0;
            for (InfoBean bean : value) {
                in_sum += bean.getIncome();
                out_sum += bean.getExpences();
            }
            v.set("", in_sum, out_sum);
            context.write(key, v);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     */
    public static void main(String[] args) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: " + SumStep.class.getSimpleName() + " <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, SumStep.class.getSimpleName());
        job.setJarByClass(SumStep.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setMapperClass(SumMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(InfoBean.class);

        job.setReducerClass(SumReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(InfoBean.class);

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
后排序
package com.my.hadoop.mapreduce.sort;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Second job of the pipeline: sorts per-account totals using {@link InfoBean}'s
 * natural ordering by making the bean the map output key.
 *
 * <p>Input lines are tab-separated with account, income and expences in the first
 * three fields; any further fields are ignored.
 */
public class SortStep {

    /** Parses each line into an InfoBean key so the shuffle sorts by the bean's ordering. */
    public static class SortMap extends Mapper<LongWritable, Text, InfoBean, NullWritable> {
        // Reused across map() calls to avoid allocating per record.
        private InfoBean k = new InfoBean();

        /**
         * Emits the parsed bean as key with a {@link NullWritable} value.
         *
         * @throws java.io.IOException if the line has fewer than three tab-separated
         *         fields or a numeric field cannot be parsed; the message includes the
         *         offending line so the bad record can be located
         */
        @Override
        public void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
            String line = value.toString();
            String[] fields = line.split("\t");
            if (fields.length < 3) {
                throw new java.io.IOException("Malformed record (input key " + key
                        + "): expected at least 3 tab-separated fields but found "
                        + fields.length + " in \"" + line + "\"");
            }
            String account = fields[0];
            double in;
            double out;
            try {
                in = Double.parseDouble(fields[1]);
                out = Double.parseDouble(fields[2]);
            } catch (NumberFormatException e) {
                throw new java.io.IOException("Malformed record (input key " + key
                        + "): non-numeric income/expences in \"" + line + "\"", e);
            }
            k.set(account, in, out);
            context.write(k, NullWritable.get());
        }
    }

    /** Turns the sorted bean keys back into (account, bean) output records. */
    public static class SortReduce extends Reducer<InfoBean, NullWritable, Text, InfoBean> {
        // Reused across reduce() calls.
        private Text k = new Text();

        /**
         * Writes one output record per incoming value, so that records whose keys
         * compare as equal are all emitted instead of being collapsed into one.
         */
        @Override
        public void reduce(InfoBean bean, Iterable<NullWritable> value, Context context) throws java.io.IOException, InterruptedException {
            for (NullWritable ignored : value) {
                // NOTE(review): Hadoop is expected to refresh the key instance as the
                // values are iterated, so the account is re-read on every iteration
                // rather than hoisted out of the loop. Confirm against the Reducer docs.
                k.set(bean.getAccount());
                context.write(k, bean);
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     */
    public static void main(String[] args) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: " + SortStep.class.getSimpleName() + " <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, SortStep.class.getSimpleName());
        job.setJarByClass(SortStep.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setMapperClass(SortMap.class);
        job.setMapOutputKeyClass(InfoBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setReducerClass(SortReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(InfoBean.class);

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
hadoop2.2.0 MapReduce求和并排序的更多相关文章
- hadoop2.2.0 MapReduce分区
package com.my.hadoop.mapreduce.partition; import java.util.HashMap;import java.util.Map; import org ...
- hadoop2.2.0 MapReduce的序列化
package com.my.hadoop.mapreduce.dataformat; import java.io.DataInput;import java.io.DataOutput;impor ...
- 【hadoop2.6.0】用C++ 编写mapreduce
hadoop通过hadoop streaming 来实现用非Java语言写的mapreduce代码. 对于一个一点Java都不会的我来说,这真是个天大的好消息. 官网上hadoop streaming ...
- 一脸懵逼学习Hadoop中的序列化机制——流量求和统计MapReduce的程序开发案例——流量求和统计排序
一:序列化概念 序列化(Serialization)是指把结构化对象转化为字节流.反序列化(Deserialization)是序列化的逆过程.即把字节流转回结构化对象.Java序列化(java.io. ...
- 国内最全最详细的hadoop2.2.0集群的MapReduce的最简单配置
简介 hadoop2的中的MapReduce不再是hadoop1中的结构已经没有了JobTracker,而是分解成ResourceManager和ApplicationMaster.这次大变革被称为M ...
- 编写简单的Mapreduce程序并部署在Hadoop2.2.0上运行
今天主要来说说怎么在Hadoop2.2.0分布式上面运行写好的 Mapreduce 程序. 可以在eclipse写好程序,export或用fatjar打包成jar文件. 先给出这个程序所依赖的Mave ...
- Hadoop2.2.0 第一步完成MapReduce wordcount计算文本数量
1.完成Hadoop2.2.0单机版环境搭建之后需要利用一个例子程序来检验hadoop2 的mapreduce的功能 //启动hdfs和yarn sbin/start-dfs.sh sbin/star ...
- 使用命令行编译打包运行自己的MapReduce程序 Hadoop2.6.0
使用命令行编译打包运行自己的MapReduce程序 Hadoop2.6.0 网上的 MapReduce WordCount 教程对于如何编译 WordCount.java 几乎是一笔带过… 而有写到的 ...
- Eclipse中部署hadoop2.3.0
1 eclipse中hadoop环境部署概览 eclipse 中部署hadoop包括两大部分:hdfs环境部署和mapreduce任务执行环境部署.一般hdfs环境部署比较简单,部署后就 可以在ecl ...
随机推荐
- [转] 用source命令执行脚本和用sh执行脚本之间的区别
from: http://blog.csdn.net/david_xtd/article/details/8012627 问题: 有很多方式可以执行脚本, 1).source test.bsh 2). ...
- Java基础知识强化之IO流笔记02:try...catch的方式处理异常
1. 案例示例: package com.himi.trycatch; public class ExceptionDemo { public static void main(String[] ar ...
- 读书笔记--用Python写网络爬虫01--网络爬虫简介
Wiki - Web crawler 百度百科 - 网络爬虫 1.1 网络爬虫何时使用 用于快速自动地获取网络信息,避免重复性的手工操作. 1.2 网络爬虫是否合法 网络爬虫目前人处于早期的蛮荒阶段, ...
- OD: Windows Kernel Debug
内核调试入门 内核程序运行在内核态,因此不能像对用户态应用程序那样来调试.关于内核调试方面的知识请参考<软件调试>这本书.目前内核调试主要有以下三种方法. 一是使用硬件调试器,它通过特定的 ...
- 第一个androidAPP项目总结—ListView的上拉和下拉
1.下拉刷新 需继承implements SwipeRefreshLayout.OnRefreshListener @Overridepublic void onRefresh() { new Wea ...
- Win7系统安装MySQL
最近重装系统,重新搭建编译环境:重装mysql,发现一篇特别好的安装博客(http://blog.csdn.net/longyuhome/article/details/7913375),转载过来,留 ...
- JavaWeb学习笔记之JSP(一)
1. JSP: 1.1. 为什么需要 JSP ? 如果使用Servlet程序来输出只有局部内容需要动态改变的网页,但是其中的静态网页内容也需要程序员使用Java语言来进行输出,这就造成了大量代码的冗余 ...
- Eclipse换背景
http://tieba.baidu.com/p/2128040173 1.打开Eclipse的Help->Eclipse Marketplace 2.在Find里搜索Eclipse Color ...
- 如何实现一个通用的IHttpHandler 万能的IHttpHandler HttpWebRequest文件上传
昨天遇到一个比较奇怪的需求,大致是需要在服务器上部署一个http服务,但是服务的具体功能不知道,以后在客服端实现.这里介绍一下系统背景,有一个系统运(部署在美国)行了很多年了,给系统产生了很多文件,现 ...
- javascript模块
//使用模块 template = {}; template.createObj = (function(){ //私有成员 var _age = ""; var _name = ...