1、概念

2、Hadoop默认分组机制--所有的Key分到一个组,一个Reduce任务处理

3、代码示例

FlowBean

package com.ares.hadoop.mr.flowgroup;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException; import org.apache.hadoop.io.WritableComparable; public class FlowBean implements WritableComparable<FlowBean>{
private String phoneNum;
private long upFlow;
private long downFlow;
private long sumFlow; public FlowBean() {
// TODO Auto-generated constructor stub
}
// public FlowBean(String phoneNum, long upFlow, long downFlow, long sumFlow) {
// super();
// this.phoneNum = phoneNum;
// this.upFlow = upFlow;
// this.downFlow = downFlow;
// this.sumFlow = sumFlow;
// } public String getPhoneNum() {
return phoneNum;
} public void setPhoneNum(String phoneNum) {
this.phoneNum = phoneNum;
} public long getUpFlow() {
return upFlow;
} public void setUpFlow(long upFlow) {
this.upFlow = upFlow;
} public long getDownFlow() {
return downFlow;
} public void setDownFlow(long downFlow) {
this.downFlow = downFlow;
} public long getSumFlow() {
return sumFlow;
} public void setSumFlow(long sumFlow) {
this.sumFlow = sumFlow;
} @Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
phoneNum = in.readUTF();
upFlow = in.readLong();
downFlow = in.readLong();
sumFlow = in.readLong();
} @Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
out.writeUTF(phoneNum);
out.writeLong(upFlow);
out.writeLong(downFlow);
out.writeLong(sumFlow);
} @Override
public String toString() {
return "" + phoneNum + "\t" + upFlow + "\t" + downFlow + "\t" + sumFlow;
} @Override
public int compareTo(FlowBean o) {
// TODO Auto-generated method stub
return sumFlow>o.getSumFlow()?-:;
} }

FlowGroup

package com.ares.hadoop.mr.flowgroup;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger; import com.ares.hadoop.mr.exception.LineException;
import com.ares.hadoop.mr.flowgroup.FlowBean;; public class FlowGroup extends Configured implements Tool {
private static final Logger LOGGER = Logger.getLogger(FlowGroup.class);
enum Counter {
LINESKIP
} public static class FlowGroupMapper extends Mapper<LongWritable, Text,
Text, FlowBean> {
private String line;
private int length;
private final static char separator = '\t'; private String phoneNum;
private long upFlow;
private long downFlow;
//private long sumFlow; private Text text = new Text();
private FlowBean flowBean = new FlowBean(); @Override
protected void map(
LongWritable key,
Text value,
Mapper<LongWritable, Text, Text, FlowBean>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
//super.map(key, value, context);
String errMsg;
try {
line = value.toString();
String[] fields = StringUtils.split(line, separator);
length = fields.length;
if (length != ) {
throw new LineException(key.get() + ", " + line + " LENGTH INVALID, IGNORE...");
} phoneNum = fields[];
upFlow = Long.parseLong(fields[length-]);
downFlow = Long.parseLong(fields[length-]);
//sumFlow = upFlow + downFlow; text.set(phoneNum);
flowBean.setPhoneNum(phoneNum);
flowBean.setUpFlow(upFlow);
flowBean.setDownFlow(downFlow);
//flowBean.setSumFlow(sumFlow); context.write(text, flowBean);
} catch (LineException e) {
// TODO: handle exception
LOGGER.error(e);
System.out.println(e);
context.getCounter(Counter.LINESKIP).increment();
return;
} catch (NumberFormatException e) {
// TODO: handle exception
errMsg = key.get() + ", " + line + " FLOW DATA INVALID, IGNORE...";
LOGGER.error(errMsg);
System.out.println(errMsg);
context.getCounter(Counter.LINESKIP).increment();
return;
} catch (Exception e) {
// TODO: handle exception
LOGGER.error(e);
System.out.println(e);
context.getCounter(Counter.LINESKIP).increment();
return;
}
}
} public static class FlowGroupReducer extends Reducer<Text, FlowBean,
FlowBean, NullWritable> { private FlowBean flowBean = new FlowBean(); @Override
protected void reduce(
Text key,
Iterable<FlowBean> values,
Reducer<Text, FlowBean, FlowBean, NullWritable>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
//super.reduce(arg0, arg1, arg2);
long upFlowCounter = ;
long downFlowCounter = ; for (FlowBean flowBean : values) {
upFlowCounter += flowBean.getUpFlow();
downFlowCounter += flowBean.getDownFlow();
}
flowBean.setPhoneNum(key.toString());
flowBean.setUpFlow(upFlowCounter);
flowBean.setDownFlow(downFlowCounter);
flowBean.setSumFlow(upFlowCounter + downFlowCounter); context.write(flowBean, NullWritable.get());
}
} @Override
public int run(String[] args) throws Exception {
// TODO Auto-generated method stub
String errMsg = "FlowGroup: TEST STARTED...";
LOGGER.debug(errMsg);
System.out.println(errMsg); Configuration conf = new Configuration();
//FOR Eclipse JVM Debug
//conf.set("mapreduce.job.jar", "flowsum.jar");
Job job = Job.getInstance(conf); // JOB NAME
job.setJobName("FlowGroup"); // JOB MAPPER & REDUCER
job.setJarByClass(FlowGroup.class);
job.setMapperClass(FlowGroupMapper.class);
job.setReducerClass(FlowGroupReducer.class); // JOB PARTITION
job.setPartitionerClass(FlowGroupPartition.class); // JOB REDUCE TASK NUMBER
job.setNumReduceTasks(); // MAP & REDUCE
job.setOutputKeyClass(FlowBean.class);
job.setOutputValueClass(NullWritable.class);
// MAP
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(FlowBean.class); // JOB INPUT & OUTPUT PATH
//FileInputFormat.addInputPath(job, new Path(args[0]));
FileInputFormat.setInputPaths(job, args[]);
FileOutputFormat.setOutputPath(job, new Path(args[])); // VERBOSE OUTPUT
if (job.waitForCompletion(true)) {
errMsg = "FlowGroup: TEST SUCCESSFULLY...";
LOGGER.debug(errMsg);
System.out.println(errMsg);
return ;
} else {
errMsg = "FlowGroup: TEST FAILED...";
LOGGER.debug(errMsg);
System.out.println(errMsg);
return ;
} } public static void main(String[] args) throws Exception {
if (args.length != ) {
String errMsg = "FlowGroup: ARGUMENTS ERROR";
LOGGER.error(errMsg);
System.out.println(errMsg);
System.exit(-);
} int result = ToolRunner.run(new Configuration(), new FlowGroup(), args);
System.exit(result);
}
}

FlowGroupPartition

package com.ares.hadoop.mr.flowgroup;

import java.util.HashMap;

import org.apache.hadoop.mapreduce.Partitioner;

public class FlowGroupPartition<KEY, VALUE> extends Partitioner<KEY, VALUE>{
private static HashMap<String, Integer> groupMap = new HashMap<String, Integer>();
static {
groupMap.put("", );
groupMap.put("", );
groupMap.put("", );
groupMap.put("", );
} @Override
public int getPartition(KEY key, VALUE value, int numPartitions) {
// TODO Auto-generated method stub
return (groupMap.get(key.toString().substring(, )) == null)?:
groupMap.get(key.toString().substring(, ));
} }

【Hadoop】Hadoop MR 自定义分组 Partition机制的更多相关文章

  1. 【Hadoop】MapReduce自定义分区Partition输出各运营商的手机号码

    MapReduce和自定义Partition MobileDriver主类 package Partition; import org.apache.hadoop.io.NullWritable; i ...

  2. 2 weekend110的hadoop的自定义排序实现 + mr程序中自定义分组的实现

    我想得到按流量来排序,而且还是倒序,怎么达到实现呢? 达到下面这种效果, 默认是根据key来排, 我想根据value里的某个排, 解决思路:将value里的某个,放到key里去,然后来排 下面,开始w ...

  3. 一脸懵逼学习Hadoop中的MapReduce程序中自定义分组的实现

    1:首先搞好实体类对象: write 是把每个对象序列化到输出流,readFields是把输入流字节反序列化,实现WritableComparable,Java值对象的比较:一般需要重写toStrin ...

  4. Hadoop自定义分组Group

    matadata: hadoop a spark a hive a hbase a tachyon a storm a redis a 自定义分组 import org.apache.hadoop.c ...

  5. Hadoop mapreduce自定义分组RawComparator

    本文发表于本人博客. 今天接着上次[Hadoop mapreduce自定义排序WritableComparable]文章写,按照顺序那么这次应该是讲解自定义分组如何实现,关于操作顺序在这里不多说了,需 ...

  6. hadoop提交作业自定义排序和分组

    现有数据如下: 3 3 3 2 3 1 2 2 2 1 1 1 要求为: 先按第一列从小到大排序,如果第一列相同,按第二列从小到大排序 如果是hadoop默认的排序方式,只能比较key,也就是第一列, ...

  7. Hadoop【MR的分区、排序、分组】

    [toc] 一.分区 问题:按照条件将结果输出到不同文件中 自定义分区步骤 1.自定义继承Partitioner类,重写getPartition()方法 2.在job驱动Driver中设置自定义的Pa ...

  8. Hadoop日记Day18---MapReduce排序分组

    本节所用到的数据下载地址为:http://pan.baidu.com/s/1bnfELmZ MapReduce的排序分组任务与要求 我们知道排序分组是MapReduce中Mapper端的第四步,其中分 ...

  9. Hadoop Mapreduce分区、分组、二次排序过程详解[转]

    原文地址:Hadoop Mapreduce分区.分组.二次排序过程详解[转]作者: 徐海蛟 教学用途 1.MapReduce中数据流动   (1)最简单的过程:  map - reduce   (2) ...

随机推荐

  1. [BZOJ3600] 没有人的算术 [重量平衡树+权值线段树]

    题面 传送门 思路 这道题目是陈立杰论文<重量平衡树和后缀平衡树在信息学奥赛中的应用 >中关于重量平衡树维护序列排名算法的一个应用 具体方法为:令根节点保存一个实数区间$[0,1]$ 若当 ...

  2. [codeforces] 498D Traffic Jams in th Land

    原题 简单的线段树问题. 对于题目中,a[i]的范围是2~6,我们仔细思考可以得出第0秒和第60秒是一样的(因为2~6的最小公倍数是60,),然后我们可以建一个线段树,里面记录0~59秒时刻开始通过这 ...

  3. vue虚拟dom原理

    Virual DOM是用JS对象记录一个dom节点的副本,当dom发生更改时候,先用虚拟dom进行diff,算出最小差异,然后再修改真实dom. vue的virtual dom的diff算法是基于sn ...

  4. HDU 1159 最长公共子序列(n*m)

    Common Subsequence Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 65536/32768 K (Java/Other ...

  5. 如何修改win10管理员账户

    首先按下win+x组合键,如下图所示   在弹出菜单选择运行,如下图所示   在运行框中输入netplwiz后点击确定按钮   将下图中要使用本计算机必须输入用户名和密码前面的勾去掉,点击下方应用按钮 ...

  6. ubuntu 解压

    .tar 解包:tar xvf FileName.tar 打包:tar cvf FileName.tar DirName (注:tar是打包,不是压缩!) ---------------------- ...

  7. cms .net webform去服务器控件标签化 pagebase新版本

    这是最近在干一个webform的cms的时候用起来的,原来虽然做过很多技术,什么remoting,wcf,webservice,可是弄来弄去,最后也没个收藏的地儿,全都放在笔记本儿上了,可是人又懒地可 ...

  8. 取代VS, sourceISight的IDE神器CLION

    https://www.jetbrains.com/clion/download/download-thanks.html 随时升级 http://idea.lanyus.com/ m_pRemoti ...

  9. Linux用户态定时器用法以及犯错总结【转】

    转自:http://blog.csdn.net/csdn_logo/article/details/48525703 版权声明:本文为博主原创文章,欢迎转载,转载请注明出处,多谢合作. 采样的时候要用 ...

  10. VS2013 生成sqlite3动态连接库及sqlite3.dll的调用

    一,生成sqlite3动态连接库1,去sqlite官网上下载最近的sqlite源码包,解压后得到四个文件:shell.c,sqlite3.c,sqlite3.h,sqlite3ext.h此处还需要sq ...