MapReduce Demo
功能:统计公司员工一个月内手机上网上行流量、下行流量及总流量。
测试数据如下:
13612345678 6000 1000
13612345678 2000 3000
代码:
程序入口类:DataCount
package cn.terry.mr;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
// Must be Hadoop's Text. Importing
// com.sun.jersey.core.impl.provider.entity.XMLJAXBElementProvider.Text here
// instead makes every map task fail with "Initialization of all the
// collectors failed" (see the first job log further down in this article).
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Job driver: configures and submits the traffic-summing MapReduce job.
 *
 * <p>Usage: {@code hadoop jar <jar> cn.terry.mr.DataCount <input path> <output path>}
 */
public class DataCount {

    /**
     * Wires {@link MRMap} and {@link MRReduce} into a job, runs it and exits
     * with status 0 on success, 1 if the job failed, 2 on bad arguments.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws InterruptedException   if the wait for completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message instead of ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: DataCount <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(DataCount.class);

        // Map side: phone number -> per-record traffic bean.
        job.setMapperClass(MRMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DataBean.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // Reduce side: phone number -> summed traffic bean. The final output
        // types are declared explicitly rather than left at the job defaults.
        job.setReducerClass(MRReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataBean.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate the job result to the shell; previously it was discarded.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
数据实体类: DataBean.java
package cn.terry.mr;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

/**
 * Traffic record for one phone number: upstream, downstream and total payload.
 *
 * <p>Implements {@link Writable} so it can be used as a MapReduce value.
 * All fields must be set before {@link #write(DataOutput)} is called; a
 * {@code null} field causes a {@link NullPointerException} there.
 */
public class DataBean implements Writable {

    private String telNo;
    private Long upPayLoad;
    private Long downPayLoad;
    private Long totalPayLoad;

    /** No-argument constructor; fields are filled in later by {@link #readFields(DataInput)} or the setters. */
    public DataBean() {
    }

    /**
     * Creates a record and derives the total as {@code upPayLoad + downPayLoad}.
     *
     * @param telNo       phone number
     * @param upPayLoad   upstream traffic
     * @param downPayLoad downstream traffic
     */
    public DataBean(String telNo, Long upPayLoad, Long downPayLoad) {
        this.telNo = telNo;
        this.upPayLoad = upPayLoad;
        this.downPayLoad = downPayLoad;
        this.totalPayLoad = this.upPayLoad + this.downPayLoad;
    }

    public String getTelNo() {
        return telNo;
    }

    public void setTelNo(String telNo) {
        this.telNo = telNo;
    }

    public Long getUpPayLoad() {
        return upPayLoad;
    }

    public void setUpPayLoad(Long upPayLoad) {
        this.upPayLoad = upPayLoad;
    }

    public Long getDownPayLoad() {
        return downPayLoad;
    }

    public void setDownPayLoad(Long downPayLoad) {
        this.downPayLoad = downPayLoad;
    }

    public Long getTotalPayLoad() {
        return totalPayLoad;
    }

    /** Sets the total directly; it is not recomputed from the up/down values. */
    public void setTotalPayLoad(Long totalPayLoad) {
        this.totalPayLoad = totalPayLoad;
    }

    /**
     * Serializes the fields in the order telNo, up, down, total.
     *
     * @param out destination stream
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(telNo);
        out.writeLong(upPayLoad);
        out.writeLong(downPayLoad);
        out.writeLong(totalPayLoad);
    }

    /**
     * Deserializes the fields; the read order must mirror {@link #write(DataOutput)}.
     *
     * @param in source stream
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.telNo = in.readUTF();
        this.upPayLoad = in.readLong();
        this.downPayLoad = in.readLong();
        this.totalPayLoad = in.readLong();
    }

    /** Returns the up, down and total values separated by tabs; the phone number is not included. */
    @Override
    public String toString() {
        return this.upPayLoad + "\t" + this.downPayLoad + "\t" + this.totalPayLoad;
    }
}
Map类:MRMap.java
package cn.terry.mr;

import java.io.IOException;
import java.util.regex.Pattern;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Parses one input line of the form {@code <telNo> <up> <down>} and emits
 * the phone number as key with a {@link DataBean} carrying the two values.
 */
public class MRMap extends Mapper<LongWritable, Text, Text, DataBean> {

    /** Fields may be separated by tabs or spaces; compiled once, not per record. */
    private static final Pattern FIELD_SEPARATOR = Pattern.compile("\\s+");

    /** Counter group/name used to report lines that could not be parsed. */
    private static final String COUNTER_GROUP = "DataCount";
    private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

    /**
     * Maps a single text line to a (telNo, DataBean) pair.
     *
     * <p>Blank lines are skipped. Lines with fewer than three fields or with
     * non-numeric traffic values are skipped and counted, so one bad record
     * no longer fails the whole task.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of input text
     * @param context task context used to emit output and update counters
     * @throws IOException          if emitting the record fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return;
        }

        String[] fields = FIELD_SEPARATOR.split(line);
        if (fields.length < 3) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        String telNo = fields[0];
        long up;
        long down;
        try {
            up = Long.parseLong(fields[1]);
            down = Long.parseLong(fields[2]);
        } catch (NumberFormatException e) {
            // Non-numeric traffic value: count it and move on to the next line.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        context.write(new Text(telNo), new DataBean(telNo, up, down));
    }
}
Reduce类:MRReduce.java
package cn.terry.mr;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Sums the upstream and downstream traffic of all records that share a key.
 */
public class MRReduce extends Reducer<Text, DataBean, Text, DataBean> {

    /**
     * Adds up the up/down values of every bean for {@code key} and emits a
     * single bean holding the sums; the emitted bean's own telNo is left as
     * an empty string because the key is written alongside it.
     *
     * @param key     the key shared by all beans in {@code v2}
     * @param v2      beans emitted by the mapper for this key
     * @param context task context used to emit the summed record
     * @throws IOException          if emitting the record fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void reduce(Text key, Iterable<DataBean> v2, Context context)
            throws IOException, InterruptedException {
        long upTotal = 0;
        long downTotal = 0;

        for (DataBean record : v2) {
            upTotal += record.getUpPayLoad();
            downTotal += record.getDownPayLoad();
        }

        context.write(key, new DataBean("", upTotal, downTotal));
    }
}
17/11/08 11:34:25 INFO client.RMProxy: Connecting to ResourceManager at master/1:8032
17/11/08 11:34:27 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
17/11/08 11:34:27 INFO input.FileInputFormat: Total input paths to process : 1
17/11/08 11:34:28 INFO mapreduce.JobSubmitter: number of splits:1
17/11/08 11:34:28 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1509957441313_0002
17/11/08 11:34:29 INFO impl.YarnClientImpl: Submitted application application_1509957441313_0002
17/11/08 11:34:29 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1509957441313_0002/
17/11/08 11:34:29 INFO mapreduce.Job: Running job: job_1509957441313_0002
17/11/08 11:34:46 INFO mapreduce.Job: Job job_1509957441313_0002 running in uber mode : false
17/11/08 11:34:46 INFO mapreduce.Job: map 0% reduce 0%
17/11/08 11:34:55 INFO mapreduce.Job: Task Id : attempt_1509957441313_0002_m_000000_0, Status : FAILED
Error: java.io.IOException: Initialization of all the collectors failed. Error in last collector was :class com.sun.jersey.core.impl.provider.entity.XMLJAXBElementProvider$Text
    at org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:415)
    at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:81)
    at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:698)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:770)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
    at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)
以上错误可看出hadoop引用的Text包出错,需要将DataCount类中Text的包引用改为 import org.apache.hadoop.io.Text;
再次运行:
[root@master bin]# hadoop jar /home/hadoop/mpCount.jar cn.terry.mr.DataCount /data3.txt /MROut4
17/11/08 16:23:45 INFO client.RMProxy: Connecting to ResourceManager at master/x.x.x.x:8032
17/11/08 16:23:46 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
17/11/08 16:23:47 INFO input.FileInputFormat: Total input paths to process : 1
17/11/08 16:23:47 INFO mapreduce.JobSubmitter: number of splits:1
17/11/08 16:23:47 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1509957441313_0008
17/11/08 16:23:48 INFO impl.YarnClientImpl: Submitted application application_1509957441313_0008
17/11/08 16:23:48 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1509957441313_0008/
17/11/08 16:23:48 INFO mapreduce.Job: Running job: job_1509957441313_0008
17/11/08 16:24:02 INFO mapreduce.Job: Job job_1509957441313_0008 running in uber mode : false
17/11/08 16:24:02 INFO mapreduce.Job: map 0% reduce 0%
17/11/08 16:24:14 INFO mapreduce.Job: map 100% reduce 0%
17/11/08 16:24:25 INFO mapreduce.Job: map 100% reduce 100%
17/11/08 16:24:26 INFO mapreduce.Job: Job job_1509957441313_0008 completed successfully
查看结果:
[root@master bin]# hdfs dfs -ls /MROut4
Found 2 items
-rw-r--r-- 2 root supergroup 0 2017-11-08 16:24 /MROut4/_SUCCESS
-rw-r--r-- 2 root supergroup 106 2017-11-08 16:24 /MROut4/part-r-00000
[root@master bin]# hdfs dfs -cat /MROut4/part-r-00000
13112345678 1800 400 2200
13512345678 9500 400 9900
13612345678 8000 4000 12000
13812345678 3500 400 3900
由于我的chrome和IE版本无法兼容cnblogs的插入code和picture功能,抱歉没能将代码及结果以友好的方式呈现。
MapReduce Demo的更多相关文章
- python - hadoop,mapreduce demo
Hadoop,mapreduce 介绍 59888745@qq.com 大数据工程师是在Linux系统下搭建Hadoop生态系统(cloudera是最大的输出者类似于Linux的红帽), 把用户的交易 ...
- Wordcount on YARN 一个MapReduce示例
Hadoop YARN版本:2.2.0 关于hadoop yarn的环境搭建可以参考这篇博文:Hadoop 2.0安装以及不停集群加datanode hadoop hdfs yarn伪分布式运行,有如 ...
- 关于Mapreduce Text类型赋值的错误
Mapreduce中Text类型数据被无缘无故替换? 今天偶然看到一个mapreduce demo,直接上手操作 统计两个文件中 最大值 文件中数据格式为 名字 数值 输出为 名字(最大值所对应的 ...
- Apache Hadoop2.x 边安装边入门
完整PDF版本:<Apache Hadoop2.x边安装边入门> 目录 第一部分:Linux环境安装 第一步.配置Vmware NAT网络 一. Vmware网络模式介绍 二. NAT模式 ...
- CentOS7 分布式安装 Hadoop 2.8
1. 基本环境 1.1 操作系统 操作系统:CentOS7.3 1.2 三台虚拟机 172.20.20.100 master 172.20.20.101 slave1 172.20.20.102 sl ...
- 在虚拟机上配置安装hadoop集群
原本以为有大神已经总结的很清楚了,就不自己在写了, 但是在自己安装的过程中还是出现了一些问题, 所以打算以自己的方式重新总结一下. 参考https://blog.csdn.net/hliq539 ...
- centos6.6安装hadoop-2.5.0(三、完全分布式安装)
操作系统:centos6.6(三台服务器) 环境:selinux disabled:iptables off:java 1.8.0_131 安装包:hadoop-2.5.0.tar.gz hadoop ...
- centos6.6安装hadoop-2.5.0(一、本地模式安装)
操作系统:centos6.6(一台服务器) 环境:selinux disabled:iptables off:java 1.8.0_131 安装包:hadoop-2.5.0.tar.gz hadoop ...
- 史上最详细的Hadoop环境搭建(转)
转载的文章,请告知侵删.本人只是做个记录,以免以后找不到. 前言 Hadoop在大数据技术体系中的地位至关重要,Hadoop是大数据技术的基础,对Hadoop基础知识的掌握的扎实程度,会决定在大数据技 ...
随机推荐
- Node 内存控制
Node 只能使用部分内存,原因: node 基于 V8 构建,V8 的内存管理机制限制了内存的用量. 在实际的使用中,不小心触碰到这个内存界限,会造成进程退出. V8 是通过堆来进行内存分配的:在代 ...
- JFrame包含的容器(JRootPane)
JFrame对象创建后,此对象包含JRootPane类型的容器.JRootPane 下有GlassPane, 和 LayeredPane,LayeredPane下又有ContentPane , ...
- nginx支持android、ios、微信扫一扫
首先做一个android下载的html页面,页面中识别微信浏览器提示在浏览器中打开,然后在nginx对ios进行识别并跳转到apple store #下载App location ^~ /appDow ...
- ML: 聚类算法R包-网格聚类
网格聚类算法 optpart::clique optpart::clique CLIQUE(Clustering In QUEst)是一种简单的基于网格的聚类方法,用于发现子空间中基于密度的簇.CLI ...
- 关于SQL Server 无法生成 FRunCM 线程(不完全)
在五一的前一天,准备启动数据库完成我剩下的项目代码时,数据库配置管理器出现了一个让人蛋疼的问题sqlserv配置管理器出现请求失败或服务器未及时响应关于这个问题的处理方法,经过我两个小时的百度,网上对 ...
- 下载goland解压错误
把连接里面的 download.jetbrains.8686c.com 换成 download-cf.jetbrains.com
- CentOS 7.4 初次手记:第一章 Linux守护进程(daemon)
第一节 init & sysvinit 6 I sysvinit 运行顺序... 6 II Sysvinit和系统关闭... 7 III Sysvinit 的小结... 7 IV 运行级别.. ...
- 使用apache设置绑定多个域名或网站
来源:百度知道 http://jingyan.baidu.com/article/363872ecec3e496e4ba16fdc.html 配置完成后可能是能访问了,但是跳转的路径总是不对,这时候 ...
- java统计文件字母大小写的数量练习
import java.io.*; import java.lang.*; public class WordStatistic { private BufferedReader br; privat ...
- linux一些基本知识
一.linux i386是32位的,amd64是64位(一般情况不限intel或者amd) server是服务器版,desktop是桌面版 Desktop是社区开源版,拥有一些新功能新软件 ...