1、基本概念

2、Mapper

package com.ares.hadoop.mr.wordcount;

import java.io.IOException;
import java.util.StringTokenizer; import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper; //Long, String, String, Long --> LongWritable, Text, Text, LongWritable
public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
private final static LongWritable ONE = new LongWritable(1L) ;
private Text word = new Text(); @Override
protected void map(LongWritable key, Text value,
Mapper<LongWritable, Text, Text, LongWritable>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
//super.map(key, value, context);
StringTokenizer itr = new StringTokenizer(value.toString(), " ");
while (itr.hasMoreTokens()) {
//efficiency is not well
//context.write(new Text(itr.nextToken()), new LongWritable(1L));
word.set(itr.nextToken());
context.write(word, ONE);
}
}
}

3、Reducer

package com.ares.hadoop.mr.wordcount;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer; public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable>{
private LongWritable result = new LongWritable(); @Override
protected void reduce(Text key, Iterable<LongWritable> vlaues,
Reducer<Text, LongWritable, Text, LongWritable>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
//super.reduce(arg0, arg1, arg2);
long sum = ;
for (LongWritable value : vlaues) {
sum += value.get();
}
result.set(sum);
context.write(key, result);
}
}

4、JobRunner

package com.ares.hadoop.mr.wordcount;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.Logger; public class MRTest {
private static final Logger LOGGER = Logger.getLogger(MRTest.class); public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
LOGGER.debug("MRTest: MRTest STARTED..."); if (args.length != ) {
LOGGER.error("MRTest: ARGUMENTS ERROR");
System.exit(-);
} Configuration conf = new Configuration();
Job job = Job.getInstance(conf); // JOB NAME
job.setJobName("wordcount"); // JOB MAPPER & REDUCER
job.setJarByClass(MRTest.class);
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class); // MAP & REDUCE
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
// MAP
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class); // JOB INPUT & OUTPUT PATH
//FileInputFormat.addInputPath(job, new Path(args[0]));
FileInputFormat.setInputPaths(job, args[]);
FileOutputFormat.setOutputPath(job, new Path(args[])); // VERBOSE OUTPUT
if (job.waitForCompletion(true)) {
LOGGER.debug("MRTest: MRTest SUCCESSFULLY...");
} else {
LOGGER.debug("MRTest: MRTest FAILED...");
} LOGGER.debug("MRTest: MRTest COMPLETED...");
}
}

5、JAR 提交作业 到YARN

hadoop jar wordcount.jar com.ares.hadoop.mr.wordcount.MRTest hdfs://HADOOP-NODE1:9000/word-count/input hdfs://HADOOP-NODE1:9000/word-count/output

【Hadoop】Hadoop mr wordcount基础的更多相关文章

  1. Hadoop中MR程序的几种提交运行模式

    本地模型运行 1:在windows的eclipse里面直接运行main方法,就会将job提交给本地执行器localjobrunner执行       ----输入输出数据可以放在本地路径下(c:/wc ...

  2. 运行第一个Hadoop程序,WordCount

    系统: Ubuntu14.04 Hadoop版本: 2.7.2 参照http://www.cnblogs.com/taichu/p/5264185.html中的分享,来学习运行第一个hadoop程序. ...

  3. Hadoop Mapreduce 案例 wordcount+统计手机流量使用情况

    mapreduce设计思想 概念:它是一个分布式并行计算的应用框架它提供相应简单的api模型,我们只需按照这些模型规则编写程序,即可实现"分布式并行计算"的功能. 案例一:word ...

  4. Hadoop【MR开发规范、序列化】

    Hadoop[MR开发规范.序列化] 目录 Hadoop[MR开发规范.序列化] 一.MapReduce编程规范 1.Mapper阶段 2.Reducer阶段 3.Driver阶段 二.WordCou ...

  5. hadoop修改MR的提交的代码程序的副本数

    hadoop修改MR的提交的代码程序的副本数 Under-Replicated Blocks的数量很多,有7万多个.hadoop fsck -blocks 检查发现有很多replica missing ...

  6. hadoop概述测试题和基础模版代码

    hadoop概述测试题和基础模版代码 1.Hadoop的创始人是DougCutting?() A.正确 B.错误答对了!正确答案:A解析:参考课程里的文档,这个就不解释了2.下列有关Hadoop的说法 ...

  7. hadoop安装与WordCount例子

    1.JDK安装 下载网址: http://www.oracle.com/technetwork/java/javase/downloads/jdk-6u29-download-513648.html  ...

  8. 沉淀,再出发——在Hadoop集群的基础上搭建Spark

    在Hadoop集群的基础上搭建Spark 一.环境准备 在搭建Spark环境之前必须搭建Hadoop平台,尽管以前的一些博客上说在单机的环境下使用本地FS不用搭建Hadoop集群,可是在新版spark ...

  9. 揭秘FaceBook Puma演变及发展——FaceBook公司的实时数据分析平台是建立在Hadoop 和Hive的基础之上,这个根能立稳吗?hive又是sql的Map reduce任务拆分,底层还是依赖hbase和hdfs存储

    在12月2日下午的“大数据技术与应用”分论坛的第一场演讲中,来自全球知名互联网公司——FaceBook公司的软件工程师.研发经理邵铮就带来了一颗重磅炸弹,他将为我们讲解FaceBook公司的实时数据处 ...

随机推荐

  1. ACM ICPC China final G Pandaria

    目录 ACM ICPC China final G Pandaria ACM ICPC China final G Pandaria 题意:给一张\(n\)个点\(m\)条边的无向图,\(c[i]\) ...

  2. webpack配置优化

    1.使用alias简化路径 alias: { 'vue$': 'vue/dist/vue.esm.js', '@': resolve('src') } 2.overlay界面弹出编译错误 devSer ...

  3. jquery对中文进行base64加密,后台用java进行base64解密

    项目中遇到将中文从前台传到后台过程中,出现乱码,一番尝试之后,均是乱码,然后尝试在js代码中先进行base64加密,然后在Java中再进行解密,完美的解决了乱码问题,步骤如下 一,html页面引入jQ ...

  4. php中json_encode和json_decode的用法

    1.json_encode基本用法:数组转字符串 <?php $arr = array ('a'=>1,'b'=>2,'c'=>3,'d'=>4,'e'=>5); ...

  5. js字符串使用占位符拼接

    由于几个老项目中经常用到jquery拼接字符串,各种引号很disgusting 所以写了一个占位符拼接的的方法 String.prototype.signMix= function() { if(ar ...

  6. 前端模块加载规范AMD与CMD小记

    AMD代表:requirejs:    CMD代表:seajs:   AMD CMD 代表 requirejs seajs  执行  提前加载,不管是否调用模块,先解析所以模块 提前加载,在真正需要使 ...

  7. HDU5187 zhx's contest

    Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 65536/65536 K (Java/Others) Total Submission ...

  8. poj1039 Pipe(计算几何叉积求交点)

    F - Pipe Time Limit:1000MS     Memory Limit:10000KB     64bit IO Format:%I64d & %I64u Submit Sta ...

  9. 1.2.3 创建Cocos2D-iPhone的帮助文档

    http://book.51cto.com/art/201303/383957.htm <Cocos2D权威指南>第1章开始前的准备工作,本章我们将介绍什么是Cocos2D以及有关Coco ...

  10. C3P0连接池工具类使用

    c3p0的基本连接配置文件 c3p0-config.xml <c3p0-config> <default-config> <property name="dri ...