map reduce程序示例

package test2;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**

 样例数据中包含了年份和温度，提出年份里温度最大的

 (0, 0067011990999991950051507+0000+),

 (33, 0043011990999991950051512+0022+),

 (66, 0043011990999991950051518-0011+),

 (99, 0043012650999991949032412+0111+),

 (132, 0043012650999991949032418+0078+),

 (165, 0067011990999991937051507+0001+),

 (198, 0043011990999991937051512-0002+),

 (231, 0043011990999991945051518+0001+),

 (264, 0043012650999991945032412+0002+),

 (297, 0043012650999991945032418+0078+),

 * */

public class mytest {

static String INPUT_PATH="input/t1_num.txt";   //待统计的文件路径

static String OUTPUT_PATH="output/t1_num";    //统计结果存放的路径

static class MyMapper extends Mapper <Object,Object,Text,IntWritable> {     //定义继承mapper类

    protected void map(Object key, Object value, Context context) throws IOException, InterruptedException{    //定义map方法

    String[] arr=value.toString().split("\\),");      //文件中的单词是以“),”分割的，并将每一行定义为一个数组

    for(int i=0;i<arr.length;i++){      //遍历循环每一行，统计单词出现的数量

        String line = arr[i].toString();

        String year = line.substring(line.length()-16, line.length()-12);

        String airTemperature = line.substring(line.length()-6, line.length()-1);

        context.write(new Text(year),new IntWritable(Integer.valueOf(airTemperature)));

    }

        /**

         map过程中，通过对字符串的解析，得到年-温度的key-value对作为输出

         (1950, 0)

         (1950, 22)

         (1950, -11)

         (1949, 111)

         (1949, 78)

         (1937, 1)

         (1937, -2)

         (1945, 1)

         (1945, 2)

         (1945, 78)

         */

 }

}

static class MyReduce extends Reducer<Text,IntWritable,Text,IntWritable>{     //定义继承reducer类

    protected void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException,InterruptedException{      //定义reduce方法

         int max = 0;

         for(IntWritable c:values){     //统计同一个单词的数量

             if(c.get()>max){

                 max = c.get();//获取value值

             }

         }

        IntWritable outValue=new IntWritable(max);//挨个输出

        context.write(key,outValue);

     }

    /**

     在reduce过程，将map过程中的输出，按照相同的key(年份)将value放到同一个列表中作为reduce的输入

     (1950, [0, 22, –11])

     (1949, [111, 78])

     (1937, [1, -2])

     (1945, [1, 2, 78])

     在reduce过程中，在列表中选择出最大的温度，将年-max温度的key-value作为输出：

     (1950, 22)

     (1949, 111)

     (1937, 1)

     (1945, 78)

     */

}

 public static void main(String[] args) throws Exception{    //main函数

     System.setProperty("hadoop.home.dir", "D:\\hadoop-2.7.6");//这一行一定要

     Path outputpath=new Path(OUTPUT_PATH);    //输出路径

     Configuration conf=new Configuration();

     Job job=Job.getInstance(conf);     //定义一个job，启动任务

     FileInputFormat.setInputPaths(job, INPUT_PATH);

     FileOutputFormat.setOutputPath(job,outputpath);

     job.setMapperClass(MyMapper.class);

     job.setReducerClass(MyReduce.class);

     job.setOutputKeyClass(Text.class);

     job.setOutputValueClass(IntWritable.class);

     job.waitForCompletion(true);

    }

}

map reduce程序示例的更多相关文章

Hadoop学习笔记2 - 第一和第二个Map Reduce程序
转载请标注原链接http://www.cnblogs.com/xczyd/p/8608906.html 在Hdfs学习笔记1 - 使用Java API访问远程hdfs集群中,我们已经可以完成了访问hd ...
eclipse 中运行 Hadoop2.7.3 map reduce程序出现错误(null) entry in command string: null chmod 0700
运行map reduce任务报错: (null) entry in command string: null chmod 0700 解决办法: 在https://download.csdn.net/d ...
使用Python实现Map Reduce程序
使用Python实现Map Reduce程序起因想处理一些较大的文件,单机运行效率太低,多线程也达不到要求,最终采用了集群的处理方式. 详细的讨论可以在v2ex上看一下. 步骤 MapReduce ...
第一个map reduce程序
完成了第一个mapReduce例子,记录一下. 实验环境: hadoop在三台ubuntu机器上部署开发在window7上进行 hadoop版本2.2.0 下载了hadoop-eclipse-plu ...
Hadoop 使用Combiner提高Map/Reduce程序效率
众所周知,Hadoop框架使用Mapper将数据处理成一个<key,value>键值对,再在网络节点间对其进行整理(shuffle),然后使用Reducer处理数据并进行最终输出. 在上述过 ...
Hadoop实战：使用Combiner提高Map/Reduce程序效率
好不容易算法搞定了.小数据测试也得到了非常好的结果,但是扔到集群上.挂上大数据就挂了.无休止的reduce不会结束了. .. .. .... .. ... .. ================= ...
Hadoop Map/Reduce教程
原文地址:http://hadoop.apache.org/docs/r1.0.4/cn/mapred_tutorial.html 目的先决条件概述输入与输出例子:WordCount v1.0 ...
Map/Reduce 工作机制分析 --- 作业的执行流程
前言从运行我们的 Map/Reduce 程序,到结果的提交,Hadoop 平台其实做了很多事情. 那么 Hadoop 平台到底做了什么事情,让 Map/Reduce 程序可以如此 "轻易" ...
Map/Reduce个人实战--生成数据测试集
背景: 在大数据领域, 由于各方面的原因. 有时需要自己来生成测试数据集, 由于测试数据集较大, 因此采用Map/Reduce的方式去生成. 在这小编(mumuxinfei)结合自身的一些实战经历, ...

随机推荐

spring3.2.2 remoting HTTP invoker 实现方式
最近跟朋友聊天,聊到他们现在项目的架构都是把数据层跟应用层分离开来,中间可以加memcached等的缓存系统,感觉挺好的,很大程度上的降低耦合,然后还明确分配了数据层跟应用层任务.也方便定位.找到问题 ...
[转] 隐马尔可夫(HMM)、前/后向算法、Viterbi算法再次总结
最近工作需要优化LSTM-CRF经典模型中的维特比解码部分,发现对维特比一直是个模糊概念,没有get到本质,搜了一圈,发现一篇好文,mark 博主不让转载,mark个地址吧: https://blog ...
【转】python 面向对象（进阶篇）
[转]python 面向对象(进阶篇) 上一篇<Python 面向对象(初级篇)>文章介绍了面向对象基本知识: 面向对象是一种编程方式,此编程方式的实现是基于对类和对象的使用类 ...
非极大值抑制（NMS）的几种实现
因为之前对比了RoI pooling的几种实现,发现python.pytorch的自带工具函数速度确实很慢,所以这里再对Faster-RCNN中另一个速度瓶颈NMS做一个简单对比试验. 这里做了四组对 ...
CString/string 区别及其转化
CString/string 区别及其转化利用MFC进行编程时,我们从对话框中利用GetWindowText得到的字符串是CString类型,CString是属于MFC的类.而一些标准C/C++库函 ...
windows命令行获取时间
在写Windows批处理脚本时,常常需要获取系统日期.时间戳记,用作文件名.文件夹名.log等等. 本文介绍了如何获取自订的系统日期.时间戳记. 首先,在Windows中,系统日期由以下参数获得: % ...
题解-bzoj1283序列 & bzoj4842 [Neerc2016]Delight for a Cat
因为这两题有递进关系,所以放一起写 Problem bzoj1283 序列题意概要:一个长度为 $n$ 的序列$\{c_i\}$,求一个子集,使得原序列中任意长度为 $m$ 的子串中被选 ...
$Django 数据库图片渲染设计站点设计截断函数
1.数据库图片渲染设计 1.模型层 class User_info (AbstractUser): head_img = models.FileField (upload_to='test', def ...
bootstrap4简单使用和入门03-响应式布局
响应式布局的原理页面源码 <!DOCTYPE html> <html lang="en"> <head> <meta charset=" ...
win10:在关闭防火墙下如何屏蔽特定端口
如果win10没有组策略,请参考:https://www.cnblogs.com/huiy/p/9291392.html 在"开始"菜单选择"运行",输入" ...

map reduce程序示例

map reduce程序示例的更多相关文章

随机推荐

热门专题