Hadoop 学习笔记（十） MapReduce实现排序全局变量

一些疑问：
1 要得到全局有序的结果，最后应该设置 sortJob.setNumReduceTasks(1);
2 如果多个 reduce task 都去修改同一个静态的 IntWritable，输出的序号会乱序吧~
输入数据：
file1
2
32
654
32
15
756
65223
file2
5956
22
650
92
file3
26
54
6

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MySort {

    public static class IntSortMapper extends Mapper<Object, Text, IntWritable, NullWritable>{

        private IntWritable val = new IntWritable();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException{

            String line = value.toString().trim();

            val.set(Integer.parseInt(line));

            context.write(val, NullWritable.get());

        }

    }

    public static class IntSortReducer extends Reducer<IntWritable, NullWritable, IntWritable,IntWritable>{

        private IntWritable k = new IntWritable();

        public void reduce(IntWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException{

            k.set();

            for (NullWritable value : values) {

                context.write(k, key);

            }

        }

    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        String dir_in = "hdfs://localhost:9000/in_sort";

        String dir_out = "hdfs://localhost:9000/out_sort";

        Path in = new Path(dir_in);

        Path out = new Path(dir_out);

        Configuration conf = new Configuration();

        Job sortJob = new Job(conf, "my_sort");

        sortJob.setJarByClass(MySort.class);

        sortJob.setInputFormatClass(TextInputFormat.class);

        sortJob.setMapperClass(IntSortMapper.class);

        //sortJob.setCombinerClass(SortReducer.class);

        //countJob.setPartitionerClass(HashPartitioner.class);

        sortJob.setMapOutputKeyClass(IntWritable.class);

        sortJob.setMapOutputValueClass(NullWritable.class);

        FileInputFormat.addInputPath(sortJob, in);

        sortJob.setReducerClass(IntSortReducer.class);

        sortJob.setNumReduceTasks();

        sortJob.setOutputKeyClass(IntWritable.class);

        sortJob.setOutputValueClass(IntWritable.class);

        //countJob.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileOutputFormat.setOutputPath(sortJob, out);

        sortJob.waitForCompletion(true);

    }

}

结果：

修改reduce函数（不使用Iterable）

/**
 * Variant of the reducer whose {@code reduce} method takes a single
 * {@link NullWritable} instead of an {@code Iterable<NullWritable>}.
 *
 * <p>NOTE: this signature does not match
 * {@code Reducer.reduce(KEYIN, Iterable<VALUEIN>, Context)}, so the method below
 * is an overload rather than an override and the framework does not call it.
 * The inherited default {@code reduce} runs instead and passes each
 * (key, value) pair through unchanged, which is why the output contains only
 * the number. Annotating the method with {@code @Override} would turn this
 * mistake into a compile-time error.
 */
public static class IntSortReducer extends Reducer<IntWritable, NullWritable, IntWritable,IntWritable>{

        private IntWritable k = new IntWritable();

        /**
         * Intended to emit {@code (1, key)}; not invoked by the framework (see class comment).
         *
         * @param key     the number being reduced
         * @param value   a single value rather than the Iterable the framework supplies
         * @param context sink for the reduce output
         */
        public void reduce(IntWritable key, NullWritable value, Context context) throws IOException, InterruptedException{

            k.set(1);

            // The loop over the values was removed in this variant; a single write remains.
            context.write(k, key);

        }

    }

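结果：（不是很理解，为啥去掉iterable后就只输出一个value  key哪去了呢。原因：reduce(IntWritable, NullWritable, Context) 的签名与父类的 reduce(KEYIN, Iterable<VALUEIN>, Context) 不一致，只是重载而不是重写，框架实际调用的是 Reducer 默认的 reduce，它把输入的 (key, value) 原样输出；value 是 NullWritable，所以每行只剩下数字本身。给方法加上 @Override 就能在编译期发现这个问题）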

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MySort {

    public static class IntSortMapper extends Mapper<Object, Text, IntWritable, NullWritable>{

        private IntWritable val = new IntWritable();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException{

            String line = value.toString().trim();

            val.set(Integer.parseInt(line));

            context.write(val, NullWritable.get());

        }

    }

    public static class IntSortReducer extends Reducer<IntWritable, NullWritable, IntWritable,IntWritable>{

        private static IntWritable num = new IntWritable();

        public void reduce(IntWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException{

            for (NullWritable value : values) {

                context.write(num, key);

                num = new IntWritable(num.get() + );

            }

        }

    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        String dir_in = "hdfs://localhost:9000/in_sort";

        String dir_out = "hdfs://localhost:9000/out_sort";

        Path in = new Path(dir_in);

        Path out = new Path(dir_out);

        Configuration conf = new Configuration();

        Job sortJob = new Job(conf, "my_sort");

        sortJob.setJarByClass(MySort.class);

        sortJob.setInputFormatClass(TextInputFormat.class);

        sortJob.setMapperClass(IntSortMapper.class);

        //sortJob.setCombinerClass(SortReducer.class);

        //countJob.setPartitionerClass(HashPartitioner.class);

        sortJob.setMapOutputKeyClass(IntWritable.class);

        sortJob.setMapOutputValueClass(NullWritable.class);

        FileInputFormat.addInputPath(sortJob, in);

        sortJob.setReducerClass(IntSortReducer.class);

        sortJob.setNumReduceTasks();

        sortJob.setOutputKeyClass(IntWritable.class);

        sortJob.setOutputValueClass(IntWritable.class);

        //countJob.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileOutputFormat.setOutputPath(sortJob, out);

        sortJob.waitForCompletion(true);

    }

}
1    2
2    6
3    15
4    22
5    26
6    32
7    32
8    54
9    92
10    650
11    654
12    756
13    5956
14    65223

Hadoop 学习笔记（十） MapReduce实现排序全局变量的更多相关文章

Hadoop学习笔记—11.MapReduce中的排序和分组
一.写在之前的 1.1 回顾Map阶段四大步骤首先,我们回顾一下在MapReduce中,排序和分组在哪里被执行: 从上图中可以清楚地看出,在Step1.4也就是第四步中,需要对不同分区中的数据进行排 ...
Hadoop学习笔记： MapReduce二次排序
本文给出一个实现MapReduce二次排序的例子 package SortTest; import java.io.DataInput; import java.io.DataOutput; impo ...
hadoop 学习笔记：mapreduce框架详解
开始聊mapreduce,mapreduce是hadoop的计算框架,我学hadoop是从hive开始入手,再到hdfs,当我学习hdfs时候,就感觉到hdfs和mapreduce关系的紧密.这个可能 ...
Hadoop学习笔记：MapReduce框架详解
开始聊mapreduce,mapreduce是hadoop的计算框架,我学hadoop是从hive开始入手,再到hdfs,当我学习hdfs时候,就感觉到hdfs和mapreduce关系的紧密.这个可能 ...
【Big Data - Hadoop - MapReduce】hadoop 学习笔记：MapReduce框架详解
开始聊MapReduce,MapReduce是Hadoop的计算框架,我学Hadoop是从Hive开始入手,再到hdfs,当我学习hdfs时候,就感觉到hdfs和mapreduce关系的紧密.这个可能 ...
hadoop 学习笔记：mapreduce框架详解(转)
原文:http://www.cnblogs.com/sharpxiajun/p/3151395.html(有删减) Mapreduce运行机制下面我贴出几张图,这些图都是我在百度图片里找到的比较好的 ...
Hadoop学习笔记—12.MapReduce中的常见算法
一.MapReduce中有哪些常见算法 (1)经典之王:单词计数这个是MapReduce的经典案例,经典的不能再经典了! (2)数据去重 "数据去重"主要是为了掌握和利用并行化思 ...
Hadoop学习笔记： MapReduce Java编程简介
概述本文主要基于Hadoop 1.0.0后推出的新Java API为例介绍MapReduce的Java编程模型.新旧API主要区别在于新API(org.apache.hadoop.mapreduce ...
hadoop 学习笔记 (十) mapreduce2.0
MapReduce的特色---不擅长的方面 >实时计算像mysql一样,在毫秒级或者秒级内返回结果 >流式计算 Mapreduce的输入数据时静态的,不能动态变化 MapReduce自身 ...
三、Hadoop学习笔记————从MapReduce到Yarn
Yarn减轻了JobTracker的负担,对其进行了解耦

随机推荐

C++数据结构和算法每天一练(线性表)
#include <iostream> using namespace std; class ArrayLinerTable { public: void InitLine ...
Java基础知识强化之IO流笔记35：InputStreamReader（Reader字符流的子类）2种read数据方式
1. InputStreamReader(Reader字符流的子类)2种read数据方式: InputStreamReader的read方法: int read():一次读取一个字符 int read ...
mac 神奇时光机
http://bbs.zol.com.cn/nbbbs/d544_8216.html
CentOS 6.7安装Java JDK
1.下载Java JDK 下载地址:http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.htm ...
Windows Python 2.7 安装 Numpy
为了防止无良网站的爬虫抓取文章,特此标识,转载请注明文章出处.LaplaceDemon/ShiJiaqi. http://www.cnblogs.com/shijiaqi1066/p/4846093. ...
C# 的可空合并运算符（??）到底是怎样的宝宝？
前言废语也怪自己小白和不勤奋,没有系统的学习C#相关的东西,工作一年多还是初级小菜,深感不安,来到园子才发现好多钻研技术的人,也渐渐发现自己开始喜欢上了这个编程的世界.今日偶遇??操作符,发现我只看 ...
转-C# 操作 Excel 常见问题收集和整理
经常会有项目需要把表格导出为 Excel 文件,或者是导入一份 Excel 来操作,那么如何在 C# 中操作 Excel 文件成了一个最基本的问题. 做开发这几年来,陆陆续续也接触过这样的需求,但因为 ...
html学习的一些问题
1,什么是 W3C标准?w3c 标准不是一个标准,而是一系列标准,包括:结构标准,表现标准,动作标准. 2,内链元素和块状元素的区别内链元素允许与其他内链元素位于同一行,没有宽和高,如果想设置宽和搞, ...
U3D 精灵的点击监听
U3D游戏中,可能会用到点击对象,完成某项操作, 方法一:可以通过接收Input对象的输入,进行利用方法二:给对象绑定一个collier 组件,然后就能后使用内置方法这里有点不同,方法一,是不管哪 ...
[弹出消息] C#MessageBox帮助类（转载）
点击下载 MessageBox.rar 主要功能如下所示1.显示消息提示对话框 2.控件点击消息确认提示框 3.显示消息提示对话框,并进行页面跳转 4.输出自定义脚本信息 /// <summa ...

Hadoop 学习笔记 （十） MapReduce实现排序 全局变量

Hadoop 学习笔记 （十） MapReduce实现排序 全局变量的更多相关文章

随机推荐

热门专题

Hadoop 学习笔记（十） MapReduce实现排序全局变量

Hadoop 学习笔记（十） MapReduce实现排序全局变量的更多相关文章