使用MapReduce实现温度排序

温度排序代码，具体说明可以搜索其他博客

KeyPair.java

package temperaturesort;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

public class KeyPair implements WritableComparable<KeyPair> {

    private int hot;

    private int year;

    public int getYear() {

        return year;

    }

    public void setYear(int year) {

        this.year = year;

    }

    public int getHot() {

        return hot;

    }

    public void setHot(int hot) {

        this.hot = hot;

    }

    public int compareTo(KeyPair o) {

        int result = this.year-o.getYear();

        if(result!=0){

            return result<0?-1:1;

        }

        return -( this.hot < o.getHot() ? -1 :(this.hot == o.getHot()?0:1));

    }

    public void write(DataOutput dataOutput) throws IOException {

        dataOutput.writeInt(year);

        dataOutput.writeInt(hot);

    }

    public void readFields(DataInput dataInput) throws IOException {

        this.year=dataInput.readInt();

        this.hot=dataInput.readInt();

    }

    @Override

    public String toString() {

        return year+"\t"+hot;

    }

    @Override

    public int hashCode() {

        return new Integer(year+hot).hashCode();

    }

}

Sort.java:

package temperaturesort;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator for {@link KeyPair} keys: year ascending, then temperature
 * descending.
 */
public class Sort extends WritableComparator {

    /**
     * Registers {@link KeyPair} as the key type. The {@code true} flag asks
     * {@link WritableComparator} to create key instances so that
     * {@link #compare(WritableComparable, WritableComparable)} receives
     * deserialized objects.
     */
    public Sort() {
        super(KeyPair.class, true);
    }

    /**
     * Compares two {@link KeyPair} keys by year ascending, then by temperature
     * descending.
     *
     * @param a first key; must be a {@link KeyPair}
     * @param b second key; must be a {@link KeyPair}
     * @return -1, 0 or 1 when {@code a} sorts before, equal to, or after {@code b}
     */
    @Override
    @SuppressWarnings("rawtypes") // signature is dictated by WritableComparator
    public int compare(WritableComparable a, WritableComparable b) {
        KeyPair key1 = (KeyPair) a;
        KeyPair key2 = (KeyPair) b;

        // Integer.compare instead of subtraction: "a - b" can overflow and
        // report the wrong sign.
        int byYear = Integer.compare(key1.getYear(), key2.getYear());
        if (byYear != 0) {
            return byYear;
        }
        // Swapped operands give descending temperature order.
        return Integer.compare(key2.getHot(), key1.getHot());
    }

}

Partition.java:

package temperaturesort;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Routes every record of the same year to the same reducer.
 */
public class Partition extends Partitioner<KeyPair, Text> {

    /**
     * Computes the partition from the key's year only.
     *
     * @param keyPair map output key whose year selects the partition
     * @param text    map output value (unused)
     * @param num     total number of partitions
     * @return a partition index in {@code [0, num)}
     */
    @Override
    public int getPartition(KeyPair keyPair, Text text, int num) {
        // Clear the sign bit before the modulo so a negative or overflowed
        // product can never yield a negative (invalid) partition index.
        // For non-negative products the result is unchanged.
        return ((keyPair.getYear() * 127) & Integer.MAX_VALUE) % num;
    }

}

Group.java:

package temperaturesort;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator that treats {@link KeyPair} keys as equal whenever their
 * years match, ignoring the temperature.
 */
public class Group extends WritableComparator {

    /** Registers {@link KeyPair} as the key type handled by this comparator. */
    public Group() {
        super(KeyPair.class, true);
    }

    /**
     * Compares two {@link KeyPair} keys by year only.
     *
     * @param a first key; must be a {@link KeyPair}
     * @param b second key; must be a {@link KeyPair}
     * @return -1 if {@code a}'s year is smaller, 1 if larger, 0 if equal
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final int leftYear = ((KeyPair) a).getYear();
        final int rightYear = ((KeyPair) b).getYear();

        if (leftYear < rightYear) {
            return -1;
        }
        if (leftYear > rightYear) {
            return 1;
        }
        return 0;
    }

}

RunJob.java:

package temperaturesort;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

import java.text.ParseException;

import java.text.SimpleDateFormat;

import java.util.Calendar;

import java.util.Date;

/**
 * Driver and mapper/reducer definitions for the temperature-sort job.
 *
 * <p>Records are keyed by {@link KeyPair} (year, temperature), partitioned by
 * {@link Partition}, sorted by {@link Sort} and grouped by {@link Group}.
 */
public class RunJob {

    /**
     * Parses lines of the form {@code yyyy-MM-dd HH:mm:ss<TAB><temperature>C}
     * and emits a {@link KeyPair} of (year, temperature) with the original
     * line as the value.
     *
     * <p>Lines that do not split into exactly two tab-separated fields are
     * ignored. Lines whose timestamp or temperature cannot be parsed are
     * skipped and counted instead of failing the task.
     */
    public static class TempSortMapper extends Mapper<Object, Text, KeyPair, Text> {

        private static final String COUNTER_GROUP = "TempSort";
        private static final String MALFORMED_RECORDS = "MALFORMED_RECORDS";

        // SimpleDateFormat is not thread-safe, so every mapper instance owns
        // its own formatter instead of sharing a static one.
        private final SimpleDateFormat dateFormat =
                new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        /**
         * Converts one input line into a (KeyPair, line) pair.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   the raw input line
         * @param context sink for the emitted pair and for counters
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            if (fields.length != 2) {
                return;
            }

            // The temperature is everything before the first 'C'; without
            // that marker substring(0, -1) would throw, so skip the record.
            int unitIndex = fields[1].indexOf("C");
            if (unitIndex < 0) {
                skipMalformed(context, value, "missing 'C' temperature suffix");
                return;
            }

            int year;
            int hot;
            try {
                Date date = dateFormat.parse(fields[0]);
                Calendar calendar = Calendar.getInstance();
                calendar.setTime(date);
                year = calendar.get(Calendar.YEAR);
                hot = Integer.parseInt(fields[1].substring(0, unitIndex));
            } catch (ParseException e) {
                skipMalformed(context, value, "unparseable timestamp: " + e.getMessage());
                return;
            } catch (NumberFormatException e) {
                skipMalformed(context, value, "unparseable temperature: " + e.getMessage());
                return;
            }

            KeyPair keyPair = new KeyPair();
            keyPair.setHot(hot);
            keyPair.setYear(year);
            context.write(keyPair, value);
        }

        /** Records a skipped input line in a job counter and on stderr. */
        private void skipMalformed(Context context, Text value, String reason) {
            context.getCounter(COUNTER_GROUP, MALFORMED_RECORDS).increment(1);
            System.err.println("Skipping malformed record (" + reason + "): " + value);
        }

    }

    /**
     * Writes every value it receives back out together with the key.
     */
    public static class TempSortReducer extends Reducer<KeyPair, Text, KeyPair, Text> {

        /**
         * Emits one output record per incoming value.
         *
         * @param key     the key for the current group
         * @param values  the input lines belonging to this group
         * @param context sink for the emitted pairs
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(KeyPair key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text text : values) {
                context.write(key, text);
            }
        }

    }

    /**
     * Configures and submits the temperature-sort job, then exits with status
     * 0 on success or 1 on failure.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {
        // When launching from Windows, hadoop.home.dir may need to be set, e.g.
        // System.setProperty("hadoop.home.dir", "E:\\softs\\majorSoft\\hadoop-2.7.5");
        Configuration conf = new Configuration();
        conf.set("mapreduce.app-submission.cross-platform", "true");

        Path fileInput = new Path("hdfs://mycluster/testFile/hot.txt");
        Path fileOutput = new Path("hdfs://mycluster/output/hot");

        Job job = Job.getInstance(conf, "temperatureSort");
        job.setJar("E:\\bigData\\hadoopDemo\\out\\artifacts\\wordCount_jar\\hadoopDemo.jar");
        job.setJarByClass(RunJob.class);

        job.setMapperClass(TempSortMapper.class);
        job.setReducerClass(TempSortReducer.class);

        job.setMapOutputKeyClass(KeyPair.class);
        job.setMapOutputValueClass(Text.class);
        // Declare the reducer output types explicitly so they match what
        // TempSortReducer emits.
        job.setOutputKeyClass(KeyPair.class);
        job.setOutputValueClass(Text.class);

        job.setNumReduceTasks(3);
        job.setSortComparatorClass(Sort.class);
        job.setPartitionerClass(Partition.class);
        job.setGroupingComparatorClass(Group.class);

        FileInputFormat.addInputPath(job, fileInput);
        FileOutputFormat.setOutputPath(job, fileOutput);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

其中自定义的 Sort 和 Partition 作用于 map 任务的输出（先按 Partition 分区，再按 Sort 排序），而 Group 则在 reduce 端使用，用来决定哪些 key 会被归入同一次 reduce() 调用。

使用MapReduce实现温度排序的更多相关文章

详细讲解MapReduce二次排序过程
我在15年处理大数据的时候还都是使用MapReduce, 随着时间的推移, 计算工具的发展, 内存越来越便宜, 计算方式也有了极大的改变. 到现在再做大数据开发的好多同学都是直接使用spark, hi ...
Hadoop学习笔记—11.MapReduce中的排序和分组
一.写在之前的 1.1 回顾Map阶段四大步骤首先,我们回顾一下在MapReduce中,排序和分组在哪里被执行: 从上图中可以清楚地看出,在Step1.4也就是第四步中,需要对不同分区中的数据进行排 ...
Hadoop学习笔记： MapReduce二次排序
本文给出一个实现MapReduce二次排序的例子 package SortTest; import java.io.DataInput; import java.io.DataOutput; impo ...
(转)MapReduce二次排序
一.概述 MapReduce框架对处理结果的输出会根据key值进行默认的排序,这个默认排序可以满足一部分需求,但是也是十分有限的.在我们实际的需求当中,往往有要对reduce输出结果进行二次排序的需求 ...
MapReduce——计算温度最大值（基于全新2.2.0API）
MapReduce——计算温度最大值 (基于全新2.2.0API) deprecated: Job类的所有Constructors, 新的API用静态方法getInstance(conf)来去的Job ...
mapreduce 实现数子排序
设计思路: 使用mapreduce的默认排序,按照key值进行排序的,如果key为封装int的IntWritable类型,那么MapReduce按照数字大小对key排序,如果key为封装为String ...
mapreduce数据处理——统计排序
接上篇https://www.cnblogs.com/sengzhao666/p/11850849.html 2.数据处理: ·统计最受欢迎的视频/文章的Top10访问次数 (id) ·按照地市统计最 ...
[MapReduce_7] MapReduce 中的排序
0. 说明部分排序 && 全排序 && 采样 && 二次排序 1. 介绍 sort 是根据 Key 进行排序 [部分排序] 在每个分区中,分别进行排序 ...
MapReduce二次排序
默认情况下,Map 输出的结果会对 Key 进行默认的排序,但是有时候需要对 Key 排序的同时再对 Value 进行排序,这时候就要用到二次排序了.下面让我们来介绍一下什么是二次排序. 二次排序原理 ...

随机推荐

.net core 2.0使用NLog写日志文件
原文地址:传送门之前也看了 linezero 大佬写的教程,但是总是没有成功写入日志文件.按照曲廉卿的已成功,以下正文: 最近研究了一下NLog的使用方式,简单的入了一下门. 实现的功能,对于不 ...
前端自动化gulp遇上es6从无知到深爱
Gulp是什么? Gulp是前端自动化的工具,但Gulp能用来做什么 1.搭建web服务器 2.使用预处理器Sass,Less 3.压缩优化,可以压缩JS CSS Html 图片 4.自动将更新变化的 ...
Jsonp方式和httpclient方式有什么区别？
jsonp基于js,解决跨域问题,本质发起ajax情求但是Jsonp只支持get请求. 它不安全,它先解析js,然后发起ajax请求,然后获取到返回值,通过浏览器返回,最后解析. JQuery和Spr ...
BootstrapTable与KnockoutJS相结合实现增删改查功能
http://www.jb51.net/article/83910.htm KnockoutJS是一个JavaScript实现的MVVM框架.通过本文给大家介绍BootstrapTable与Knock ...
YII2 源码阅读综述
如何阅读源码呢? 我的方法是,打开xdebug的auto_trace [XDebug] ;xdebug.profiler_append = 0 ;xdebug.profiler_enable = 1 ...
struts2核心配置之Action
一.实现Action类 1.POJO实现(Plain Ordinary Java Object 简单的java对象) public class User1 { public String execut ...
CSS中包含块原理解析
CSS包含块原理解析确定CSS中的包含块也确定就是元素的父元素.关键是:看元素是如何定位的.确定包含块很重要,比如设置百分比.另外也可以进行样式的继承等等. 分两个情况: 相对定位和静态定位静态定 ...
【POJ 3784】 Running Median （对顶堆）
Running Median Description For this problem, you will write a program that reads in a sequence of 32 ...
[Lydsy1806月赛] 路径统计
题面在这里! xjb想的做法竟然不小心把std艹爆了qwq,我也很无奈啊.... 那接下来就说一下我的神奇做法qwq 如果是经常读我博客的童鞋会发现其实我以前就想要做这个题啦,只不过当时读错题啦... ...
【枚举】【SPFA】Urozero Autumn Training Camp 2016 Day 5: NWERC-2016 Problem I. Iron and Coal
那个人派出的队伍的行走的路径一定前半程是重合的,后半程分叉开来. 于是预处理每个点离1号点的最短路,到最近的铁的最短路,到最近的煤的最短路.(三次BFS / SPFA)然后枚举分岔点,尝试更新答案即可 ...

使用MapReduce实现温度排序

使用MapReduce实现温度排序的更多相关文章

随机推荐

热门专题