【Hadoop学习之九】MapReduce案例分析一-天气

环境
　　虚拟机：VMware 10
　　Linux版本：CentOS-6.5-x86_64
　　客户端：Xshell4
　　FTP：Xftp4
　　jdk8
　　hadoop-3.1.1

找出每个月气温最高的2天

1949-10-01 14:21:02        34c

1949-10-01 19:21:02        38c

1949-10-02 14:01:02        36c

1950-01-01 11:21:02        32c

1950-10-01 12:21:02        37c

1951-12-01 12:21:02        23c

1950-10-02 12:21:02        41c

1950-10-03 12:21:02        27c

1951-07-01 12:21:02        45c

1951-07-02 12:21:02        46c

1951-07-03 12:21:03        47c

package test.mr.tq;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**

 * @author Administrator

 * 客户端

 */

public class MyTQ {

    /**

     * 找出每个月气温最高的2天

     * @param args

     */

    public static void main(String[] args) {

        //加载配置文件

        Configuration conf = new Configuration();

        try {

            //创建客户端

            Job job = Job.getInstance(conf,"tian qi");

            job.setJarByClass(MyTQ.class);

            //Map

            job.setMapperClass(TQMapper.class);

            job.setOutputKeyClass(TQ.class);

            job.setOutputValueClass(IntWritable.class);

            //分区类  处理大数据量均衡并发处理

            job.setPartitionerClass(TqPartitioner.class);

            //用于buffer字节数组内的key排序的比较类  温度最高的2天  需要排序

            job.setSortComparatorClass(TqSortComparator.class);

            //Reduce

            job.setReducerClass(TqReducer.class);

            job.setNumReduceTasks(2);

            //用于分组的比较类  年月相同的被视为一组

            job.setGroupingComparatorClass(TqGroupingComparator.class);

            //输入  输出

            Path input = new Path("/root/input");

            FileInputFormat.addInputPath(job, input);

            Path output = new Path("/root/output");

            if (output.getFileSystem(conf).exists(output))

            {

                output.getFileSystem(conf).delete(output, true);

            }

            FileOutputFormat.setOutputPath(job, output);

            //提交

            System.exit(job.waitForCompletion(true) ? 0 : 1);

        } catch (Exception e) {

            e.printStackTrace();

        }

    }

}

package test.mr.tq;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Composite map-output key holding a date (year, month, day) and the
 * temperature ({@code wd}) measured on that date.
 */
public class TQ implements WritableComparable<TQ> {

    private int year;
    private int month;
    private int day;
    private int wd;

    /** @return the year component */
    public int getYear() {
        return this.year;
    }

    /** @param year the year component */
    public void setYear(int year) {
        this.year = year;
    }

    /** @return the month component */
    public int getMonth() {
        return this.month;
    }

    /** @param month the month component */
    public void setMonth(int month) {
        this.month = month;
    }

    /** @return the day component */
    public int getDay() {
        return this.day;
    }

    /** @param day the day component */
    public void setDay(int day) {
        this.day = day;
    }

    /** @return the temperature */
    public int getWd() {
        return this.wd;
    }

    /** @param wd the temperature */
    public void setWd(int wd) {
        this.wd = wd;
    }

    /**
     * Deserializes this key: reads year, month, day and temperature as four
     * ints, in the same order {@link #write(DataOutput)} emits them.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        year = in.readInt();
        month = in.readInt();
        day = in.readInt();
        wd = in.readInt();
    }

    /**
     * Serializes this key as four ints: year, month, day, temperature.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(this.year);
        out.writeInt(this.month);
        out.writeInt(this.day);
        out.writeInt(this.wd);
    }

    /**
     * Orders keys chronologically (ascending year, then month, then day).
     * The temperature does not take part in this ordering.
     */
    @Override
    public int compareTo(TQ that) {
        int byYear = Integer.compare(this.year, that.getYear());
        if (byYear != 0) {
            return byYear;
        }
        int byMonth = Integer.compare(this.month, that.getMonth());
        if (byMonth != 0) {
            return byMonth;
        }
        return Integer.compare(this.day, that.getDay());
    }
}

package test.mr.tq;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

/**
 * Reduce-side grouping comparator: two keys belong to the same reduce group
 * when their year and month are equal.
 */
public class TqGroupingComparator extends WritableComparator {

    /** Registers {@link TQ} as the key class handled by this comparator. */
    public TqGroupingComparator() {
        super(TQ.class, true);
    }

    /**
     * Compares two keys by year and then by month only.
     *
     * @return 0 when both keys share year and month (same group); otherwise
     *         the sign of the first differing component
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TQ left = (TQ) a;
        TQ right = (TQ) b;

        int byYear = Integer.compare(left.getYear(), right.getYear());
        return byYear != 0
                ? byYear
                : Integer.compare(left.getMonth(), right.getMonth());
    }
}

package test.mr.tq;

import java.io.IOException;

import java.text.ParseException;

import java.text.SimpleDateFormat;

import java.util.Calendar;

import java.util.Date;

import org.apache.commons.lang.StringUtils;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

/**
 * Parses one weather record per input line and emits it as a
 * ({@link TQ}, temperature) pair.
 *
 * <p>Design of the K:V mapping. An input line holds a timestamp and a
 * temperature with a one-character unit suffix, separated by a tab:
 * <pre>
 *     1949-10-01 14:21:02    34c
 *     1949-10-01 19:21:02    38c
 *     1951-07-03 12:21:03    47c
 * </pre>
 * The key carries year, month, day and temperature; the value repeats the
 * temperature.
 */
public class TQMapper extends Mapper<LongWritable, Text, TQ, IntWritable> {

    // Output key and value objects, reused for every record.
    TQ tq = new TQ();

    IntWritable vwd = new IntWritable();

    // Created once per mapper instance instead of once per record. Only the
    // date part of the timestamp is parsed; the time of day is ignored.
    private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");

    private final Calendar calendar = Calendar.getInstance();

    /**
     * Converts one line into a key/value pair and writes it to the context.
     * Lines that do not have two tab-separated fields, whose date cannot be
     * parsed, or whose temperature is not an integer followed by a single
     * unit character are counted and skipped instead of failing the task.
     *
     * @param key     the input key supplied by the framework; used only in the
     *                skip message
     * @param value   the raw line
     * @param context the task context that receives the output pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // e.g. "1951-07-03 12:21:03<TAB>47c"
        String[] strs = StringUtils.split(value.toString(), "\t");
        if (strs == null || strs.length < 2) {
            skipMalformed(key, value, context);
            return;
        }

        final Date date;
        final int wd;
        try {
            date = dateFormat.parse(strs[0]);
            // Drop the trailing unit character ("47c" -> "47").
            wd = Integer.parseInt(strs[1].substring(0, strs[1].length() - 1));
        } catch (ParseException | NumberFormatException e) {
            skipMalformed(key, value, context);
            return;
        }

        calendar.setTime(date);

        // key
        tq.setYear(calendar.get(Calendar.YEAR));
        // Calendar.MONTH is zero-based, so shift it to 1..12.
        tq.setMonth(calendar.get(Calendar.MONTH) + 1);
        tq.setDay(calendar.get(Calendar.DAY_OF_MONTH));
        tq.setWd(wd);

        // value
        vwd.set(wd);

        // emit
        context.write(tq, vwd);
    }

    /** Records a skipped input line in a job counter and on stderr. */
    private void skipMalformed(LongWritable key, Text value, Context context) {
        context.getCounter("TQMapper", "MALFORMED_RECORDS").increment(1);
        System.err.println("Skipping malformed record at " + key + ": " + value);
    }
}

package test.mr.tq;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Assigns each map output record (K, V) a partition number (K, V, P).
 *
 * <p>Partition rule design goal: keep the partitions as balanced as possible
 * and avoid skew. This implementation partitions by the key's year.
 *
 * @author wjy
 */
public class TqPartitioner extends Partitioner<TQ, IntWritable> {

    /**
     * Returns the partition for a record, derived from the key's year.
     *
     * @param key           the map output key
     * @param value         the map output value; not used
     * @param numPartitions the number of partitions
     * @return a partition index in {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(TQ key, IntWritable value, int numPartitions) {
        // floorMod never returns a negative index, unlike %, which would for
        // a negative year; results for non-negative years are unchanged.
        return Math.floorMod(key.getYear(), numPartitions);
    }
}

package test.mr.tq;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

/**
 * Emits, for each reduce group, the first record and the first following
 * record that falls on a different day, as "year-month-day" and temperature.
 */
public class TqReducer extends Reducer<TQ, IntWritable, Text, IntWritable> {

    // Output key and value objects, reused across calls.
    Text rkey = new Text();

    IntWritable rval = new IntWritable();

    /**
     * Processes one group of records sharing the same grouping key.
     *
     * <p>Per the job's comparators the records arrive in chronological group
     * order with temperature descending, e.g.
     * <pre>
     *     1970 01 01 40
     *     1970 01 02 38
     * </pre>
     * Note that {@code key} changes as {@code values} is iterated, so it is
     * re-read on every step.
     */
    @Override
    protected void reduce(TQ key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        boolean firstEmitted = false;
        int firstDay = 0;

        for (IntWritable ignored : values) {
            if (!firstEmitted) {
                // First record of the group: always emitted.
                firstDay = key.getDay();
                emit(key, context);
                firstEmitted = true;
                continue;
            }
            if (key.getDay() != firstDay) {
                // First record on another day: emit it and stop.
                emit(key, context);
                break;
            }
        }
    }

    /** Writes the key's current date and temperature to the output. */
    private void emit(TQ key, Context context) throws IOException, InterruptedException {
        rkey.set(key.getYear() + "-" + key.getMonth() + "-" + key.getDay());
        rval.set(key.getWd());
        context.write(rkey, rval);
    }
}

package test.mr.tq;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator for map output keys: ascending by year and month, then
 * descending by temperature.
 */
public class TqSortComparator extends WritableComparator {

    /**
     * Registers {@link TQ} as the key class. The keys being sorted are held
     * as bytes, so they have to be deserialized into objects before they can
     * be compared.
     */
    public TqSortComparator() {
        super(TQ.class, true);
    }

    /**
     * Compares by year, then month (both ascending), then temperature
     * (descending).
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TQ left = (TQ) a;
        TQ right = (TQ) b;

        int byYear = Integer.compare(left.getYear(), right.getYear());
        if (byYear != 0) {
            return byYear;
        }

        int byMonth = Integer.compare(left.getMonth(), right.getMonth());
        if (byMonth != 0) {
            return byMonth;
        }

        // Swapping the operands yields the descending order.
        return Integer.compare(right.getWd(), left.getWd());
    }
}

【Hadoop学习之九】MapReduce案例分析一-天气的更多相关文章

【Hadoop学习之十二】MapReduce案例分析四-TF-IDF
环境虚拟机:VMware 10 Linux版本:CentOS-6.5-x86_64 客户端:Xshell4 FTP:Xftp4 jdk8 hadoop-3.1.1 概念TF-IDF(term fre ...
【Hadoop学习之十三】MapReduce案例分析五-ItemCF
环境虚拟机:VMware 10 Linux版本:CentOS-6.5-x86_64 客户端:Xshell4 FTP:Xftp4 jdk8 hadoop-3.1.1 推荐系统——协同过滤(Collab ...
【Hadoop学习之十】MapReduce案例分析二-好友推荐
环境虚拟机:VMware 10 Linux版本:CentOS-6.5-x86_64 客户端:Xshell4 FTP:Xftp4 jdk8 hadoop-3.1.1 最应该推荐的好友TopN,如何排名 ...
【Hadoop学习之十一】MapReduce案例分析三-PageRank
环境虚拟机:VMware 10 Linux版本:CentOS-6.5-x86_64 客户端:Xshell4 FTP:Xftp4 jdk8 hadoop-3.1.1 什么是pagerank?算法原理- ...
Hadoop学习笔记—20.网站日志分析项目案例（一）项目介绍
网站日志分析项目案例(一)项目介绍:当前页面网站日志分析项目案例(二)数据清洗:http://www.cnblogs.com/edisonchou/p/4458219.html 网站日志分析项目案例 ...
Hadoop学习笔记—20.网站日志分析项目案例（二）数据清洗
网站日志分析项目案例(一)项目介绍:http://www.cnblogs.com/edisonchou/p/4449082.html 网站日志分析项目案例(二)数据清洗:当前页面网站日志分析项目案例 ...
Hadoop学习笔记—20.网站日志分析项目案例
1.1 项目来源本次要实践的数据日志来源于国内某技术学习论坛,该论坛由某培训机构主办,汇聚了众多技术学习者,每天都有人发帖.回帖,如图1所示. 图1 项目来源网站-技术学习论坛本次实践的目的就在于 ...
Hadoop学习笔记—20.网站日志分析项目案例（三）统计分析
网站日志分析项目案例(一)项目介绍:http://www.cnblogs.com/edisonchou/p/4449082.html 网站日志分析项目案例(二)数据清洗:http://www.cnbl ...
Hadoop学习笔记—12.MapReduce中的常见算法
一.MapReduce中有哪些常见算法 (1)经典之王:单词计数这个是MapReduce的经典案例,经典的不能再经典了! (2)数据去重 "数据去重"主要是为了掌握和利用并行化思 ...

随机推荐

javascript语法（一）极客时间
脚本和模块 javascript有两种源文件,一种叫脚本,一种叫模块.这个区分主要是在ES6引入的,ES5及之前版本只有一种源文件类型(只有脚本). 脚本是可以有浏览器或者node环境引入执行的,而模 ...
swift 字体自适应，宽高自适应
let kScreenWidth = UIScreen.main.bounds.width let kScreenHeight = UIScreen.main.bounds.height public ...
winform版的dota改键器
从网上找的源码很不齐全,自己补充了一下,有兴趣的可以看看. 首先是建立一个hook类WFChangeKey using System; using System.Reflection; using S ...
林兴爆料小程序很快可以支持各个 App 直接打开小程序
在微信开放平台基础高级产品经理林兴演讲的当场,他爆料了微信小程序一个轰动性新能力:小程序很快可以支持各个 App 直接打开小程序!没错,你没有听错,简单来说,在不久以后,所有的 App 里面都可以看到 ...
萌新接触前端的第三课——JavaScript
JavaScript概述一.JavaScript的历史 1992年Nombas开发出C-minus-minus(C--)的嵌入式脚本语言(最初绑定在CEnvi软件中).后将其改名ScriptEase ...
C++ 常用算法
http://blog.csdn.net/jgzquanquan/article/details/77185711
jquery 无缝轮播
新闻公告无缝轮播--demo 理解:向上移动一个li的高度+margin-bottom值,同时将ul第一个的li插入到ul的最后一个位置. <!DOCTYPE html> <html ...
golang 的 buffered channel 及 unbuffered channel
The channel is divided into two categories: unbuffered and buffered. (1) Unbuffered channelFor unbuf ...
SAP 创建 component
1: 进入x3c 系统,输入 T-CODE BSP_WD_CMPWB 2: 输入以Z开头的组件名. 点击create using wizard 3: 输入应用属性 4: 定义 bol mod ...
通过wui登陆 sap 页面对数据进行高级搜索
1: 登陆QGL系统. 在 T-CODE搜索框输入wui 会跳到搜索的web页面,进行搜索. 或者浏览器输入: https://ldciqgl.wdf.sap.corp:44300/sap(bD1lb ...

【Hadoop学习之九】MapReduce案例分析一-天气

【Hadoop学习之九】MapReduce案例分析一-天气的更多相关文章

随机推荐

热门专题