MapReduce TopN（自主复习）

1.MyTopN 主程序

package com.littlepage.topn;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

public class MyTopN {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf=new Configuration(true);

        String[] other=new GenericOptionsParser(conf,args).getRemainingArgs();

        //设定本地环境运行，不进行集群运行

        conf.set("mapreduce.framework.name","local");

        //设定异构平台

        conf.set("mapreduce.app-submission.cross-platform","true");

        Job job=Job.getInstance(conf);

        job.setJarByClass(MyTopN.class);

        job.setJobName("TopN");

        //核心

        //map task

        //input,output

        TextInputFormat.addInputPath(job,new Path(other[]));

        Path outPath=new Path(other[]);

        if(outPath.getFileSystem(conf).exists(outPath)){

            outPath.getFileSystem(conf).delete(outPath,true);

        }

        //map

        job.setMapperClass(TopNMapper.class);

        job.setMapOutputKeyClass(TopNKey.class);

        job.setMapOutputValueClass(IntWritable.class);

        //partitioner

        //只需要满足相同的key获得相同的分区号

        job.setPartitionerClass(TopNPartitioner.class);

        //sortComparator

        job.setSortComparatorClass(TopNSortComparator.class);

        //combine

        //reducetask

        job.setReducerClass(TopNReducer.class);

        //groupingComparator

        job.setGroupingComparatorClass(TopNGroupingComparator.class);

        //output

        TextOutputFormat.setOutputPath(job,outPath);

        job.waitForCompletion(true);

    }

}

2.TopNKey

package com.littlepage.topn;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

/**

 * 自定义类型必须实现接口：

 * 序列化/反序列化   比较器

 */

public class TopNKey implements WritableComparable<TopNKey> {

    private int year;

    private int month;

    private int day;

    private int template;

    public int getYear() {

        return year;

    }

    public void setYear(int year) {

        this.year = year;

    }

    public int getMonth() {

        return month;

    }

    public void setMonth(int month) {

        this.month = month;

    }

    public int getDay() {

        return day;

    }

    public void setDay(int day) {

        this.day = day;

    }

    public int getTemplate() {

        return template;

    }

    public void setTemplate(int template) {

        this.template = template;

    }

    @Override

    public void write(DataOutput out) throws IOException {

        out.writeInt(year);

        out.writeInt(month);

        out.writeInt(day);

        out.writeInt(template);

    }

    @Override

    public void readFields(DataInput in) throws IOException {

        this.year = in.readInt();

        this.month = in.readInt();

        this.day = in.readInt();

        this.template = in.readInt();

    }

    @Override

    public int compareTo(TopNKey that) {

        int c1 = Integer.compare(this.year,that.getYear());

        if(c1==){

            int c2 = Integer.compare(this.month,that.getMonth());

            if(c2 == ){

                return Integer.compare(this.day,that.getDay());

            }

            return c2;

        }

        return c1;

    }

}

3.TopNMapper

package com.littlepage.topn;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.util.StringUtils;

import java.io.IOException;

import java.text.ParseException;

import java.text.SimpleDateFormat;

import java.util.Calendar;

import java.util.Date;

public class TopNMapper extends Mapper<LongWritable, Text,TopNKey, IntWritable> {

    TopNKey topNKey = new TopNKey();

    IntWritable intWritable = new IntWritable();

    @Override

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        //开发习惯

        //value: 2019-6-1 22:22:22   1    31

        String[] strs = StringUtils.split(value.toString(), '\t');

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

        try{

            Date date = sdf.parse(strs[]);

            Calendar cal=Calendar.getInstance();

            cal.setTime(date);

            topNKey.setYear(cal.get(Calendar.YEAR));

            topNKey.setMonth(cal.get(Calendar.MONTH)+);

            topNKey.setDay(cal.get(Calendar.DAY_OF_MONTH));

            int template=Integer.parseInt(strs[]);

            topNKey.setTemplate(template);

            intWritable.set(template);

            context.write(topNKey,intWritable);

        }catch (ParseException e){

            e.printStackTrace();

        }

    }

}

4.TopNReducer

package com.littlepage.topn;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

import java.util.Iterator;

public class TopNReducer extends Reducer<TopNKey, IntWritable, Text,IntWritable> {

    Text rkey=new Text();

    IntWritable rval=new IntWritable();

    int flag=;

    int day=;

    @Override

    protected void reduce(TopNKey key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

        Iterator<IntWritable> iter = values.iterator();

        while(iter.hasNext()){

            IntWritable val=iter.next();

            if(flag==){

                rkey.set(key.getYear()+"-"+key.getMonth()+"-"+key.getDay());

                rval.set(key.getTemplate());

                context.write(rkey,rval);

                flag++;

                day=key.getDay();

            }

            if(flag!=&&day!=key.getDay()){

                rkey.set(key.getYear()+"-"+key.getMonth()+"-"+key.getDay());

                rval.set(key.getTemplate());

                context.write(rkey,rval);

                break;

            }

        }

    }

}

5.TopNPartitioner 分区器，决定Map输出的每条记录属于哪个分区（即交给哪个reduce task处理）

package com.littlepage.topn;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Assigns map output to a partition based on the year of the key, so every
 * record of the same year (and therefore of the same year/month group) lands
 * in the same partition.
 */
public class TopNPartitioner extends Partitioner<TopNKey,IntWritable> {

    /**
     * Design notes from the original author: keep the computation cheap, and
     * partition on a coarser dimension than the grouping key. Partitioning by
     * year alone may produce data skew.
     *
     * @param key           the map output key
     * @param value         the map output value (unused)
     * @param numPartitions number of partitions
     * @return a partition index in {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(TopNKey key, IntWritable value, int numPartitions) {
        // Mask the sign bit so a negative year can never yield a negative index;
        // for non-negative years this is the same as year % numPartitions.
        return (key.getYear() & Integer.MAX_VALUE) % numPartitions;
    }
}

6.TopNSortComparator 排序比较器，对Map输出的key按年、月升序排序，同年同月内按温度降序排序

package com.littlepage.topn;

import org.apache.hadoop.io.RawComparator;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

public class TopNSortComparator extends WritableComparator {

    public  TopNSortComparator(){

        super(TopNKey.class,true);

    }

    @Override

    public int compare(WritableComparable a, WritableComparable b) {

        TopNKey k1=(TopNKey) a;

        TopNKey k2=(TopNKey) b;

        //年，月，温度，且温度倒序

        int c1=Integer.compare(k1.getYear(),k2.getYear());

        if(c1==){

            int c2=Integer.compare(k1.getMonth(),k2.getMonth());

            if(c2==){

                return -Integer.compare(k1.getTemplate(),k2.getTemplate());

            }

            return c2;

        }

        return c1;

    }

}

7.TopNGroupingComparator 分组比较器，用于reduce的分组，每一个组是年月，进行reduce操作

package com.littlepage.topn;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

public class TopNGroupingComparator extends WritableComparator {

    public TopNGroupingComparator() {

        super(TopNKey.class, true);

    }

    @Override

    public int compare(WritableComparable a, WritableComparable b) {

        TopNKey k1 = (TopNKey) a;

        TopNKey k2 = (TopNKey) b;

        //年，月

        int c1 = Integer.compare(k1.getYear(), k2.getYear());

        if (c1 == ) {

            return Integer.compare(k1.getMonth(), k2.getMonth());

        }

        return c1;

    }

}

TopN案例是MapReduce的典型案例，需牢记

MapReduce TopN（自主复习）的更多相关文章

MapReduce计数程序（自主复习）
1.MyWordCount类注意: 1.本机+测试,两个注释都放开 2.本机跑集群,要开异构平台为true 3.集群跑,把两个注释都注起来,然后在集群上面跑 package com.littlepa ...
Before NOIP 2018
目录总结刷题 2018 - 9 - 24 2018 - 9 - 25 2018 - 9 - 26 2018 - 9 - 27 2018 - 9 - 28 2018 - 9 - 29 2018 - ...
C高级第一次PTA作业要求三
要求一.要求二内容链接:http://www.cnblogs.com/X-JY/p/8550457.html 一.PTA作业中的知识点总结 1.6-1 计算两数的和与差(10 分) (1)*在程序中 ...
mapreduce的cleanUp和setUp的特殊用法（TopN问题）和常规用法
一:特殊用法我们上来不讲普通用法,普通用法放到最后.我们来谈一谈特殊用法,了解这一用法,让你的mapreduce编程能力提高一个档次,毫不夸张!!!扯淡了,让我们进入正题: 我们知道reduce和m ...
Hadoop基础-Map端链式编程之MapReduce统计TopN示例
Hadoop基础-Map端链式编程之MapReduce统计TopN示例作者:尹正杰版权声明:原创作品,谢绝转载!否则将追究法律责任. 一.项目需求对“temp.txt”中的数据进行分析,统计出各 ...
Hadoop学习之路（二十）MapReduce求TopN
前言在Hadoop中,排序是MapReduce的灵魂,MapTask和ReduceTask均会对数据按Key排序,这个操作是MR框架的默认行为,不管你的业务逻辑上是否需要这一操作. 技术点 MapR ...
大数据学习——mapreduce学习topN问题
求每一个订单中成交金额最大的那一笔 top1 数据 Order_0000001,Pdt_01,222.8 Order_0000001,Pdt_05,25.8 Order_0000002,Pdt_05 ...
大数据mapreduce全局排序top-N之python实现
a.txt.b.txt文件如下: a.txt hadoop hadoop hadoop hadoop hadoop hadoop hadoop hadoop hadoop hadoop hadoop ...
hadoop mapreduce求解有序TopN（高效模式）
1.在map阶段对数据先求解改分片的topN,到reduce阶段再合并求解一次,求解过程利用TreeMap的排序特性,不用自己写算法. 2.样板数据,类似如下 1 13682846555 192.16 ...

随机推荐

Java内存泄漏分析和预防
1. 什么是内存泄漏?有什么危害书面说法: 内存泄漏:对象已经没有被应用程序使用,但是垃圾回收器没办法移除它们,因为还在被引用着. 在Java中,内存泄漏就是存在一些被分配的对象,这些对象有下面两个 ...
1436：数列分段II
1436:数列分段II 题解二分答案我们最终答案的取值区间是[ max(a[i]) , ∑a[i] ] 设定 l=max(a[i]) , r=∑a[i] , mid不断二分 mid表示 ...
virtualbox安装xp虚拟机缺少驱动
下载驱动精灵完全版,自带万能驱动
nodejs之mongodb操作
声明: 当查询到数据库数据后,对数据库数据进行遍历,可以采用toArray()函数,具体实现可以看第六点 1.本地安装mongodb 安装包:https://www.mongodb.com/downl ...
WPF 模拟迅雷TabControl界面
WPF模拟迅雷TabControl界面点击查看下载  <Style x:Key="TabControlStyle" Tar ...
三十六：数据库之SQLAlchemy外建之一对一关系
relationship()的uselist参数默认为True,即一对多,如果要一对一,则需让uselist=False 准备工作 from sqlalchemy import create_engi ...
python UI自动化之JS定位
1.话不多说,直接贴入代码上面的 document.getElementById 可以替换成别的定位方式,比如: 通过name获取:document.getElementsByName 通过标签获取 ...
Oracle 安装 RAC 11.2.0.4 centos7.4 -udev磁盘绑定/执行root脚本报错
在centos 7.4上安装oracle rac 11.2.0.4 报错及相关解决 $ cat /etc/redhat-release CentOS Linux release 7.4.1708 (C ...
git clone 指定分支操作
服务器迁移,而且原来本地开发是在同一个目录中切换不同的分支,感觉有点挫,于是打算一个文件目录对应一个分支,这样不会有太大的文件差异. 记录下来本次操作,可能以后还会用到. git初始化一般是这样. g ...
关机命令 shutdown
参考资料:[http://jingyan.baidu.com/article/49ad8bce705f3f5834d8faec.html]

MapReduce TopN（自主复习）

1.MyTopN 主程序

2.TopNKey

3.TopNMapper

4.TopNReducer

5.TopNPartitioner 分区规划，来划分Map之后的结果是存在哪个dn进行处理

6.TopNSortComparator 排序比较器，在Map中精确到月，按温度递减

7.TopNGroupingComparator 分组比较器，用于reduce的分组，每一个组是年月，进行reduce操作

MapReduce TopN（自主复习）的更多相关文章

随机推荐

热门专题