Hadoop MapReduce编程 API入门系列之挖掘气象数据版本1（一）

　　　　下面是版本2。

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本2（九）

这篇博客带大家体会一种不一样的编程版本。

代码

package zhouls.bigdata.myMapReduce.weather;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class MyKey implements WritableComparable<MyKey>{

    //WritableComparable,实现这个方法，要多很多

    //readFields是读入，write是写出

    private int year;

    private int month;

    private double hot;

    public int getYear() {

    return year;

}

    public void setYear(int year) {

        this.year = year;

    }

    public int getMonth() {

        return month;

    }

    public void setMonth(int month) {

        this.month = month;

    }

    public double getHot() {

        return hot;

    }

    public void setHot(double hot) {

        this.hot = hot;

        }//这一大段的get和set，可以右键，source，产生get和set，自动生成。

    public void readFields(DataInput arg0) throws IOException { //反序列化

        this.year=arg0.readInt();

        this.month=arg0.readInt();

        this.hot=arg0.readDouble();

    }

    public void write(DataOutput arg0) throws IOException { //序列化

        arg0.writeInt(year);

        arg0.writeInt(month);

        arg0.writeDouble(hot);

    }

    //判断对象是否是同一个对象，当该对象作为输出的key

    public int compareTo(MyKey o) {

        int r1 =Integer.compare(this.year, o.getYear());//比较当前的年和你传过来的年

        if(r1==){

        int r2 =Integer.compare(this.month, o.getMonth());

        if(r2==){

            return Double.compare(this.hot, o.getHot());

        }else{

            return r2;

        }

        }else{

            return r1;

        }

    }

}

package zhouls.bigdata.myMapReduce.weather;

import org.apache.hadoop.io.DoubleWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class MyPartitioner extends HashPartitioner<MyKey, DoubleWritable>{//这里就是洗牌

    //执行时间越短越好

    public int getPartition(MyKey key, DoubleWritable value, int numReduceTasks) {

        return (key.getYear()-)%numReduceTasks;//对于一个数据集，找到最小，1949

    }

}

//1949-10-01 14:21:02    34c

//1949-10-02 14:01:02    36c

//1950-01-01 11:21:02    32c

//1950-10-01 12:21:02    37c

//1951-12-01 12:21:02    23c

//1950-10-02 12:21:02    41c

//1950-10-03 12:21:02    27c

//1951-07-01 12:21:02    45c

//1951-07-02 12:21:02    46c

//1951-07-03 12:21:03    47c

package zhouls.bigdata.myMapReduce.weather;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

public class MySort extends WritableComparator{

    public MySort(){

        super(MyKey.class,true);//把MyKey传进了

    }

    public int compare(WritableComparable a, WritableComparable b) {//这是排序的精髓

        MyKey k1 =(MyKey) a;

        MyKey k2 =(MyKey) b;

        int r1 =Integer.compare(k1.getYear(), k2.getYear());

        if(r1==){//年相同

        int r2 =Integer.compare(k1.getMonth(), k2.getMonth());

        if(r2==){//月相同

            return -Double.compare(k1.getHot(), k2.getHot());//比较气温

        }else{

            return r2;

        }

        }else{

            return r1;

        }

    }

}

package zhouls.bigdata.myMapReduce.weather;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

public class MyGroup extends WritableComparator{

    public MyGroup(){

        super(MyKey.class,true);//把MyKey传进了

}

    public int compare(WritableComparable a, WritableComparable b) {//这是分组的精髓

        MyKey k1 =(MyKey) a;

        MyKey k2 =(MyKey) b;

        int r1 =Integer.compare(k1.getYear(), k2.getYear());

    if(r1==){

        return Integer.compare(k1.getMonth(), k2.getMonth());

    }else{

        return r1;

    }

    }

}

package zhouls.bigdata.myMapReduce.weather;

import java.io.IOException;

import java.text.ParseException;

import java.text.SimpleDateFormat;

import java.util.Calendar;

import java.util.Date;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.DoubleWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RunJob {

//    1949-10-01 14:21:02    34c WeatherMapper

//    1949-10-02 14:01:02    36c

//    1950-01-01 11:21:02    32c    分区在MyPartitioner.java

//    1950-10-01 12:21:02    37c

//    1951-12-01 12:21:02    23c    排序在MySort.java

//    1950-10-02 12:21:02    41c

//    1950-10-03 12:21:02    27c    分组在MyGroup.java

//    1951-07-01 12:21:02    45c

//    1951-07-02 12:21:02    46c    再，WeatherReducer

//    1951-07-03 12:21:03    47c

//key：每行第一个隔开符（制表符）左边为key，右边为value    自定义类型MyKey，洗牌，

    static class WeatherMapper extends Mapper<Text, Text, MyKey, DoubleWritable>{

    SimpleDateFormat sdf =new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    NullWritable v =NullWritable.get();

//    1949-10-01 14:21:02是自定义类型MyKey，即key

//    34c是DoubleWritable，即value

    protected void map(Text key, Text value,Context context) throws IOException, InterruptedException {

    try {

        Date date =sdf.parse(key.toString());

        Calendar c =Calendar.getInstance();

        //Calendar 类是一个抽象类，可以通过调用 getInstance() 静态方法获取一个 Calendar 对象，

        //此对象已由当前日期时间初始化，即默认代表当前时间，如 Calendar c = Calendar.getInstance();

        c.setTime(date);

        int year =c.get(Calendar.YEAR);

        int month =c.get(Calendar.MONTH);

        double hot =Double.parseDouble(value.toString().substring(, value.toString().lastIndexOf("c")));

        MyKey k =new MyKey();

        k.setYear(year);

        k.setMonth(month);

        k.setHot(hot);

        context.write(k, new DoubleWritable(hot));

    } catch (Exception e) {

        e.printStackTrace();

    }

    }

}

    static class WeatherReducer extends Reducer<MyKey, DoubleWritable, Text, NullWritable>{

    protected void reduce(MyKey arg0, Iterable<DoubleWritable> arg1,Context arg2)throws IOException, InterruptedException {

        int i=;

        for(DoubleWritable v :arg1){

        i++;

        String msg =arg0.getYear()+"\t"+arg0.getMonth()+"\t"+v.get();//"\t"是制表符

        arg2.write(new Text(msg), NullWritable.get());

                if(i==){

                    break;

                }

        }

    }

}

public static void main(String[] args) {

    Configuration config =new Configuration();

//    config.set("fs.defaultFS", "hdfs://HadoopMaster:9000");

//    config.set("yarn.resourcemanager.hostname", "HadoopMaster");

//    config.set("mapred.jar", "C:\\Users\\Administrator\\Desktop\\wc.jar");

//    config.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ",");//默认分隔符是制表符"\t"，这里自定义，如","

    try {

        FileSystem fs =FileSystem.get(config);

        Job job =Job.getInstance(config);

        job.setJarByClass(RunJob.class);

        job.setJobName("weather");

        job.setMapperClass(WeatherMapper.class);

        job.setReducerClass(WeatherReducer.class);

        job.setMapOutputKeyClass(MyKey.class);

        job.setMapOutputValueClass(DoubleWritable.class);

        job.setPartitionerClass(MyPartitioner.class);

        job.setSortComparatorClass(MySort.class);

        job.setGroupingComparatorClass(MyGroup.class);

        job.setNumReduceTasks();

        job.setInputFormatClass(KeyValueTextInputFormat.class);

//    FileInputFormat.addInputPath(job, new Path("hdfs://HadoopMaster:9000/weather.txt"));//输入路径，下有weather.txt

//

//    Path outpath =new Path("hdfs://HadoopMaster:9000/out/weather");

        FileInputFormat.addInputPath(job, new Path("./data/weather.txt"));//输入路径，下有weather.txt

    Path outpath =new Path("./out/weather");

    if(fs.exists(outpath)){

        fs.delete(outpath, true);

    }

    FileOutputFormat.setOutputPath(job, outpath);

        boolean f= job.waitForCompletion(true);

        if(f){

        }

    } catch (Exception e) {

        e.printStackTrace();

    }

    }

}

欢迎大家，加入我的微信公众号：大数据躺过的坑

同时，大家可以关注我的个人博客：

http://www.cnblogs.com/zlslch/ 和 http://www.cnblogs.com/lchzls/

以及对应本平台的QQ群：161156071（大数据躺过的坑）

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本3（九）的更多相关文章

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本2（十）
下面,是版本1. Hadoop MapReduce编程 API入门系列之挖掘气象数据版本1(一) 这篇博文,包括了,实际生产开发非常重要的,单元测试和调试代码.这里不多赘述,直接送上代码. MRUni ...
Hadoop MapReduce编程 API入门系列之统计学生成绩版本2（十八）
不多说,直接上代码. 统计出每个年龄段的男.女学生的最高分这里,为了空格符的差错,直接,我们有时候,像如下这样的来排数据. 代码 package zhouls.bigdata.myMapRedu ...
Hadoop MapReduce编程 API入门系列之压缩和计数器（三十）
不多说,直接上代码. Hadoop MapReduce编程 API入门系列之小文件合并(二十九) 生成的结果,作为输入源. 代码 package zhouls.bigdata.myMapReduce. ...
Hadoop MapReduce编程 API入门系列之join（二十六）（未完）
不多说,直接上代码. 天气记录数据库 Station ID Timestamp Temperature 气象站数据库 Station ID Station Name 气象站和天气记录合并之后的示意图如 ...
Hadoop MapReduce编程 API入门系列之MapReduce多种输入格式（十七）
不多说,直接上代码. 代码 package zhouls.bigdata.myMapReduce.ScoreCount; import java.io.DataInput; import java.i ...
Hadoop MapReduce编程 API入门系列之自定义多种输入格式数据类型和排序多种输出格式（十一）
推荐 MapReduce分析明星微博数据 http://git.oschina.net/ljc520313/codeexample/tree/master/bigdata/hadoop/mapredu ...
Hadoop MapReduce编程 API入门系列之wordcount版本1（五）
这个很简单哈,编程的版本很多种. 代码版本1 package zhouls.bigdata.myMapReduce.wordcount5; import java.io.IOException; im ...
Hadoop MapReduce编程 API入门系列之薪水统计（三十一）
不多说,直接上代码. 代码 package zhouls.bigdata.myMapReduce.SalaryCount; import java.io.IOException; import jav ...
Hadoop MapReduce编程 API入门系列之Crime数据分析（二十五）（未完）
不多说,直接上代码. 一共12列,我们只需提取有用的列:第二列(犯罪类型).第四列(一周的哪一天).第五列(具体时间)和第七列(犯罪场所). 思路分析基于项目的需求,我们通过以下几步完成: 1.首先 ...

随机推荐

安卓桌布显示的dip和px
安卓程序设计界面显示设置图像大小,在layout.xml里面有dip和px选项,dip为什么暂时还不知道,或许是设计桌布的设定像素比率,px为像素值: 比如我的手机是 Lenovo K920,屏幕 ...
vue货币格式化组件、局部过滤功能以及全局过滤功能
一.在这里介绍一个vue的时间格式化插件: moment 使用方法: .npm install moment --save. 2 定义时间格式化全局过滤器在main.js中导入组件 import ...
团体程序设计天梯赛-练习集-L1-034. 点赞
L1-034. 点赞时间限制 200 ms 内存限制 65536 kB 代码长度限制 8000 B 判题程序 Standard 作者陈越微博上有个“点赞”功能,你可以为你喜欢的博文点个赞表示支持 ...
Android 性能测试初探（四）
书接上文 Android 性能测试初探(三) 自从 cpu及内存后,GPU 这个词对于 PC 性能测试者也不陌生了,什么 3Dmax,安兔兔之类的第三方软件让 GPU 在移动端性能测试领域都知晓,但对 ...
Docker拉取images时报错Error response from daemon
docker拉取redis时,抛出以下错误: [master@localhost ~]$ docker pull redis Using default tag: latest Error respo ...
PAT_A1111#Online Map
Source: PAT A1111 Online Map (30 分) Description: Input our current position and a destination, an on ...
Linux基础知识和命令
1,用户登录: root(管理员),权限最大.安全生产必要时再使用. 普通用户:权限有限.2, 终端终端设备终端:输入和输出设备,如:键盘鼠标显示器; 图形终端:虚拟机进入窗 ...
Linux之部署虚拟环境、安装系统
本章涵盖了Linux的优势和哲学思想,零基础详细记录了部署虚拟环境安装Linux系统,完整演示了VM虚拟机的安装与配置过程,以及Centos 7系统的安装.配置过程和初始化方法. Linux优势分析: ...
25.partial update内置乐观锁并发控制
主要知识点 (1)partial update内置乐观锁并发控制 (2)retry_on_conflict post /index/type/id/_update?retry_on_confl ...
QT5的模块介绍【摘】
Qt 5 模块分为 Essentials Modules 和 Add-on Modules 两部分.前者是基础模块,在所有平台上都可用:后者是扩展模块,建立在基础模块的基础之上,在能够运行 Qt 的平 ...

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本3（九）

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本1（一）

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本2（九）

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本3（九）的更多相关文章

随机推荐

热门专题