准备数据access.log

每行各字段以制表符(\t)分隔,要用到的只有三列:第二列的手机号、倒数第三列的上行流量、倒数第二列的下行流量

1363157985066   13726230503 00-FD-07-A4-72-B8:CMCC  120.196.100.82  i02.c.aliimg.com        24  27  2481    24681   200
1363157995052   13826544101 5C-0E-8B-C7-F1-E0:CMCC  120.197.40.4            4   0   264 0   200
1363157991076   13926435656 20-10-7A-28-CC-0A:CMCC  120.196.100.99          2   4   132 1512    200
1363154400022   13926251106 5C-0E-8B-8B-B1-50:CMCC  120.197.40.4            4   0   240 0   200
1363157993044   18211575961 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99  iface.qiyi.com  视频网站    15  12  1527    2106    200
1363157995074   84138413    5C-0E-8B-8C-E8-20:7DaysInn  120.197.40.4    122.72.52.12        20  16  4116    1432    200
1363157993055   13560439658 C4-17-FE-BA-DE-D9:CMCC  120.196.100.99          18  15  1116    954 200
1363157995033   15920133257 5C-0E-8B-C7-BA-20:CMCC  120.197.40.4    sug.so.360.cn   信息安全    20  20  3156    2936    200
1363157983019   13719199419 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82          4   0   240 0   200
1363157984041   13660577991 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4    s19.cnzz.com    站点统计    24  9   6960    690 200
1363157973098   15013685858 5C-0E-8B-C7-F7-90:CMCC  120.197.40.4    rank.ie.sogou.com   搜索引擎    28  27  3659    3538    200
1363157986029   15989002119 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99  www.umeng.com   站点统计    3   3   1938    180 200
1363157992093   13560439658 C4-17-FE-BA-DE-D9:CMCC  120.196.100.99          15  9   918 4938    200
1363157986041   13480253104 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4            3   3   180 180 200
1363157984040   13602846565 5C-0E-8B-8B-B6-00:CMCC  120.197.40.4    2052.flash2-http.qq.com 综合门户    15  12  1938    2910    200
1363157995093   13922314466 00-FD-07-A2-EC-BA:CMCC  120.196.100.82  img.qfc.cn      12  12  3008    3720    200
1363157982040   13502468823 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99  y0.ifengimg.com 综合门户    57  102 7335    110349  200
1363157986072   18320173382 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99  input.shouji.sogou.com  搜索引擎    21  18  9531    2412    200
1363157990043   13925057413 00-1F-64-E1-E6-9A:CMCC  120.196.100.55  t3.baidu.com    搜索引擎    69  63  11058   48243   200
1363157988072   13760778710 00-FD-07-A4-7B-08:CMCC  120.196.100.82          2   2   120 120 200
1363157985066   13726238888 00-FD-07-A4-72-B8:CMCC  120.196.100.82  i02.c.aliimg.com        24  27  2481    24681   200
1363157993055   13560436666 C4-17-FE-BA-DE-D9:CMCC  120.196.100.99          18  15  1116    954 200
1363157985066   13726238888 00-FD-07-A4-72-B8:CMCC  120.196.100.82  i02.c.aliimg.com        24  27  10000   20000   200

自定义复杂数据类型

import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Custom composite value type holding the traffic figures of one phone number.
 *
 * <p>Rules followed by this custom data type:
 * <ol>
 *   <li>it implements the {@code Writable} interface;</li>
 *   <li>it implements that interface's {@code write} and {@code readFields} methods;</li>
 *   <li>it declares a public no-argument constructor (an easy point to overlook).</li>
 * </ol>
 * Every property is exposed through a getter/setter pair.
 */
public class Access implements Writable {
    private String phone; // phone number
    private long up;      // upstream traffic
    private long down;    // downstream traffic
    private long sum;     // total traffic (upstream + downstream)

    /** Creates an empty instance; fields are filled by setters or {@link #readFields}. */
    public Access(){}

    /**
     * Creates an instance and derives the total from the two directions.
     *
     * @param phone phone number
     * @param up    upstream traffic
     * @param down  downstream traffic
     */
    public Access(String phone,long up,long down){
        this.phone = phone;
        this.up = up;
        this.down = down;
        this.sum = up + down;
    }

    /**
     * Serializes the fields in the order phone, up, down, sum.
     *
     * <p>{@code DataOutput.writeUTF} rejects {@code null}, so {@code phone} must be set
     * before an instance is written.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phone); // String
        out.writeLong(up);   // long
        out.writeLong(down);
        out.writeLong(sum);
    }

    /** Restores the fields; the read order must mirror the order used by {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.phone = in.readUTF();
        this.up = in.readLong();
        this.down = in.readLong();
        this.sum = in.readLong();
    }

    /** @return the phone number */
    public String getPhone() {
        return phone;
    }

    /** @param phone the phone number */
    public void setPhone(String phone) {
        this.phone = phone;
    }

    /** @return the upstream traffic */
    public long getUp() {
        return up;
    }

    /** @param up the upstream traffic; {@code sum} is not recomputed */
    public void setUp(long up) {
        this.up = up;
    }

    /** @return the downstream traffic */
    public long getDown() {
        return down;
    }

    /** @param down the downstream traffic; {@code sum} is not recomputed */
    public void setDown(long down) {
        this.down = down;
    }

    /** @return the total traffic */
    public long getSum() {
        return sum;
    }

    /** @param sum the total traffic */
    public void setSum(long sum) {
        this.sum = sum;
    }

    @Override
    public String toString() {
        return "Access{" +
                "phone='" + phone + '\'' +
                ", up=" + up +
                ", down=" + down +
                ", sum=" + sum +
                '}';
    }
}

自定义Mapper处理

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

/**
 * Mapper that turns one tab-separated log line into a (phone, Access) pair.
 *
 * <p>Type parameters:
 * <ul>
 *   <li>{@code LongWritable}: input key, the offset of the line;</li>
 *   <li>{@code Text}: input value, one line of data;</li>
 *   <li>{@code Text}: output key, the phone number;</li>
 *   <li>{@code Access}: output value, the composite traffic object.</li>
 * </ul>
 */
public class AccessMapper extends Mapper<LongWritable, Text,Text,Access> {
    /**
     * Fewest fields a usable line can have: the phone number is field 1 and the traffic
     * figures are the third- and second-to-last fields, so with fewer than 5 fields the
     * upstream column would coincide with (or precede) the phone column.
     */
    private static final int MIN_FIELDS = 5;

    /**
     * Parses one line and emits the phone number with its upstream/downstream traffic.
     * Lines with too few fields or non-numeric traffic values are skipped and counted
     * in the {@code AccessMapper/MALFORMED_LINES} counter instead of failing the task.
     *
     * @param key     offset of the line (unused)
     * @param value   one line of the access log
     * @param context job context that receives the output pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        if (fields.length < MIN_FIELDS) {
            context.getCounter("AccessMapper", "MALFORMED_LINES").increment(1);
            return;
        }
        String phone = fields[1]; // phone number
        long up;
        long down;
        try {
            up = Long.parseLong(fields[fields.length - 3]);   // upstream traffic
            down = Long.parseLong(fields[fields.length - 2]); // downstream traffic
        } catch (NumberFormatException e) {
            context.getCounter("AccessMapper", "MALFORMED_LINES").increment(1);
            return;
        }
        // The Access constructor derives the total, so no local sum is needed here.
        context.write(new Text(phone), new Access(phone, up, down));
    }
}

自定义Reducer处理

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;

/**
 * Reducer that adds up the traffic of every record sharing one phone number.
 */
public class AccessReducer extends Reducer<Text,Access,Text,Access> {
    /**
     * Sums the upstream and downstream traffic of all values grouped under {@code key}
     * and emits a single aggregated {@code Access} for that phone number.
     *
     * @param key     the phone number
     * @param values  all Access objects emitted for that phone number
     * @param context job context that receives the aggregated pair
     */
    @Override
    protected void reduce(Text key, Iterable<Access> values, Context context) throws IOException, InterruptedException {
        long totalUp = 0L;
        long totalDown = 0L;
        for (Access record : values) {
            totalUp = totalUp + record.getUp();
            totalDown = totalDown + record.getDown();
        }
        // Build the aggregated record first, then hand it to the context.
        String phone = key.toString();
        Access summary = new Access(phone, totalUp, totalDown);
        context.write(key, summary);
    }
}

编写Driver类

/**
 * Driver that configures and submits the traffic-statistics job.
 */
public class AccessLocalApp {
    /**
     * Runs the job and exits with status 0 on success or 1 on failure.
     *
     * @param args optional: {@code args[0]} is the input path (default {@code "input"}),
     *             {@code args[1]} is the output path (default {@code "output"})
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception{
        // Fall back to the original hard-coded paths when no arguments are given.
        String inputDir = args.length > 0 ? args[0] : "input";
        String outputDir = args.length > 1 ? args[1] : "output";

        Configuration conf = new Configuration();
        // Create a Job
        Job job = Job.getInstance(conf);
        job.setJarByClass(AccessLocalApp.class);
        // Set the Mapper and Reducer classes
        job.setMapperClass(AccessMapper.class);
        job.setReducerClass(AccessReducer.class);
        // Set the key/value types emitted by the Mapper and the Reducer
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Access.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Access.class);
        // Input and output paths
        FileInputFormat.setInputPaths(job,new Path(inputDir));
        FileOutputFormat.setOutputPath(job,new Path(outputDir));
        // Submit the job and surface its outcome through the process exit status,
        // so a failed job is not reported as success.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

输出结果part-r-00000

13480253104 Access{phone='13480253104', up=180, down=180, sum=360}
13502468823 Access{phone='13502468823', up=7335, down=110349, sum=117684}
13560436666 Access{phone='13560436666', up=1116, down=954, sum=2070}
13560439658 Access{phone='13560439658', up=2034, down=5892, sum=7926}
13602846565 Access{phone='13602846565', up=1938, down=2910, sum=4848}
13660577991 Access{phone='13660577991', up=6960, down=690, sum=7650}
13719199419 Access{phone='13719199419', up=240, down=0, sum=240}
13726230503 Access{phone='13726230503', up=2481, down=24681, sum=27162}
13726238888 Access{phone='13726238888', up=12481, down=44681, sum=57162}
13760778710 Access{phone='13760778710', up=120, down=120, sum=240}
13826544101 Access{phone='13826544101', up=264, down=0, sum=264}
13922314466 Access{phone='13922314466', up=3008, down=3720, sum=6728}
13925057413 Access{phone='13925057413', up=11058, down=48243, sum=59301}
13926251106 Access{phone='13926251106', up=240, down=0, sum=240}
13926435656 Access{phone='13926435656', up=132, down=1512, sum=1644}
15013685858 Access{phone='15013685858', up=3659, down=3538, sum=7197}
15920133257 Access{phone='15920133257', up=3156, down=2936, sum=6092}
15989002119 Access{phone='15989002119', up=1938, down=180, sum=2118}
18211575961 Access{phone='18211575961', up=1527, down=2106, sum=3633}
18320173382 Access{phone='18320173382', up=9531, down=2412, sum=11943}
84138413    Access{phone='84138413', up=4116, down=1432, sum=5548}

重构思路

  • 可以看到,输出结果中每行的value部分是Access{...}格式,这其实是toString的格式问题,可以修改如下:
/** Renders the record as one comma-separated line: phone,up,down,sum. */
public String toString() {
    StringBuilder line = new StringBuilder();
    line.append(phone).append(",");
    line.append(up).append(",");
    line.append(down).append(",");
    line.append(sum);
    return line.toString();
}
  • 使用NullWritable作为Reducer输出的key:手机号已经包含在Access的输出中,不需要再作为key重复输出一次
// Reducer类型
public class AccessReducer extends Reducer<Text,Access, NullWritable,Access> { ... }
// Reducer输出key
context.write(NullWritable.get(),new Access(key.toString(),ups,downs));
  • 升级Reducer的完整代码
/**
 * Reducer that adds up the traffic per phone number and emits only the aggregated
 * {@code Access} value, using {@code NullWritable} as the output key.
 */
public class AccessReducer extends Reducer<Text,Access, NullWritable,Access> {
    /**
     * Sums the upstream and downstream traffic of all values grouped under {@code key}.
     *
     * @param key     the phone number
     * @param values  all Access objects emitted for that phone number
     * @param context job context that receives the aggregated value
     */
    @Override
    protected void reduce(Text key, Iterable<Access> values, Context context) throws IOException, InterruptedException {
        long upTotal = 0L;
        long downTotal = 0L;
        java.util.Iterator<Access> records = values.iterator();
        while (records.hasNext()) {
            Access current = records.next();
            upTotal += current.getUp();
            downTotal += current.getDown();
        }
        // The phone number travels inside the value, so the key is left empty.
        Access aggregated = new Access(key.toString(), upTotal, downTotal);
        context.write(NullWritable.get(), aggregated);
    }
}

新的输出结果

13480253104,180,180,360
13502468823,7335,110349,117684
13560436666,1116,954,2070
13560439658,2034,5892,7926
13602846565,1938,2910,4848
13660577991,6960,690,7650
13719199419,240,0,240
13726230503,2481,24681,27162
13726238888,12481,44681,57162
13760778710,120,120,240
13826544101,264,0,264
13922314466,3008,3720,6728
13925057413,11058,48243,59301
13926251106,240,0,240
13926435656,132,1512,1644
15013685858,3659,3538,7197
15920133257,3156,2936,6092
15989002119,1938,180,2118
18211575961,1527,2106,3633
18320173382,9531,2412,11943
84138413,4116,1432,5548

这样就拿到我们想要的数据结果了!

自定义Partitioner

需求:将统计结果按照手机号的前缀进行区分,写到不同的文件中。

  • 自定义Partitioner实现
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Custom MapReduce partitioning rule based on the phone-number prefix.
 */
public class AccessPartitioner extends Partitioner<Text,Access> {
    /**
     * Chooses a partition from the prefix of the phone number: "13" maps to bucket 0,
     * "15" to bucket 1 and everything else to bucket 2. The bucket is then folded into
     * the range {@code [0, numPartitions)} so the result stays valid when the job is
     * configured with fewer than three reduce tasks.
     *
     * @param phone         the phone number (map output key)
     * @param access        the map output value (unused)
     * @param numPartitions total number of partitions
     * @return the partition index for this key
     */
    @Override
    public int getPartition(Text phone, Access access, int numPartitions) {
        String number = phone.toString(); // convert once, reuse for both prefix checks
        int bucket;
        if (number.startsWith("13")){
            bucket = 0;
        }else if(number.startsWith("15")){
            bucket = 1;
        }else {
            bucket = 2;
        }
        // With three or more partitions this is a no-op and matches the original mapping.
        return bucket % numPartitions;
    }
}
  • Driver参数
// 设置自定义分区规则
job.setPartitionerClass(AccessPartitioner.class);
// 设置Reduce个数
job.setNumReduceTasks(3);
  • 再次运行,就得到了想要的统计结果,按手机号前缀分别写入三个文件:part-r-00000(13开头)、part-r-00001(15开头)、part-r-00002(其他)
13480253104,180,180,360
13502468823,7335,110349,117684
13560436666,1116,954,2070
13560439658,2034,5892,7926
13602846565,1938,2910,4848
13660577991,6960,690,7650
13719199419,240,0,240
13726230503,2481,24681,27162
13726238888,12481,44681,57162
13760778710,120,120,240
13826544101,264,0,264
13922314466,3008,3720,6728
13925057413,11058,48243,59301
13926251106,240,0,240
13926435656,132,1512,1644
15013685858,3659,3538,7197
15920133257,3156,2936,6092
15989002119,1938,180,2118
18211575961,1527,2106,3633
18320173382,9531,2412,11943
84138413,4116,1432,5548

MapReduce流量统计的更多相关文章

  1. Mapreduce的序列化和流量统计程序开发

    一.Hadoop数据序列化的数据类型 Java数据类型 => Hadoop数据类型 int IntWritable float FloatWritable long LongWritable d ...

  2. mapreduce数据处理——统计排序

    接上篇https://www.cnblogs.com/sengzhao666/p/11850849.html 2.数据处理: ·统计最受欢迎的视频/文章的Top10访问次数 (id) ·按照地市统计最 ...

  3. Hadoop_17_MapRduce_案例2_实现用户手机流量统计(ReduceTask并行度控制)

    需求:1.统计每一个用户(手机号)所耗费的总上行流量.下行流量,总流量 1.数据如下:保存为.dat文件(因为以\t切分数据,文件格式必须合适) 1363157985066 13726230503 0 ...

  4. iOS 网络流量统计

    在开发中,有时候需要获取流量统计信息.研究发现:通过函数getifaddrs来得到系统网络接口的信息,网络接口的信息,包含在if_data字段中, 有很多信息, 但我现在只关心ifi_ibytes,  ...

  5. MapReduce 单词统计案例编程

    MapReduce 单词统计案例编程 一.在Linux环境安装Eclipse软件 1.   解压tar包 下载安装包eclipse-jee-kepler-SR1-linux-gtk-x86_64.ta ...

  6. ios 使用可视化工具charles转换pcap文件,进行流量统计(通过tcpdump抓包)

    环境准备:使用mac电脑,下载xcode,Charles 连接iPhone手机,打开xcode-window-devices-查看设备UDID 打开终端:rvictl –s 设备号 ,查看虚拟端口号 ...

  7. 安卓App流量统计

    http://keepcleargas.bitbucket.org/2013/10/12/android-App-Traffic.html 安卓App流量统计 12 OCT 2013 android流 ...

  8. Android流量统计TrafficStats类

    对于Android流量统计来说在2.2版中新加入了TrafficStats类可以轻松获取,其实本身TrafficStats类也是读取Linux提供的文件对象系统类型的文本进行解析. android.n ...

  9. 利用iptables实现基于端口的网络流量统计

    如何统计某个应用的网络流量(包括网络流入量和网络流出量)问题,可以转换成如何基于端口号进行网络流量统计的问题.大部分网络应用程序都是传输层及以上的协议,因此基于端口号(tcp, udp)统计网络流量基 ...

随机推荐

  1. delphi ehLib 安装包下载及安装方法

    1.下载安装包,这里提供一个百度云盘共享链接,D7-XE8都有:https://pan.baidu.com/s/1DTlxok4RiSmDokuabnGvQw2.添加环境变量,菜单"Tool ...

  2. vim主题颜色

    1.VIM主题 查看Vim示例当前的颜色主题 打开一个Vim窗口,输入命令:color或:colorscheme后回车查看当前的颜色主题. Vim实例中设置颜色主题 输入命令"colorsc ...

  3. did not finish being created even after we waited 189 seconds or 61 attempts. And its status is downloading

    did not finish being created even after we waited 189 seconds or 61 attempts. And its status is down ...

  4. 2019余姚培训游记+ZJOJD2划水记

    2019余姚培训游记 突然就想写一个... 注意:以下全是胡言乱语的自high,还有很多错别字 Day 0 来的比较早,早上就到了 上午把一本小说看完了,是一个年轻作者的处女作. 我觉得我第一本书一定 ...

  5. 构建一个maven聚合类型的横向可扩展项目

    那个时候初入java这个大家庭,学习的方向很乱.毕业后,在公司磨练了一年,总想着是该交一份答卷了,可能成绩不会很好,但求及格!那么考试题目呢,我计划搭建一个横向可扩展的项目,可以在平台自扩展各种子项目 ...

  6. Exp1 PC平台逆向破解

    本次实践的对象是一个名为pwn1的linux可执行文件. 该程序正常执行流程是:main调用foo函数,foo函数会简单回显任何用户输入的字符串. 该程序同时包含另一个代码片段,getShell,会返 ...

  7. netty的简单的应用例子

    一.简单的聊天室程序 public class ChatClient { public static void main(String[] args) throws InterruptedExcept ...

  8. Apache Shiro Java反序列化漏洞分析

    1. 前言 最近工作上刚好碰到了这个漏洞,当时的漏洞环境是: shiro-core 1.2.4 commons-beanutils 1.9.1 最终利用ysoserial的CommonsBeanuti ...

  9. IntelliJ IDEA 2018最新版注册码激活方法

    一.首先点击intellij idea 2018 二.选择激活码 三.输入以下激活码intellij idea 2018 最新版本 注册激活码 **************************** ...

  10. photoshop关于图层的一些操作,几乎全部操作

    千里之行始于足下,ps如果想要有上升的空间,还是扎实基础,自从看了那本phshop从入门到精通就很少学了,也错过了很多知识,其实还是有很多的不明白.期待进一步的思考和解惑. 首先来说第一个知识点: 1 ...