Hadoop MapReduce编程 API入门系列之统计学生成绩版本2（十八）

　　不多说，直接上代码。

　　统计出每个年龄段的男、女学生的最高分

　　这里，为了避免空格符与制表符混淆而出错，我们有时候会直接用字面量 <tab> 作为字段分隔符，像下面这样来排数据。

代码

package zhouls.bigdata.myMapReduce.Gender;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapred.JobConf;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Partitioner;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

/**

*

* @function 统计不同年龄段内 男、女最高分数

*

*

*/

/*

Alice<tab>23<tab>female<tab>45

Bob<tab>34<tab>male<tab>89

Chris<tab>67<tab>male<tab>97

Kristine<tab>38<tab>female<tab>53

Connor<tab>25<tab>male<tab>27

Daniel<tab>78<tab>male<tab>95

James<tab>34<tab>male<tab>79

Alex<tab>52<tab>male<tab>69

Nancy<tab>7<tab>female<tab>98

Adam<tab>9<tab>male<tab>37

Jacob<tab>7<tab>male<tab>23

Mary<tab>6<tab>female<tab>93

Clara<tab>87<tab>female<tab>72

Monica<tab>56<tab>female<tab>92

*/

public class Gender extends Configured implements Tool {

/*

*

* @function Mapper 解析输入数据，然后按需求输出

* @input key=行偏移量 value=学生数据

* @output key=gender value=name+age+score

*

*/

public static class PCMapper extends Mapper<Object, Text, Text, Text>

{

public void map(Object key, Text value, Context context) throws IOException, InterruptedException

{//拿Alice<tab>23<tab>female<tab>45

String[] tokens = value.toString().split("<tab>");//使用分隔符<tab>，将数据解析为数组 tokens

//得到Alice    23    female    45

//即tokens[0] tokens[1] tokens[2] tokens[3]

String gender = tokens[].toString();//性别

String nameAgeScore = tokens[] + "\t" + tokens[] + "\t"+ tokens[];

//输出 key=gender value=name+age+score

//输出 key=female value=Alice    +23+45

context.write(new Text(gender), new Text(nameAgeScore));//将 （female ， Alice+ 23+ 45) 写入到context中

}

}

/**
 * Partitioner that spreads map output over the reducers by the hash of the key.
 */
public static class MyHashPartitioner extends Partitioner<Text, Text>
{
    /**
     * Use {@link Object#hashCode()} to partition.
     *
     * @param key            map output key (the gender)
     * @param value          map output value; not used
     * @param numReduceTasks number of reduce tasks configured for the job
     * @return a partition index in {@code [0, numReduceTasks)}
     */
    @Override
    public int getPartition(Text key, Text value, int numReduceTasks)
    {
        // hashCode() may be negative and Java's % keeps the sign of the dividend,
        // so clear the sign bit first; a negative index is not a valid partition.
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}

/**

*

* @function Partitioner 根据 age 选择 reduce 分区

*

*/

public static class PCPartitioner extends Partitioner<Text, Text>

{

@Override

public int getPartition(Text key, Text value, int numReduceTasks)

{

// TODO Auto-generated method stub

String[] nameAgeScore = value.toString().split("\t");

String age = nameAgeScore[];//学生年龄

int ageInt = Integer.parseInt(age);//按年龄段分区

// 默认指定分区 0

if (numReduceTasks == )

return ;

//年龄小于等于20，指定分区0

if (ageInt <= ) {

return ;

}

// 年龄大于20，小于等于50，指定分区1

if (ageInt >  && ageInt <= ) {

return  % numReduceTasks;

}

// 剩余年龄，指定分区2

else

return  % numReduceTasks;

}

}

/**

*

* @function 定义Combiner 合并 Mapper 输出结果

*

*/

public static class PCCombiner extends Reducer<Text, Text, Text, Text>

{

private Text text = new Text();

public void reduce(Text key, Iterable<Text> values, Context context)throws IOException, InterruptedException

{

int maxScore = Integer.MIN_VALUE;

String name = " ";

String age = " ";

int score = ;

for (Text val : values)

{

String[] valTokens = val.toString().split("\\t");

score = Integer.parseInt(valTokens[]);

if (score > maxScore)

{

name = valTokens[];

age = valTokens[];

maxScore = score;

}

}

text.set(name + "\t" + age + "\t" + maxScore);

context.write(key, text);

}

}

/*

*

* @function Reducer 统计出 不同年龄段、不同性别 的最高分

* input key=gender value=name+age+score

* output key=name value=age+gender+score

*

*/

static class PCReducer extends Reducer<Text, Text, Text, Text>

{

@Override

public void reduce(Text key, Iterable<Text> values, Context context)throws IOException, InterruptedException

{

int maxScore = Integer.MIN_VALUE;

String name = " ";

String age = " ";

String gender = " ";

int score = ;

// 根据key，迭代 values 集合，求出最高分

for (Text val : values)

{

String[] valTokens = val.toString().split("\\t");

score = Integer.parseInt(valTokens[]);

if (score > maxScore)

{

name = valTokens[];

age = valTokens[];

gender = key.toString();

maxScore = score;

}

}

context.write(new Text(name), new Text("age- " + age + "\t" + gender + "\tscore-" + maxScore));

}

}

/**

* @function 任务驱动方法

* @param args

* @return

* @throws Exception

*/

@Override

public int run(String[] args) throws Exception

{

// TODO Auto-generated method stub

Configuration conf = new Configuration();//读取配置文件

Path mypath = new Path(args[]);

FileSystem hdfs = mypath.getFileSystem(conf);

if (hdfs.isDirectory(mypath))

{

hdfs.delete(mypath, true);

}

@SuppressWarnings("deprecation")

Job job = new Job(conf, "gender");//新建一个任务

job.setJarByClass(Gender.class);//主类

job.setMapperClass(PCMapper.class);//Mapper

job.setReducerClass(PCReducer.class);//Reducer

job.setPartitionerClass(MyHashPartitioner.class);

//job.setPartitionerClass(PCPartitioner.class);//设置Partitioner类

job.setNumReduceTasks();// reduce个数设置为3

job.setMapOutputKeyClass(Text.class);//map 输出key类型

job.setMapOutputValueClass(Text.class);//map 输出value类型

job.setCombinerClass(PCCombiner.class);//设置Combiner类

job.setOutputKeyClass(Text.class);//输出结果 key类型

job.setOutputValueClass(Text.class);//输出结果 value 类型

FileInputFormat.addInputPath(job, new Path(args[]));// 输入路径

FileOutputFormat.setOutputPath(job, new Path(args[]));// 输出路径

job.waitForCompletion(true);//提交任务

return ;

}

/**
 * Program entry point.
 *
 * <p>Uses the input and output paths given on the command line when at least
 * two arguments are supplied; otherwise falls back to the local sample paths.
 * For a cluster run, pass HDFS locations such as
 * {@code hdfs://HadoopMaster:9000/gender/gender.txt} and
 * {@code hdfs://HadoopMaster:9000/out/partition/}.
 *
 * @param args optional: input path followed by output path
 * @throws Exception if the job cannot be run
 */
public static void main(String[] args) throws Exception
{
    String[] defaultArgs = {
        "./data/gender/gender.txt",
        "./out/gender" };

    String[] jobArgs = (args != null && args.length >= 2) ? args : defaultArgs;

    int ec = ToolRunner.run(new Configuration(), new Gender(), jobArgs);
    System.exit(ec);
}

}

　　　　或者

　　　　代码

package com.dajiangtai.hadoop.junior;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapred.JobConf;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Partitioner;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

/**

 *

 * @function 统计不同年龄段内    男、女最高分数

 * @author zhouls

 *

 */

 /*

Alice<tab>23<tab>female<tab>45

Bob<tab>34<tab>male<tab>89

Chris<tab>67<tab>male<tab>97

Kristine<tab>38<tab>female<tab>53

Connor<tab>25<tab>male<tab>27

Daniel<tab>78<tab>male<tab>95

James<tab>34<tab>male<tab>79

Alex<tab>52<tab>male<tab>69

Nancy<tab>7<tab>female<tab>98

Adam<tab>9<tab>male<tab>37

Jacob<tab>7<tab>male<tab>23

Mary<tab>6<tab>female<tab>93

Clara<tab>87<tab>female<tab>72

Monica<tab>56<tab>female<tab>92

*/

public class Gender extends Configured implements Tool {

    /*

     *

     * @function Mapper 解析输入数据，然后按需求输出

     * @input  key=行偏移量   value=学生数据

     * @output key=gender  value=name+age+score

     *

     */

    public static class PCMapper extends Mapper<Object, Text, Text, Text>

    {

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException

        {//拿Alice<tab>23<tab>female<tab>45

            String[] tokens = value.toString().split("<tab>");//使用分隔符<tab>，将数据解析为数组 tokens

                            //得到Alice        23         female            45

                            //即tokens[0]   tokens[1]  tokens[2]  tokens[3]

            String gender = tokens[].toString();//性别

            String nameAgeScore = tokens[] + "\t" + tokens[] + "\t"+ tokens[];

            //输出  key=gender  value=name+age+score

            //输出     key=female  value=Alice    +23+45

            context.write(new Text(gender), new Text(nameAgeScore));//将 （female ， Alice+  23+ 45) 写入到context中

        }

    }

    /**
     * Partitioner that spreads map output over the reducers by the hash of the key.
     */
    public static class MyHashPartitioner extends Partitioner<Text, Text>
    {
        /**
         * Use {@link Object#hashCode()} to partition.
         *
         * @param key            map output key (the gender)
         * @param value          map output value; not used
         * @param numReduceTasks number of reduce tasks configured for the job
         * @return a partition index in {@code [0, numReduceTasks)}
         */
        @Override
        public int getPartition(Text key, Text value, int numReduceTasks)
        {
            // hashCode() may be negative and Java's % keeps the sign of the dividend,
            // so clear the sign bit first; a negative index is not a valid partition.
            return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
        }
    }

    /**

     *

     * @function Partitioner 根据 age 选择 reduce 分区

     *

     */

    public static class PCPartitioner extends Partitioner<Text, Text>

    {

        @Override

        public int getPartition(Text key, Text value, int numReduceTasks)

        {

            // TODO Auto-generated method stub

            String[] nameAgeScore = value.toString().split("\t");

            String age = nameAgeScore[];//学生年龄

            int ageInt = Integer.parseInt(age);//按年龄段分区

            // 默认指定分区 0

            if (numReduceTasks == )

                return ;

            //年龄小于等于20，指定分区0

            if (ageInt <= ) {

                return ;

            }

            // 年龄大于20，小于等于50，指定分区1

            if (ageInt >  && ageInt <= ) {

                return  % numReduceTasks;

            }

            // 剩余年龄，指定分区2

            else

                return  % numReduceTasks;

        }

    }

    /**

     *

     * @function 定义Combiner 合并 Mapper 输出结果

     *

     */

    public static class PCCombiner extends Reducer<Text, Text, Text, Text>

    {

        private Text text = new Text();

        public void reduce(Text key, Iterable<Text> values, Context context)throws IOException, InterruptedException

        {

            int maxScore = Integer.MIN_VALUE;

            String name = " ";

            String age = " ";

            int score = ;

            for (Text val : values)

            {

                String[] valTokens = val.toString().split("\\t");

                score = Integer.parseInt(valTokens[]);

                if (score > maxScore)

                {

                    name = valTokens[];

                    age = valTokens[];

                    maxScore = score;

                }

            }

            text.set(name + "\t" + age + "\t" + maxScore);

            context.write(key, text);

        }

    }

    /*

     *

     * @function Reducer 统计出 不同年龄段、不同性别 的最高分

     * input key=gender value=name+age+score

     * output key=name value=age+gender+score

     *

     */

    static class PCReducer extends Reducer<Text, Text, Text, Text>

    {

        @Override

        public void reduce(Text key, Iterable<Text> values, Context context)throws IOException, InterruptedException

        {

            int maxScore = Integer.MIN_VALUE;

            String name = " ";

            String age = " ";

            String gender = " ";

            int score = ;

            // 根据key，迭代 values 集合，求出最高分

            for (Text val : values)

                {

                String[] valTokens = val.toString().split("\\t");

                score = Integer.parseInt(valTokens[]);

                if (score > maxScore)

                {

                    name = valTokens[];

                    age = valTokens[];

                    gender = key.toString();

                    maxScore = score;

                }

            }

            context.write(new Text(name), new Text("age- " + age + "\t" + gender + "\tscore-" + maxScore));

        }

    }

    /**

     * @function 任务驱动方法

     * @param args

     * @return

     * @throws Exception

     */

    @Override

    public int run(String[] args) throws Exception

    {

        // TODO Auto-generated method stub

        Configuration conf = new Configuration();//读取配置文件

        Path mypath = new Path(args[]);

        FileSystem hdfs = mypath.getFileSystem(conf);

        if (hdfs.isDirectory(mypath))

        {

            hdfs.delete(mypath, true);

        }

        @SuppressWarnings("deprecation")

        Job job = new Job(conf, "gender");//新建一个任务

        job.setJarByClass(Gender.class);//主类

        job.setMapperClass(PCMapper.class);//Mapper

        job.setReducerClass(PCReducer.class);//Reducer

        job.setPartitionerClass(MyHashPartitioner.class);

        //job.setPartitionerClass(PCPartitioner.class);//设置Partitioner类

        job.setNumReduceTasks();// reduce个数设置为3

        job.setMapOutputKeyClass(Text.class);//map 输出key类型

        job.setMapOutputValueClass(Text.class);//map 输出value类型

        job.setCombinerClass(PCCombiner.class);//设置Combiner类

        job.setOutputKeyClass(Text.class);//输出结果 key类型

        job.setOutputValueClass(Text.class);//输出结果 value 类型

        FileInputFormat.addInputPath(job, new Path(args[]));// 输入路径

        FileOutputFormat.setOutputPath(job, new Path(args[]));// 输出路径

        job.waitForCompletion(true);//提交任务

        return ;

    }

    /**
     * Program entry point.
     *
     * <p>Uses the input and output paths given on the command line when at least
     * two arguments are supplied; otherwise falls back to the default HDFS
     * sample locations.
     *
     * @param args optional: input path followed by output path
     * @throws Exception if the job cannot be run
     */
    public static void main(String[] args) throws Exception
    {
        String[] defaultArgs = {
                "hdfs://master:9000/middle/partition/gender.txt",
                "hdfs://master:9000/middle/partition/out/" };

        String[] jobArgs = (args != null && args.length >= 2) ? args : defaultArgs;

        int ec = ToolRunner.run(new Configuration(), new Gender(), jobArgs);
        System.exit(ec);
    }

}

Hadoop MapReduce编程 API入门系列之统计学生成绩版本2（十八）的更多相关文章

Hadoop MapReduce编程 API入门系列之压缩和计数器（三十）
不多说,直接上代码. Hadoop MapReduce编程 API入门系列之小文件合并(二十九) 生成的结果,作为输入源. 代码 package zhouls.bigdata.myMapReduce. ...
Hadoop MapReduce编程 API入门系列之挖掘气象数据版本2（十）
下面,是版本1. Hadoop MapReduce编程 API入门系列之挖掘气象数据版本1(一) 这篇博文,包括了,实际生产开发非常重要的,单元测试和调试代码.这里不多赘述,直接送上代码. MRUni ...
Hadoop MapReduce编程 API入门系列之挖掘气象数据版本3（九）
不多说,直接上干货! 下面,是版本1. Hadoop MapReduce编程 API入门系列之挖掘气象数据版本1(一) 下面是版本2. Hadoop MapReduce编程 API入门系列之挖掘气象数 ...
Hadoop MapReduce编程 API入门系列之小文件合并（二十九）
不多说,直接上代码. Hadoop 自身提供了几种机制来解决相关的问题,包括HAR,SequeueFile和CombineFileInputFormat. Hadoop 自身提供的几种小文件合并机制 ...
Hadoop MapReduce编程 API入门系列之薪水统计（三十一）
不多说,直接上代码. 代码 package zhouls.bigdata.myMapReduce.SalaryCount; import java.io.IOException; import jav ...
Hadoop MapReduce编程 API入门系列之MapReduce多种输入格式（十七）
不多说,直接上代码. 代码 package zhouls.bigdata.myMapReduce.ScoreCount; import java.io.DataInput; import java.i ...
Hadoop MapReduce编程 API入门系列之Crime数据分析（二十五）（未完）
不多说,直接上代码. 一共12列,我们只需提取有用的列:第二列(犯罪类型).第四列(一周的哪一天).第五列(具体时间)和第七列(犯罪场所). 思路分析基于项目的需求,我们通过以下几步完成: 1.首先 ...
Hadoop MapReduce编程 API入门系列之计数器（二十七）
不多说,直接上代码. MapReduce 计数器是什么? 计数器是用来记录job的执行进度和状态的.它的作用可以理解为日志.我们可以在程序的某个位置插入计数器,记录数据或者进度的变化情况. Ma ...
Hadoop MapReduce编程 API入门系列之join（二十六）（未完）
不多说,直接上代码. 天气记录数据库 Station ID Timestamp Temperature 气象站数据库 Station ID Station Name 气象站和天气记录合并之后的示意图如 ...

随机推荐

POJ 1000
#include <iostream> int main() { using std::cin; using std::cout; using std::endl; int a,b; ci ...
Altova MapForce AMS/ACI/ISF自定义模板
目前为止,我在百度上得到关于MapForce的信息少之又少,所以把自己的一些经验写下来,与大家分享. 如果要生成xml的话,就可以直接创建xml架构当作数据的目标文件. 以下是我做的AMS&A ...
Cache-Control官方文档
https://tools.ietf.org/html/draft-ietf-httpbis-p6-cache-25#page-21 5.2. Cache-Control The "Cach ...
C#—接口和抽象类的区别？
一.接口接口是指对协定进行定义的引用类型,其他类型实现接口,以保证它们支持某些操作.接口指定必须由类提供的成员或实现它的其他接口.与类相似,接口可以包含方法.属性.索引器和事件作为成员. 1.接口存 ...
LeetCode SQL题目（第一弹）
LeetCode SQL题目注意:Leetcode上的SQL编程题都提供了数据表的架构程序,只需要将它贴入本地数据库即可调试自己编写的程序不管是MS-SQL Server还是MySQL都需要登陆才 ...
[luogu 1092] 虫食算 (暴力搜索剪枝)
传送门 Description Input 包含四行. 第一行有一个正整数 (N≤26). 后面的三行,每行有一个由大写字母组成的字符串,分别代表两个加数以及和.这3个字符串左右两端都没有空格,从高位 ...
Linux思维导图之sed、实战习题
命令解释: ◆sed 2p /etc/passwd第二行打印了两次其余一次 ◆sed-n '2p' /etc/passwd 只打印出第二行 ◆sed-n 1,4p' /etc/passwd 只打印出1 ...
ip代理池学习
代理的作用网上有许多售卖代理的网站,也有免费的,不过其功效性会能影响.通过代理网站,我们可以向访问的目标访问器隐藏自己的真实ip,避免ip地址以访问频率过高等原因被封. 步骤 1.搜集一个免费的代理 ...
Jquery-自定义表单验证
jQuery自定义表单验证
DBMS_SPACE包的使用
最近有朋友问到了DBMS_SPACE包的使用,也看了一下,大部分是关于dbms_space.space_usage的使用,space_usage这个过程的例子已经很多了,我也就不再多说了,除了这个过程 ...

Hadoop MapReduce编程 API入门系列之统计学生成绩版本2（十八）

Hadoop MapReduce编程 API入门系列之统计学生成绩版本2（十八）的更多相关文章

随机推荐

热门专题