mapreduce编程练习（二）倒排索引 Combiner的使用以及练习

问题一：请利用Combiner的方式，根据图示内容编写MapReduce程序

示例程序

package com.greate.learn;

import java.io.IOException;
import java.net.URI;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Inverted-index job: for every word found under {@code /GetFile_Statistics/input},
 * writes one line listing each file that contains the word and how often it
 * occurs there, in the form {@code word <TAB> fileA:3;fileB:1}.
 *
 * <p>Design note: the word is the shuffle key from the mapper onwards and every
 * value is a {@code fileName:count} posting. The combiner therefore consumes and
 * produces the same key/value format. This matters because Hadoop may run a
 * combiner zero, one or several times per map output, and because partitioning
 * is decided on the mapper's key: a combiner that rewrites the key or changes
 * the value format only works by accident.
 */
public class GetFile_Statistics extends Configured implements Tool {

	/** Separates the file name from its occurrence count inside a posting, e.g. {@code a.txt:3}. */
	private static final char COUNT_SEPARATOR = ':';

	/** Separates the postings of one word in the final output value. */
	private static final String POSTING_SEPARATOR = ";";

	/**
	 * Adds up the counts of all {@code fileName:count} postings, grouped by file name.
	 *
	 * @param postings postings belonging to a single word
	 * @return total count per file name, sorted by file name so output is deterministic
	 * @throws IOException if a posting does not contain the count separator
	 * @throws NumberFormatException if the count part of a posting is not an integer
	 */
	private static Map<String, Integer> sumPerFile(Iterable<Text> postings) throws IOException {
		Map<String, Integer> totals = new TreeMap<String, Integer>();
		for (Text posting : postings) {
			String entry = posting.toString();
			// Split on the LAST separator so a file name containing ':' is still parsed correctly.
			int split = entry.lastIndexOf(COUNT_SEPARATOR);
			if (split < 0) {
				throw new IOException("Malformed posting (expected fileName:count): " + entry);
			}
			String fileName = entry.substring(0, split);
			int count = Integer.parseInt(entry.substring(split + 1));
			Integer previous = totals.get(fileName);
			totals.put(fileName, previous == null ? count : previous + count);
		}
		return totals;
	}

	/**
	 * Emits {@code (word, fileName:1)} for every whitespace-separated token of every input line.
	 */
	public static class CountMapper extends Mapper<LongWritable, Text, Text, Text>{

		// Reused across map() calls to avoid allocating two Text objects per token.
		private final Text word = new Text();

		private final Text posting = new Text();

		/**
		 * Builds the constant {@code fileName:1} posting once per task; the input
		 * split (and therefore the file name) does not change between map() calls.
		 */
		@Override
		protected void setup(Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
			posting.set(fileName + COUNT_SEPARATOR + "1");
		}

		/**
		 * @param key   byte offset of the line within the file (unused)
		 * @param value one line of input text
		 */
		@Override
		protected void map(LongWritable key,Text value,Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException,InterruptedException{
			StringTokenizer tokenizer = new StringTokenizer(value.toString());
			while (tokenizer.hasMoreTokens()) {
				word.set(tokenizer.nextToken());
				context.write(word, posting);
			}
		}

	}

	/**
	 * Map-side pre-aggregation: collapses the postings of one word into a single
	 * {@code fileName:sum} posting per file. Input and output formats are identical
	 * and the key is left untouched, so applying this step any number of times
	 * (including zero) does not change the final result.
	 */
	public static class Combiner extends Reducer<Text, Text, Text, Text>{

		private final Text posting = new Text();

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
			for (Map.Entry<String, Integer> entry : sumPerFile(values).entrySet()) {
				posting.set(entry.getKey() + COUNT_SEPARATOR + entry.getValue());
				context.write(key, posting);
			}
		}

	}

	/**
	 * Produces the final index line for a word: all of its per-file totals joined
	 * with {@code ;}. Holds no state between calls.
	 */
	public static class CountReducer extends Reducer<Text, Text, Text, Text>{

		private final Text result = new Text();

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
			StringBuilder joined = new StringBuilder();
			for (Map.Entry<String, Integer> entry : sumPerFile(values).entrySet()) {
				if (joined.length() > 0) {
					joined.append(POSTING_SEPARATOR);
				}
				joined.append(entry.getKey()).append(COUNT_SEPARATOR).append(entry.getValue());
			}
			result.set(joined.toString());
			context.write(key, result);
		}

	}

	/** File system handle created by {@link #init()}; {@code null} until then. */
	static FileSystem fs = null;

	/** Configuration created by {@link #init()}; {@code null} until then. */
	static Configuration conf=null;

	/**
	 * Creates the shared configuration pointing at {@code hdfs://localhost:9000/}
	 * and opens the file system as user {@code hadoop}.
	 */
	public static void init() throws Exception{
		conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://localhost:9000/");
		fs = FileSystem.get(new URI("hdfs://localhost:9000/"),conf,"hadoop");
	}

	/**
	 * Configures and runs the job, blocking until it finishes.
	 *
	 * @param args command-line arguments left over after generic option parsing (unused)
	 * @return 0 if the job succeeded; 1 if it failed or the input directory is missing
	 */
	@Override
	public int run(String[] args) throws Exception {
		Job job = Job.getInstance(getConf(),"WordCount");
		job.setJarByClass(GetFile_Statistics.class);
		job.setMapperClass(CountMapper.class);
		job.setCombinerClass(Combiner.class);
		job.setReducerClass(CountReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		// Fall back to the job's own file system when init() was not called first.
		FileSystem fileSystem = (fs != null) ? fs : FileSystem.get(job.getConfiguration());

		Path in = new Path("/GetFile_Statistics/input");
		if(!fileSystem.exists(in)){
			// Submitting a job without any input path only fails later with a less helpful error.
			System.out.println("文件夹不存在，需要创建！");
			return 1;
		}
		FileInputFormat.addInputPath(job, in);

		Path os = new Path("/GetFile_Statistics/output");
		if(fileSystem.exists(os)){
			// The output directory must not exist at submission time, so remove the previous run's output.
			System.out.println("文件夹存在！不再创建！");
			fileSystem.delete(os, true);
		}
		FileOutputFormat.setOutputPath(job, os);

		return job.waitForCompletion(false) ? 0 : 1;
	}

	/**
	 * Entry point: initialises the HDFS connection, runs the job and exits with its status.
	 */
	public static void main(String[] args) throws Exception {
		init();
		// Pass the initialised configuration so the job resolves paths on the same
		// file system that run() checks them on.
		int res = ToolRunner.run(conf, new GetFile_Statistics(), args);
		System.exit(res);
	}

}

问题二：现有一批电话通信清单，记录了用户A拨打某些特殊号码（如120，10086，13800138000等）的记录。需要做一个统计结果，记录拨打给用户B的所有用户A。

示例程序

package com.greate.learn;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

public class PhoneNumber_Statistic extends Configured implements Tool{

	public static void main (String[] args) throws Exception{

			ToolRunner.run(new PhoneNumber_Statistic(), args);

	}

	public int run(String[] arg0) throws Exception{

		Configuration conf = getConf();

	    Job job = new Job(conf);

		job.setJarByClass(getClass());

		FileSystem fs = FileSystem.get(conf);

		FileInputFormat.setInputPaths(job, new Path("/PhoneNumber_Statistics/input/"));

		FileOutputFormat.setOutputPath(job, new Path("/PhoneNumber_Statistics/output/"));

		job.setOutputKeyClass(Text.class);

		job.setOutputValueClass(Text.class);

		job.setMapperClass(numberMap.class);

		job.setReducerClass(numberReduce.class);

		job.waitForCompletion(true);

		return 0;

	}

}

class numberMap extends Mapper<LongWritable, Text, Text, Text>{

		protected void map(LongWritable key, Text value, Context context)

throws IOException,InterruptedException{

			String[] list = value.toString().split(" ");

			String keyy = list[1];

			String valuee = list[0];

			context.write(new Text(keyy), new Text(valuee));

		}

}

class numberReduce extends Reducer<Text, Text, Text, Text>{																					//��������

	protected void reduce(Text key, Iterable<Text> values, Context context)

		throws IOException,InterruptedException{

		String valuee;

        String out = "";

        for(Text value:values){

        	valuee  = value.toString() + " | ";

        	out +=valuee;

        }

        context.write(key,new Text(out));

	}

}

mapreduce编程练习（二）倒排索引 Combiner的使用以及练习的更多相关文章

hadoop2.2编程：mapreduce编程之二次排序
mr自带的例子中的源码SecondarySort,我重新写了一下,基本没变. 这个例子中定义的map和reduce如下,关键是它对输入输出类型的定义:(java泛型编程) public static ...
Hadoop MapReduce编程 API入门系列之倒排索引（二十四）
不多说,直接上代码. 2016-12-12 21:54:04,509 INFO [org.apache.hadoop.metrics.jvm.JvmMetrics] - Initializing JV ...
《Data-Intensive Text Processing with mapReduce》读书笔记之二：mapreduce编程、框架及运行
搜狐视频的屌丝男士第二季大结局了,惊现波多野老师,怀揣着无比鸡冻的心情啊,可惜随着剧情的推进发展,并没有出现期待中的屌丝奇遇,大鹏还是没敢冲破尺度的界线.想百度些种子吧,又不想让电脑留下污点证据,要知 ...
三、MapReduce编程实例
前文一.CentOS7 hadoop3.3.1安装(单机分布式.伪分布式.分布式二.JAVA API实现HDFS MapReduce编程实例 @ 目录前文 MapReduce编程实例前言注意 ...
Hadoop MapReduce编程学习
一直在搞spark,也没时间弄hadoop,不过Hadoop基本的编程我觉得我还是要会吧,看到一篇不错的文章,不过应该应用于hadoop2.0以前,因为代码中有 conf.set("map ...
hadoop2.2编程：使用MapReduce编程实例（转）
原文链接:http://www.cnblogs.com/xia520pi/archive/2012/06/04/2534533.html 从网上搜到的一篇hadoop的编程实例,对于初学者真是帮助太大 ...
MapReduce编程实例4
MapReduce编程实例: MapReduce编程实例(一),详细介绍在集成环境中运行第一个MapReduce程序 WordCount及代码分析 MapReduce编程实例(二),计算学生平均成绩 ...
批处理引擎MapReduce编程模型
批处理引擎MapReduce编程模型作者:尹正杰版权声明:原创作品,谢绝转载!否则将追究法律责任. MapReduce是一个经典的分布式批处理计算引擎,被广泛应用于搜索引擎索引构建,大规模数据处理 ...
大数据笔记（十）——Shuffle与MapReduce编程案例（A）
一.什么是Shuffle yarn-site.xml文件配置的时候有这个参数:yarn.nodemanage.aux-services:mapreduce_shuffle 因为mapreduce程序运 ...
Hadoop MapReduce编程 API入门系列之压缩和计数器（三十）
不多说,直接上代码. Hadoop MapReduce编程 API入门系列之小文件合并(二十九) 生成的结果,作为输入源. 代码 package zhouls.bigdata.myMapReduce. ...

随机推荐

架构设计哲学【三种方式：支持DevOps的原则】
三种方式:支持DevOps的原则 2012年8月22日作者Gene Kim 45条评论这篇文章是杨波老师分享的一篇文章:这几年对他架构影响最深的一篇文章.主要描述是关于DevOps的,但对系统架构同 ...
canvas--总结二
canvas图形绘制矩形绘制 rect(x,y,w,h) 没有独立路径 strokeRect(x,y,w,h) 有独立路径,不影响别的绘制 fillRect(x,y,w,h) 有独立路径,不影响别 ...
Spring(3) --事务,隔离级别,设计模式
什么是事务?事物的四大特性? 事务是指单个逻辑工作单元执行的一系列操作(ACID),这些操作要么全部执行,要么全部不执行,是不可中断的. (1)原子性(Atomicity)是指事务所有操作是不可中断的 ...
剑指offer 面试题10.2：青蛙变态跳台阶
题目描述一只青蛙一次可以跳上1级台阶,也可以跳上2级--它也可以跳上n级.求该青蛙跳上一个n级的台阶总共有多少种跳法. 编程思想因为n级台阶,第一步有n种跳法:跳1级.跳2级.到跳n级跳1级,剩下 ...
CAN总线采样点测试
采样点是什么? 采样点是接受节点判断信号逻辑的位置,CAN通讯属于异步通讯.需要通过不断的重新同步才能保证收发节点的采样准确. 若采样点太靠前,则因为线缆原因,DUT外发报文尚未稳定,容易发生采样错误 ...
docker logs 查看容器日志操作
查看日志官方文档:https://docs.docker.com/engine/reference/commandline/logs/ # 查看指定数量的实时日志 # docker logs -tf ...
为啥使用innodb_flush_method=o_direct 就能减轻io压力呢
为啥使用innodb_flush_method=o_direct 就能减轻io压力呢
Sentinel上下文创建及执行
Sentinel上下文创建及执行,入口示例代码: public static void fun() { Entry entry = null; try { entry = SphU.entry(SOU ...
【老孟Flutter】如何提高Flutter应用程序的性能
首先 Flutter 是一个非常高性能的框架,因此大多时候不需要开发者做出特殊的处理,只需要避免常见的性能问题即可获得高性能的应用程序. 重建最小化原则在调用 setState() 方法重建组件时, ...
BAPI_PO_CHANGE
这两天用BAPI更改采购订单,遇到了一些问题,最后调试解决了.记录如下吧.要修改的是采购订单的物料号和批次,在网上看到其它人写过关于 BAPI_PO_CHANGE的用法,但是具体问题还要具体分析啊. ...

mapreduce编程练习（二）倒排索引 Combiner的使用以及练习

mapreduce编程练习（二）倒排索引 Combiner的使用以及练习的更多相关文章

随机推荐

热门专题