MapReduce Programming Series — 5: Single-Table Join
1. Project description: single-table join (STjoin). The example gives a child-parent table and asks for the grandchild-grandparent table hidden in it, i.e. the table joined with itself: for instance, Tom Lucy together with Lucy Mary yields Tom Mary. The records below make up the input; the actual input file also starts with a header line "child parent", which is why the mappers skip any line whose first field equals "child".
Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alima
Mark Terry
Mark Alma
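Before the MapReduce code, the expected result can be sanity-checked with a small plain-Java sketch (no Hadoop). It is only an illustration of the self-join logic, not part of the original program, and the input file name child_parent.txt is an assumption:

import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class StJoinLocalSketch {
    public static void main(String[] args) throws Exception {
        // parent -> children and child -> parents indexes built from the input file
        Map<String, List<String>> childrenOf = new HashMap<String, List<String>>();
        Map<String, List<String>> parentsOf = new HashMap<String, List<String>>();
        for (String line : Files.readAllLines(Paths.get("child_parent.txt"))) {
            String[] f = line.trim().split("\\s+");
            if (f.length != 2 || f[0].equals("child")) continue; // skip header and blank lines
            childrenOf.computeIfAbsent(f[1], k -> new ArrayList<>()).add(f[0]);
            parentsOf.computeIfAbsent(f[0], k -> new ArrayList<>()).add(f[1]);
        }
        // Anyone who appears both as a parent (of child C) and as a child (of parent G)
        // links C and G as a grandchild-grandparent pair.
        for (Map.Entry<String, List<String>> e : childrenOf.entrySet()) {
            List<String> grandparents = parentsOf.get(e.getKey());
            if (grandparents == null) continue;
            for (String grandchild : e.getValue())
                for (String grandparent : grandparents)
                    System.out.println(grandchild + "\t" + grandparent);
        }
    }
}

Run against the table above, it prints the same twelve grandchild-grandparent pairs that the MapReduce job produces at the end of this post.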
2. Program code:
Version 1 (detailed):

package com.stjoin;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class STjoin {
public static int time = 0;
//The map splits each input line into child and parent, then emits the pair once in
//reverse order as the left table and once in forward order as the right table; a flag
//must be prepended to the value so the reducer can tell the two tables apart.
public static class Map extends Mapper<Object, Text, Text, Text>{
public void map(Object key,Text value,Context context)throws IOException,InterruptedException{
String childname = new String();
String parentname = new String();
String relationtype = new String();
String line = value.toString();
System.out.println("mapper...............");
int i = 0;
while(line.charAt(i) != ' '){
i++;
}
String[] values = {line.substring(0, i),line.substring(i+1)};
System.out.println("child:"+values[0]+" parent:"+values[1]);
if(values[0].compareTo("child") != 0){//如果是child,则为0,否则为-1
childname=values[0];
parentname=values[1];
//left table
relationtype="1";
context.write(new Text(values[1]),new Text(relationtype+"+"+childname+"+"+parentname));
System.out.println("key:"+values[1]+" value: "+relationtype+"+"+childname+"+"+parentname);
//right table
relationtype = "2";
context.write(new Text(values[0]), new Text(relationtype+"+"+childname+"+"+parentname));
System.out.println("key:"+values[0]+" value: "+relationtype+"+"+childname+"+"+parentname);
}
}
}
public static class Reduce extends Reducer<Text, Text, Text, Text>{
public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException{
System.out.println("reduce.....................");
System.out.println("key:"+key+" values:"+values);
//write the header row of the output table
if(time==0){
context.write(new Text("grandchild"), new Text("grandparent"));
time++;
}
int grandchildnum = 0;
String grandchild[] = new String[10];
int grandparentnum = 0;
String grandparent[] = new String[10];
Iterator ite = values.iterator();
while(ite.hasNext()){
String record = ite.next().toString();
System.out.println("record: "+record); int len = record.length();
int i = 2;
if(len==0) continue;
char relationtype = record.charAt(0);
String childname = new String();
String parentname = new String();
//extract the child field of this value
while(record.charAt(i)!='+'){
childname = childname + record.charAt(i);
i++;
}
System.out.println("childname: "+childname);
i=i+1;
//extract the parent field of this value
while(i<len){
parentname=parentname+record.charAt(i);
i++;
}
System.out.println("parentname: "+parentname);
//left table: put the child into the grandchild array
if (relationtype=='1') {
grandchild[grandchildnum] = childname;
grandchildnum++;
}
//right table: put the parent into the grandparent array
else{
grandparent[grandparentnum]=parentname;
grandparentnum++;
}
}
//emit the Cartesian product of the grandchild and grandparent arrays
if(grandparentnum!=0&&grandchildnum!=0){
for(int m = 0 ; m < grandchildnum ; m++){
for(int n = 0 ; n < grandparentnum; n++){
context.write(new Text(grandchild[m]), new Text(grandparent[n]));
System.out.println("grandchild: "+grandchild[m]+" grandparent: "+grandparent[n]);
}
}
}
}
}

public static void main(String [] args)throws Exception{
Configuration conf = new Configuration();
String otherArgs[] = new GenericOptionsParser(conf,args).getRemainingArgs();
if(otherArgs.length != 2){
System.err.println("Usage: sort<in><out>");
System.exit(2);
}
Job job = new Job(conf,"single table join");
job.setJarByClass(STjoin.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true)? 0 : 1);
}
}
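Either version can be packaged into a jar and launched from the command line roughly as follows; the jar name and input path here are assumptions (the logs further down come from a run launched without a job jar, hence the "No job jar file set" warning):

hadoop jar stjoin.jar com.stjoin.STjoin stjoin_input stjoin_output07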
Version 2 (simplified):
package com.stjoin;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class STjoin {
public static int time = 0;

public static class Map extends Mapper<Object, Text, Text, Text>{
public void map(Object key,Text value,Context context)throws IOException,InterruptedException{
String relationtype = new String();
String line = value.toString();
System.out.println("mapper...............");
int i = 0;
//Splitting method 1: scan character by character to find the separator
/* while(line.charAt(i) != ' '){
i++;
}
String[] values = {line.substring(0, i),line.substring(i+1)};
*/
//Splitting method 2: use a StringTokenizer to pull out child and parent
String[] values = new String[10];
StringTokenizer itr = new StringTokenizer(line);
while(itr.hasMoreTokens()){
values[i] = itr.nextToken();
i = i+1;
}
System.out.println("child:"+values[0]+" parent:"+values[1]);
if(values[0].compareTo("child") != 0){//如果是child,则为0,否则为-1 relationtype="1";
context.write(new Text(values[1]),new Text(relationtype+"+"+values[0]));
System.out.println("key:"+values[1]+" value: "+relationtype+"+"+values[0]);
relationtype = "2";
context.write(new Text(values[0]), new Text(relationtype+"+"+values[1]));
System.out.println("key:"+values[0]+" value: "+relationtype+"+"+values[1]);
}
}
}

public static class Reduce extends Reducer<Text, Text, Text, Text>{
public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException{
System.out.println("reduce.....................");
System.out.println("key:"+key+" values:"+values);
if(time==0){
context.write(new Text("grandchild"), new Text("grandparent"));
time++;
}
int grandchildnum = 0;
String grandchild[] = new String[10];
int grandparentnum = 0;
String grandparent[] = new String[10];
String name = new String();
//Traversal method 1: use an explicit Iterator
// Iterator ite = values.iterator();
// while(ite.hasNext()){
//Traversal method 2: use a for-each loop
for(Text val : values){
// String record = ite.next().toString();
String record = val.toString();
System.out.println("record: "+record); int i = 2;
char relationtype = record.charAt(0);
name = record.substring(i);
System.out.println("name: "+name);
if (relationtype=='1') {
grandchild[grandchildnum] = name;
grandchildnum++;
}
else{
grandparent[grandparentnum]=name;
grandparentnum++;
}
}
//Parsing method 3 (not shown here): walk the record with charAt() character by character, as in the detailed version
if(grandparentnum!=0&&grandchildnum!=0){
for(int m = 0 ; m < grandchildnum ; m++){
for(int n = 0 ; n < grandparentnum; n++){
context.write(new Text(grandchild[m]), new Text(grandparent[n]));
System.out.println("grandchild: "+grandchild[m]+" grandparent: "+grandparent[n]);
}
}
}
}
}
public static void main(String [] args)throws Exception{
Configuration conf = new Configuration();
String otherArgs[] = new GenericOptionsParser(conf,args).getRemainingArgs();
if(otherArgs.length != 2){
System.err.println("Usage: sort<in><out>");
System.exit(2);
}
Job job = new Job(conf,"single table join");
job.setJarByClass(STjoin.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true)? 0 : 1);
}
}
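One fragility shared by both versions is the fixed-size String[10] buffers in the reducer: a key with more than ten children or ten parents would overflow them. The sketch below is not from the original post; it shows the same reduce logic with growable lists (the header-row trick with the static time counter is left out), and would need java.util.ArrayList and java.util.List added to the imports:

//Sketch only: an alternative to the Reduce inner class above, using lists instead of String[10].
public static class ListReduce extends Reducer<Text, Text, Text, Text>{
    public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException{
        List<String> grandchildren = new ArrayList<String>();
        List<String> grandparents = new ArrayList<String>();
        for(Text val : values){
            String record = val.toString();      // e.g. "1+Tom" (left table) or "2+Lucy" (right table)
            if(record.length() < 3) continue;    // flag + '+' + at least one name character
            String name = record.substring(2);
            if(record.charAt(0) == '1'){
                grandchildren.add(name);         // left table: candidate grandchild
            }else{
                grandparents.add(name);          // right table: candidate grandparent
            }
        }
        for(String gc : grandchildren){
            for(String gp : grandparents){
                context.write(new Text(gc), new Text(gp));
            }
        }
    }
}

For the sample data it emits the same join pairs as the original Reduce class.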
3. Run log of version 1 (detailed):

14/09/22 20:31:48 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
14/09/22 20:31:48 INFO input.FileInputFormat: Total input paths to process : 1
14/09/22 20:31:48 WARN snappy.LoadSnappy: Snappy native library not loaded
14/09/22 20:31:48 INFO mapred.JobClient: Running job: job_local_0001
14/09/22 20:31:48 INFO util.ProcessTree: setsid exited with exit code 0
14/09/22 20:31:48 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@1a430c37
14/09/22 20:31:48 INFO mapred.MapTask: io.sort.mb = 100
14/09/22 20:31:48 INFO mapred.MapTask: data buffer = 79691776/99614720
14/09/22 20:31:48 INFO mapred.MapTask: record buffer = 262144/327680
mapper...............
child:child parent:parent
mapper...............
child:Tom parent:Lucy
key:Lucy value: 1+Tom+Lucy
key:Tom value: 2+Tom+Lucy
mapper...............
child:Tom parent:Jack
key:Jack value: 1+Tom+Jack
key:Tom value: 2+Tom+Jack
mapper...............
child:Jone parent:Lucy
key:Lucy value: 1+Jone+Lucy
key:Jone value: 2+Jone+Lucy
mapper...............
child:Jone parent:Jack
key:Jack value: 1+Jone+Jack
key:Jone value: 2+Jone+Jack
mapper...............
child:Lucy parent:Mary
key:Mary value: 1+Lucy+Mary
key:Lucy value: 2+Lucy+Mary
mapper...............
child:Lucy parent:Ben
key:Ben value: 1+Lucy+Ben
key:Lucy value: 2+Lucy+Ben
mapper...............
child:Jack parent:Alice
key:Alice value: 1+Jack+Alice
14/09/22 20:31:49 INFO mapred.MapTask: Starting flush of map output
key:Jack value: 2+Jack+Alice
mapper...............
child:Jack parent:Jesse
key:Jesse value: 1+Jack+Jesse
key:Jack value: 2+Jack+Jesse
mapper...............
child:Terry parent:Alice
key:Alice value: 1+Terry+Alice
key:Terry value: 2+Terry+Alice
mapper...............
child:Terry parent:Jesse
key:Jesse value: 1+Terry+Jesse
key:Terry value: 2+Terry+Jesse
mapper...............
child:Philip parent:Terry
key:Terry value: 1+Philip+Terry
key:Philip value: 2+Philip+Terry
mapper...............
child:Philip parent:Alima
key:Alima value: 1+Philip+Alima
key:Philip value: 2+Philip+Alima
mapper...............
child:Mark parent:Terry
key:Terry value: 1+Mark+Terry
key:Mark value: 2+Mark+Terry
mapper...............
child:Mark parent:Alma
key:Alma value: 1+Mark+Alma
key:Mark value: 2+Mark+Alma
14/09/22 20:31:49 INFO mapred.MapTask: Finished spill 0
14/09/22 20:31:49 INFO mapred.Task: Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
14/09/22 20:31:49 INFO mapred.JobClient: map 0% reduce 0%
14/09/22 20:31:51 INFO mapred.LocalJobRunner:
14/09/22 20:31:51 INFO mapred.Task: Task 'attempt_local_0001_m_000000_0' done.
14/09/22 20:31:51 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@5c448d98
14/09/22 20:31:51 INFO mapred.LocalJobRunner:
14/09/22 20:31:51 INFO mapred.Merger: Merging 1 sorted segments
14/09/22 20:31:51 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 564 bytes
14/09/22 20:31:51 INFO mapred.LocalJobRunner:
reduce.....................
key:Alice values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Jack+Alice
childname: Jack
parentname: Alice
record: 1+Terry+Alice
childname: Terry
parentname: Alice
reduce.....................
key:Alima values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Philip+Alima
childname: Philip
parentname: Alima
reduce.....................
key:Alma values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Mark+Alma
childname: Mark
parentname: Alma
reduce.....................
key:Ben values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Lucy+Ben
childname: Lucy
parentname: Ben
reduce.....................
key:Jack values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Jack+Alice
childname: Jack
parentname: Alice
record: 2+Jack+Jesse
childname: Jack
parentname: Jesse
record: 1+Tom+Jack
childname: Tom
parentname: Jack
record: 1+Jone+Jack
childname: Jone
parentname: Jack
grandchild: Tom grandparent: Alice
grandchild: Tom grandparent: Jesse
grandchild: Jone grandparent: Alice
grandchild: Jone grandparent: Jesse
reduce.....................
key:Jesse values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Jack+Jesse
childname: Jack
parentname: Jesse
record: 1+Terry+Jesse
childname: Terry
parentname: Jesse
reduce.....................
key:Jone values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Jone+Lucy
childname: Jone
parentname: Lucy
record: 2+Jone+Jack
childname: Jone
parentname: Jack
reduce.....................
key:Lucy values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Tom+Lucy
childname: Tom
parentname: Lucy
record: 1+Jone+Lucy
childname: Jone
parentname: Lucy
record: 2+Lucy+Mary
childname: Lucy
parentname: Mary
record: 2+Lucy+Ben
childname: Lucy
parentname: Ben
grandchild: Tom grandparent: Mary
grandchild: Tom grandparent: Ben
grandchild: Jone grandparent: Mary
grandchild: Jone grandparent: Ben
reduce.....................
key:Mark values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Mark+Terry
childname: Mark
parentname: Terry
record: 2+Mark+Alma
childname: Mark
parentname: Alma
reduce.....................
key:Mary values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Lucy+Mary
childname: Lucy
parentname: Mary
reduce.....................
key:Philip values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Philip+Terry
childname: Philip
parentname: Terry
record: 2+Philip+Alima
childname: Philip
parentname: Alima
reduce.....................
key:Terry values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Terry+Alice
childname: Terry
parentname: Alice
record: 2+Terry+Jesse
childname: Terry
parentname: Jesse
record: 1+Philip+Terry
childname: Philip
parentname: Terry
record: 1+Mark+Terry
childname: Mark
parentname: Terry
grandchild: Philip grandparent: Alice
grandchild: Philip grandparent: Jesse
grandchild: Mark grandparent: Alice
grandchild: Mark grandparent: Jesse
reduce.....................
key:Tom values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Tom+Jack
childname: Tom
parentname: Jack
record: 2+Tom+Lucy
childname: Tom
parentname: Lucy
14/09/22 20:31:52 INFO mapred.Task: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
14/09/22 20:31:52 INFO mapred.LocalJobRunner:
14/09/22 20:31:52 INFO mapred.Task: Task attempt_local_0001_r_000000_0 is allowed to commit now
14/09/22 20:31:52 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to hdfs://localhost:9000/user/hadoop/stjoin_output07
14/09/22 20:31:52 INFO mapred.JobClient: map 100% reduce 0%
14/09/22 20:31:54 INFO mapred.LocalJobRunner: reduce > reduce
14/09/22 20:31:54 INFO mapred.Task: Task 'attempt_local_0001_r_000000_0' done.
14/09/22 20:31:55 INFO mapred.JobClient: map 100% reduce 100%
14/09/22 20:31:55 INFO mapred.JobClient: Job complete: job_local_0001
14/09/22 20:31:55 INFO mapred.JobClient: Counters: 22
14/09/22 20:31:55 INFO mapred.JobClient: Map-Reduce Framework
14/09/22 20:31:55 INFO mapred.JobClient: Spilled Records=56
14/09/22 20:31:55 INFO mapred.JobClient: Map output materialized bytes=568
14/09/22 20:31:55 INFO mapred.JobClient: Reduce input records=28
14/09/22 20:31:55 INFO mapred.JobClient: Virtual memory (bytes) snapshot=0
14/09/22 20:31:55 INFO mapred.JobClient: Map input records=15
14/09/22 20:31:55 INFO mapred.JobClient: SPLIT_RAW_BYTES=117
14/09/22 20:31:55 INFO mapred.JobClient: Map output bytes=506
14/09/22 20:31:55 INFO mapred.JobClient: Reduce shuffle bytes=0
14/09/22 20:31:55 INFO mapred.JobClient: Physical memory (bytes) snapshot=0
14/09/22 20:31:55 INFO mapred.JobClient: Reduce input groups=13
14/09/22 20:31:55 INFO mapred.JobClient: Combine output records=0
14/09/22 20:31:55 INFO mapred.JobClient: Reduce output records=13
14/09/22 20:31:55 INFO mapred.JobClient: Map output records=28
14/09/22 20:31:55 INFO mapred.JobClient: Combine input records=0
14/09/22 20:31:55 INFO mapred.JobClient: CPU time spent (ms)=0
14/09/22 20:31:55 INFO mapred.JobClient: Total committed heap usage (bytes)=408420352
14/09/22 20:31:55 INFO mapred.JobClient: File Input Format Counters
14/09/22 20:31:55 INFO mapred.JobClient: Bytes Read=163
14/09/22 20:31:55 INFO mapred.JobClient: FileSystemCounters
14/09/22 20:31:55 INFO mapred.JobClient: HDFS_BYTES_READ=326
14/09/22 20:31:55 INFO mapred.JobClient: FILE_BYTES_WRITTEN=81802
14/09/22 20:31:55 INFO mapred.JobClient: FILE_BYTES_READ=912
14/09/22 20:31:55 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=149
14/09/22 20:31:55 INFO mapred.JobClient: File Output Format Counters
14/09/22 20:31:55 INFO mapred.JobClient: Bytes Written=149
Run log of version 2 (simplified):

14/09/22 20:26:02 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
14/09/22 20:26:02 INFO input.FileInputFormat: Total input paths to process : 1
14/09/22 20:26:02 WARN snappy.LoadSnappy: Snappy native library not loaded
14/09/22 20:26:03 INFO mapred.JobClient: Running job: job_local_0001
14/09/22 20:26:03 INFO util.ProcessTree: setsid exited with exit code 0
14/09/22 20:26:03 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@3b8c40d0
14/09/22 20:26:03 INFO mapred.MapTask: io.sort.mb = 100
14/09/22 20:26:03 INFO mapred.MapTask: data buffer = 79691776/99614720
14/09/22 20:26:03 INFO mapred.MapTask: record buffer = 262144/327680
mapper...............
child:child parent:parent
mapper...............
child:Tom parent:Lucy
key:Lucy value: 1+Tom
key:Tom value: 2+Lucy
mapper...............
child:Tom parent:Jack
key:Jack value: 1+Tom
key:Tom value: 2+Jack
mapper...............
child:Jone parent:Lucy
key:Lucy value: 1+Jone
key:Jone value: 2+Lucy
mapper...............
child:Jone parent:Jack
key:Jack value: 1+Jone
key:Jone value: 2+Jack
mapper...............
child:Lucy parent:Mary
key:Mary value: 1+Lucy
key:Lucy value: 2+Mary
mapper...............
child:Lucy parent:Ben
key:Ben value: 1+Lucy
key:Lucy value: 2+Ben
mapper...............
child:Jack parent:Alice
key:Alice value: 1+Jack
key:Jack value: 2+Alice
mapper...............
child:Jack parent:Jesse
key:Jesse value: 1+Jack
key:Jack value: 2+Jesse
mapper...............
child:Terry parent:Alice
key:Alice value: 1+Terry
key:Terry value: 2+Alice
mapper...............
child:Terry parent:Jesse
key:Jesse value: 1+Terry
key:Terry value: 2+Jesse
mapper...............
child:Philip parent:Terry
key:Terry value: 1+Philip
key:Philip value: 2+Terry
mapper...............
child:Philip parent:Alima
key:Alima value: 1+Philip
key:Philip value: 2+Alima
mapper...............
child:Mark parent:Terry
key:Terry value: 1+Mark
key:Mark value: 2+Terry
mapper...............
child:Mark parent:Alma
key:Alma value: 1+Mark
key:Mark value: 2+Alma
14/09/22 20:26:03 INFO mapred.MapTask: Starting flush of map output
14/09/22 20:26:03 INFO mapred.MapTask: Finished spill 0
14/09/22 20:26:03 INFO mapred.Task: Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
14/09/22 20:26:04 INFO mapred.JobClient: map 0% reduce 0%
14/09/22 20:26:06 INFO mapred.LocalJobRunner:
14/09/22 20:26:06 INFO mapred.Task: Task 'attempt_local_0001_m_000000_0' done.
14/09/22 20:26:06 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@4eba27a5
14/09/22 20:26:06 INFO mapred.LocalJobRunner:
14/09/22 20:26:06 INFO mapred.Merger: Merging 1 sorted segments
14/09/22 20:26:06 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 414 bytes
14/09/22 20:26:06 INFO mapred.LocalJobRunner:
reduce.....................
key:Alice values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Jack
name: Jack
record: 1+Terry
name: Terry
reduce.....................
key:Alima values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Philip
name: Philip
reduce.....................
key:Alma values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Mark
name: Mark
reduce.....................
key:Ben values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Lucy
name: Lucy
reduce.....................
key:Jack values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Alice
name: Alice
record: 2+Jesse
name: Jesse
record: 1+Tom
name: Tom
record: 1+Jone
name: Jone
grandchild: Tom grandparent: Alice
grandchild: Tom grandparent: Jesse
grandchild: Jone grandparent: Alice
grandchild: Jone grandparent: Jesse
reduce.....................
key:Jesse values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Jack
name: Jack
record: 1+Terry
name: Terry
reduce.....................
key:Jone values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Lucy
name: Lucy
record: 2+Jack
name: Jack
reduce.....................
key:Lucy values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Tom
name: Tom
record: 1+Jone
name: Jone
record: 2+Mary
name: Mary
record: 2+Ben
name: Ben
grandchild: Tom grandparent: Mary
grandchild: Tom grandparent: Ben
grandchild: Jone grandparent: Mary
grandchild: Jone grandparent: Ben
reduce.....................
key:Mark values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Terry
name: Terry
record: 2+Alma
name: Alma
reduce.....................
key:Mary values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Lucy
name: Lucy
reduce.....................
key:Philip values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Terry
name: Terry
record: 2+Alima
name: Alima
reduce.....................
key:Terry values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Alice
name: Alice
record: 2+Jesse
name: Jesse
record: 1+Philip
name: Philip
record: 1+Mark
name: Mark
grandchild: Philip grandparent: Alice
grandchild: Philip grandparent: Jesse
grandchild: Mark grandparent: Alice
grandchild: Mark grandparent: Jesse
reduce.....................
key:Tom values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Jack
name: Jack
record: 2+Lucy
name: Lucy
14/09/22 20:26:06 INFO mapred.Task: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
14/09/22 20:26:06 INFO mapred.LocalJobRunner:
14/09/22 20:26:06 INFO mapred.Task: Task attempt_local_0001_r_000000_0 is allowed to commit now
14/09/22 20:26:06 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to hdfs://localhost:9000/user/hadoop/stjoin_output06
14/09/22 20:26:07 INFO mapred.JobClient: map 100% reduce 0%
14/09/22 20:26:09 INFO mapred.LocalJobRunner: reduce > reduce
14/09/22 20:26:09 INFO mapred.Task: Task 'attempt_local_0001_r_000000_0' done.
14/09/22 20:26:10 INFO mapred.JobClient: map 100% reduce 100%
14/09/22 20:26:10 INFO mapred.JobClient: Job complete: job_local_0001
14/09/22 20:26:10 INFO mapred.JobClient: Counters: 22
14/09/22 20:26:10 INFO mapred.JobClient: Map-Reduce Framework
14/09/22 20:26:10 INFO mapred.JobClient: Spilled Records=56
14/09/22 20:26:10 INFO mapred.JobClient: Map output materialized bytes=418
14/09/22 20:26:10 INFO mapred.JobClient: Reduce input records=28
14/09/22 20:26:10 INFO mapred.JobClient: Virtual memory (bytes) snapshot=0
14/09/22 20:26:10 INFO mapred.JobClient: Map input records=15
14/09/22 20:26:10 INFO mapred.JobClient: SPLIT_RAW_BYTES=117
14/09/22 20:26:10 INFO mapred.JobClient: Map output bytes=356
14/09/22 20:26:10 INFO mapred.JobClient: Reduce shuffle bytes=0
14/09/22 20:26:10 INFO mapred.JobClient: Physical memory (bytes) snapshot=0
14/09/22 20:26:10 INFO mapred.JobClient: Reduce input groups=13
14/09/22 20:26:10 INFO mapred.JobClient: Combine output records=0
14/09/22 20:26:10 INFO mapred.JobClient: Reduce output records=13
14/09/22 20:26:10 INFO mapred.JobClient: Map output records=28
14/09/22 20:26:10 INFO mapred.JobClient: Combine input records=0
14/09/22 20:26:10 INFO mapred.JobClient: CPU time spent (ms)=0
14/09/22 20:26:10 INFO mapred.JobClient: Total committed heap usage (bytes)=406847488
14/09/22 20:26:10 INFO mapred.JobClient: File Input Format Counters
14/09/22 20:26:10 INFO mapred.JobClient: Bytes Read=163
14/09/22 20:26:10 INFO mapred.JobClient: FileSystemCounters
14/09/22 20:26:10 INFO mapred.JobClient: HDFS_BYTES_READ=326
14/09/22 20:26:10 INFO mapred.JobClient: FILE_BYTES_WRITTEN=81502
14/09/22 20:26:10 INFO mapred.JobClient: FILE_BYTES_READ=762
14/09/22 20:26:10 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=149
14/09/22 20:26:10 INFO mapred.JobClient: File Output Format Counters
14/09/22 20:26:10 INFO mapred.JobClient: Bytes Written=149
4. Final output (the output file also begins with the header row "grandchild grandparent" written by the reducer):

Tom Alice
Tom Jesse
Jone Alice
Jone Jesse
Tom Mary
Tom Ben
Jone Mary
Jone Ben
Philip Alice
Philip Jesse
Mark Alice
Mark Jesse