MapReduce编程系列 — 5:单表关联
1、项目名称:单表关联(从 child-parent 表中求 grandchild-grandparent 表)

2、测试数据(child-parent 表):
Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alima
Mark Terry
Mark Alma

3、程序代码(版本一,详细版):
package com.stjoin; import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser; public class STjoin {
public static int time = 0;
//map将输入分割成child和parent,然后正序输出一次作为右表,
//反序输出一次作为左表,需要注意的是在输出的value中必须加上左右表区别标志
public static class Map extends Mapper<Object, Text, Text, Text>{
public void map(Object key,Text value,Context context)throws IOException,InterruptedException{
String childname = new String();
String parentname = new String();
String relationtype = new String();
String line = value.toString();
System.out.println("mapper...............");
int i = 0;
while(line.charAt(i) != ' '){
i++;
}
String[] values = {line.substring(0, i),line.substring(i+1)};
System.out.println("child:"+values[0]+" parent:"+values[1]);
if(values[0].compareTo("child") != 0){//如果是child,则为0,否则为-1
childname=values[0];
parentname=values[1];
//左表
relationtype="1";
context.write(new Text(values[1]),new Text(relationtype+"+"+childname+"+"+parentname));
System.out.println("key:"+values[1]+" value: "+relationtype+"+"+childname+"+"+parentname);
//右表
relationtype = "2";
context.write(new Text(values[0]), new Text(relationtype+"+"+childname+"+"+parentname));
System.out.println("key:"+values[0]+" value: "+relationtype+"+"+childname+"+"+parentname);
}
}
}
public static class Reduce extends Reducer<Text, Text, Text, Text>{
public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException{
System.out.println("reduce.....................");
System.out.println("key:"+key+" values:"+values);
//输出表头
if(time==0){
context.write(new Text("grandchild"), new Text("grandparent"));
time++;
}
int grandchildnum = 0;
String grandchild[] = new String[10];
int grandparentnum = 0;
String grandparent[] = new String[10]; Iterator ite = values.iterator();
while(ite.hasNext()){
String record = ite.next().toString();
System.out.println("record: "+record); int len = record.length();
int i = 2;
if(len==0) continue;
char relationtype = record.charAt(0);
String childname = new String();
String parentname = new String();
//获取value-list中的value的child
while(record.charAt(i)!='+'){
childname = childname + record.charAt(i);
i++;
}
System.out.println("childname: "+childname);
i=i+1;
//获取value-list中的value的parent
while(i<len){
parentname=parentname+record.charAt(i);
i++;
}
System.out.println("parentname: "+parentname);
//左表,取出child放入grandchild数组中
if (relationtype=='1') {
grandchild[grandchildnum] = childname;
grandchildnum++;
}
//右表,取出child放入grandparent数组中
else{
grandparent[grandparentnum]=parentname;
grandparentnum++;
}
}
//grandchild和grandparent数组求笛卡儿积
if(grandparentnum!=0&&grandchildnum!=0){
for(int m = 0 ; m < grandchildnum ; m++){
for(int n = 0 ; n < grandparentnum; n++){
context.write(new Text(grandchild[m]), new Text(grandparent[n]));
System.out.println("grandchild: "+grandchild[m]+" grandparent: "+grandparent[n]);
}
}
}
}
} public static void main(String [] args)throws Exception{
Configuration conf = new Configuration();
String otherArgs[] = new GenericOptionsParser(conf,args).getRemainingArgs();
if(otherArgs.length != 2){
System.err.println("Usage: sort<in><out>");
System.exit(2);
}
Job job = new Job(conf,"single table join");
job.setJarByClass(STjoin.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job,new Path(otherArgs[1])); System.exit(job.waitForCompletion(true)? 0 : 1);
}
}
版本2(简化版):
package com.stjoin; import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
// Fixed: java.util.StringTokenizer is used in map() but was never imported,
// so version 2 did not compile as posted.
import java.util.StringTokenizer;
import java.util.ArrayList;
import java.util.List;

/**
 * Single-table self-join, simplified version: the mapper emits only the
 * "other" person per row (tag "1" => a child of the key, tag "2" => a parent
 * of the key), so the reducer value carries one name instead of two.
 */
public class STjoin {

    // Header flag: the first reduce() call writes the header row once.
    // NOTE(review): per-JVM, not per-job — each reducer task prints its own.
    public static int time = 0;

    /**
     * Mapper: tokenizes each line into child and parent, then emits
     * (parent, "1+child") for the left table and (child, "2+parent") for the
     * right table.
     */
    public static class Map extends Mapper<Object, Text, Text, Text> {
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            System.out.println("mapper...............");
            // Tokenize on whitespace; a growable list instead of the original
            // String[10] avoids overflow on lines with many tokens.
            List<String> tokens = new ArrayList<String>();
            StringTokenizer itr = new StringTokenizer(line);
            while (itr.hasMoreTokens()) {
                tokens.add(itr.nextToken());
            }
            String[] values = tokens.toArray(new String[0]);
            System.out.println("child:" + values[0] + " parent:" + values[1]);
            // Skip the header row whose first token is the literal "child".
            if (values[0].compareTo("child") != 0) {
                // Left table: key = parent, value = "1+<child>".
                String relationtype = "1";
                context.write(new Text(values[1]),
                        new Text(relationtype + "+" + values[0]));
                System.out.println("key:" + values[1] + " value: "
                        + relationtype + "+" + values[0]);
                // Right table: key = child, value = "2+<parent>".
                relationtype = "2";
                context.write(new Text(values[0]),
                        new Text(relationtype + "+" + values[1]));
                System.out.println("key:" + values[0] + " value: "
                        + relationtype + "+" + values[1]);
            }
        }
    }

    /**
     * Reducer: collects the key's children (tag "1") and parents (tag "2"),
     * then emits their Cartesian product as grandchild-grandparent pairs.
     */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            System.out.println("reduce.....................");
            System.out.println("key:" + key + " values:" + values);
            // Emit the header row before any data row.
            if (time == 0) {
                context.write(new Text("grandchild"), new Text("grandparent"));
                time++;
            }
            // Growable lists instead of fixed String[10] arrays, so keys with
            // more than ten relatives no longer overflow.
            List<String> grandchild = new ArrayList<String>();
            List<String> grandparent = new ArrayList<String>();
            for (Text val : values) {
                // Value format: "<tag>+<name>"; tag at index 0, name from index 2.
                String record = val.toString();
                System.out.println("record: " + record);
                char relationtype = record.charAt(0);
                String name = record.substring(2);
                System.out.println("name: " + name);
                if (relationtype == '1') {
                    grandchild.add(name);
                } else {
                    grandparent.add(name);
                }
            }
            // Cartesian product (empty list => no output, matching the
            // original grandparentnum/grandchildnum != 0 guard).
            for (String gc : grandchild) {
                for (String gp : grandparent) {
                    context.write(new Text(gc), new Text(gp));
                    System.out.println("grandchild: " + gc + " grandparent: " + gp);
                }
            }
        }
    }

    /**
     * Job driver. Expects exactly two arguments: input path and output path.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            // Fixed: the original usage message said "sort".
            System.err.println("Usage: stjoin <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "single table join");
        job.setJarByClass(STjoin.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
14/09/22 20:31:48 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
14/09/22 20:31:48 INFO input.FileInputFormat: Total input paths to process : 1
14/09/22 20:31:48 WARN snappy.LoadSnappy: Snappy native library not loaded
14/09/22 20:31:48 INFO mapred.JobClient: Running job: job_local_0001
14/09/22 20:31:48 INFO util.ProcessTree: setsid exited with exit code 0
14/09/22 20:31:48 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@1a430c37
14/09/22 20:31:48 INFO mapred.MapTask: io.sort.mb = 100
14/09/22 20:31:48 INFO mapred.MapTask: data buffer = 79691776/99614720
14/09/22 20:31:48 INFO mapred.MapTask: record buffer = 262144/327680
mapper...............
child:child parent:parent
mapper...............
child:Tom parent:Lucy
key:Lucy value: 1+Tom+Lucy
key:Tom value: 2+Tom+Lucy
mapper...............
child:Tom parent:Jack
key:Jack value: 1+Tom+Jack
key:Tom value: 2+Tom+Jack
mapper...............
child:Jone parent:Lucy
key:Lucy value: 1+Jone+Lucy
key:Jone value: 2+Jone+Lucy
mapper...............
child:Jone parent:Jack
key:Jack value: 1+Jone+Jack
key:Jone value: 2+Jone+Jack
mapper...............
child:Lucy parent:Mary
key:Mary value: 1+Lucy+Mary
key:Lucy value: 2+Lucy+Mary
mapper...............
child:Lucy parent:Ben
key:Ben value: 1+Lucy+Ben
key:Lucy value: 2+Lucy+Ben
mapper...............
child:Jack parent:Alice
key:Alice value: 1+Jack+Alice
14/09/22 20:31:49 INFO mapred.MapTask: Starting flush of map output
key:Jack value: 2+Jack+Alice
mapper...............
child:Jack parent:Jesse
key:Jesse value: 1+Jack+Jesse
key:Jack value: 2+Jack+Jesse
mapper...............
child:Terry parent:Alice
key:Alice value: 1+Terry+Alice
key:Terry value: 2+Terry+Alice
mapper...............
child:Terry parent:Jesse
key:Jesse value: 1+Terry+Jesse
key:Terry value: 2+Terry+Jesse
mapper...............
child:Philip parent:Terry
key:Terry value: 1+Philip+Terry
key:Philip value: 2+Philip+Terry
mapper...............
child:Philip parent:Alima
key:Alima value: 1+Philip+Alima
key:Philip value: 2+Philip+Alima
mapper...............
child:Mark parent:Terry
key:Terry value: 1+Mark+Terry
key:Mark value: 2+Mark+Terry
mapper...............
child:Mark parent:Alma
key:Alma value: 1+Mark+Alma
key:Mark value: 2+Mark+Alma
14/09/22 20:31:49 INFO mapred.MapTask: Finished spill 0
14/09/22 20:31:49 INFO mapred.Task: Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
14/09/22 20:31:49 INFO mapred.JobClient: map 0% reduce 0%
14/09/22 20:31:51 INFO mapred.LocalJobRunner:
14/09/22 20:31:51 INFO mapred.Task: Task 'attempt_local_0001_m_000000_0' done.
14/09/22 20:31:51 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@5c448d98
14/09/22 20:31:51 INFO mapred.LocalJobRunner:
14/09/22 20:31:51 INFO mapred.Merger: Merging 1 sorted segments
14/09/22 20:31:51 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 564 bytes
14/09/22 20:31:51 INFO mapred.LocalJobRunner:
reduce.....................
key:Alice values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Jack+Alice
childname: Jack
parentname: Alice
record: 1+Terry+Alice
childname: Terry
parentname: Alice
reduce.....................
key:Alima values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Philip+Alima
childname: Philip
parentname: Alima
reduce.....................
key:Alma values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Mark+Alma
childname: Mark
parentname: Alma
reduce.....................
key:Ben values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Lucy+Ben
childname: Lucy
parentname: Ben
reduce.....................
key:Jack values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Jack+Alice
childname: Jack
parentname: Alice
record: 2+Jack+Jesse
childname: Jack
parentname: Jesse
record: 1+Tom+Jack
childname: Tom
parentname: Jack
record: 1+Jone+Jack
childname: Jone
parentname: Jack
grandchild: Tom grandparent: Alice
grandchild: Tom grandparent: Jesse
grandchild: Jone grandparent: Alice
grandchild: Jone grandparent: Jesse
reduce.....................
key:Jesse values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Jack+Jesse
childname: Jack
parentname: Jesse
record: 1+Terry+Jesse
childname: Terry
parentname: Jesse
reduce.....................
key:Jone values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Jone+Lucy
childname: Jone
parentname: Lucy
record: 2+Jone+Jack
childname: Jone
parentname: Jack
reduce.....................
key:Lucy values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Tom+Lucy
childname: Tom
parentname: Lucy
record: 1+Jone+Lucy
childname: Jone
parentname: Lucy
record: 2+Lucy+Mary
childname: Lucy
parentname: Mary
record: 2+Lucy+Ben
childname: Lucy
parentname: Ben
grandchild: Tom grandparent: Mary
grandchild: Tom grandparent: Ben
grandchild: Jone grandparent: Mary
grandchild: Jone grandparent: Ben
reduce.....................
key:Mark values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Mark+Terry
childname: Mark
parentname: Terry
record: 2+Mark+Alma
childname: Mark
parentname: Alma
reduce.....................
key:Mary values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 1+Lucy+Mary
childname: Lucy
parentname: Mary
reduce.....................
key:Philip values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Philip+Terry
childname: Philip
parentname: Terry
record: 2+Philip+Alima
childname: Philip
parentname: Alima
reduce.....................
key:Terry values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Terry+Alice
childname: Terry
parentname: Alice
record: 2+Terry+Jesse
childname: Terry
parentname: Jesse
record: 1+Philip+Terry
childname: Philip
parentname: Terry
record: 1+Mark+Terry
childname: Mark
parentname: Terry
grandchild: Philip grandparent: Alice
grandchild: Philip grandparent: Jesse
grandchild: Mark grandparent: Alice
grandchild: Mark grandparent: Jesse
reduce.....................
key:Tom values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@f67d4d0
record: 2+Tom+Jack
childname: Tom
parentname: Jack
record: 2+Tom+Lucy
childname: Tom
parentname: Lucy
14/09/22 20:31:52 INFO mapred.Task: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
14/09/22 20:31:52 INFO mapred.LocalJobRunner:
14/09/22 20:31:52 INFO mapred.Task: Task attempt_local_0001_r_000000_0 is allowed to commit now
14/09/22 20:31:52 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to hdfs://localhost:9000/user/hadoop/stjoin_output07
14/09/22 20:31:52 INFO mapred.JobClient: map 100% reduce 0%
14/09/22 20:31:54 INFO mapred.LocalJobRunner: reduce > reduce
14/09/22 20:31:54 INFO mapred.Task: Task 'attempt_local_0001_r_000000_0' done.
14/09/22 20:31:55 INFO mapred.JobClient: map 100% reduce 100%
14/09/22 20:31:55 INFO mapred.JobClient: Job complete: job_local_0001
14/09/22 20:31:55 INFO mapred.JobClient: Counters: 22
14/09/22 20:31:55 INFO mapred.JobClient: Map-Reduce Framework
14/09/22 20:31:55 INFO mapred.JobClient: Spilled Records=56
14/09/22 20:31:55 INFO mapred.JobClient: Map output materialized bytes=568
14/09/22 20:31:55 INFO mapred.JobClient: Reduce input records=28
14/09/22 20:31:55 INFO mapred.JobClient: Virtual memory (bytes) snapshot=0
14/09/22 20:31:55 INFO mapred.JobClient: Map input records=15
14/09/22 20:31:55 INFO mapred.JobClient: SPLIT_RAW_BYTES=117
14/09/22 20:31:55 INFO mapred.JobClient: Map output bytes=506
14/09/22 20:31:55 INFO mapred.JobClient: Reduce shuffle bytes=0
14/09/22 20:31:55 INFO mapred.JobClient: Physical memory (bytes) snapshot=0
14/09/22 20:31:55 INFO mapred.JobClient: Reduce input groups=13
14/09/22 20:31:55 INFO mapred.JobClient: Combine output records=0
14/09/22 20:31:55 INFO mapred.JobClient: Reduce output records=13
14/09/22 20:31:55 INFO mapred.JobClient: Map output records=28
14/09/22 20:31:55 INFO mapred.JobClient: Combine input records=0
14/09/22 20:31:55 INFO mapred.JobClient: CPU time spent (ms)=0
14/09/22 20:31:55 INFO mapred.JobClient: Total committed heap usage (bytes)=408420352
14/09/22 20:31:55 INFO mapred.JobClient: File Input Format Counters
14/09/22 20:31:55 INFO mapred.JobClient: Bytes Read=163
14/09/22 20:31:55 INFO mapred.JobClient: FileSystemCounters
14/09/22 20:31:55 INFO mapred.JobClient: HDFS_BYTES_READ=326
14/09/22 20:31:55 INFO mapred.JobClient: FILE_BYTES_WRITTEN=81802
14/09/22 20:31:55 INFO mapred.JobClient: FILE_BYTES_READ=912
14/09/22 20:31:55 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=149
14/09/22 20:31:55 INFO mapred.JobClient: File Output Format Counters
14/09/22 20:31:55 INFO mapred.JobClient: Bytes Written=149
14/09/22 20:26:02 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
14/09/22 20:26:02 INFO input.FileInputFormat: Total input paths to process : 1
14/09/22 20:26:02 WARN snappy.LoadSnappy: Snappy native library not loaded
14/09/22 20:26:03 INFO mapred.JobClient: Running job: job_local_0001
14/09/22 20:26:03 INFO util.ProcessTree: setsid exited with exit code 0
14/09/22 20:26:03 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@3b8c40d0
14/09/22 20:26:03 INFO mapred.MapTask: io.sort.mb = 100
14/09/22 20:26:03 INFO mapred.MapTask: data buffer = 79691776/99614720
14/09/22 20:26:03 INFO mapred.MapTask: record buffer = 262144/327680
mapper...............
child:child parent:parent
mapper...............
child:Tom parent:Lucy
key:Lucy value: 1+Tom
key:Tom value: 2+Lucy
mapper...............
child:Tom parent:Jack
key:Jack value: 1+Tom
key:Tom value: 2+Jack
mapper...............
child:Jone parent:Lucy
key:Lucy value: 1+Jone
key:Jone value: 2+Lucy
mapper...............
child:Jone parent:Jack
key:Jack value: 1+Jone
key:Jone value: 2+Jack
mapper...............
child:Lucy parent:Mary
key:Mary value: 1+Lucy
key:Lucy value: 2+Mary
mapper...............
child:Lucy parent:Ben
key:Ben value: 1+Lucy
key:Lucy value: 2+Ben
mapper...............
child:Jack parent:Alice
key:Alice value: 1+Jack
key:Jack value: 2+Alice
mapper...............
child:Jack parent:Jesse
key:Jesse value: 1+Jack
key:Jack value: 2+Jesse
mapper...............
child:Terry parent:Alice
key:Alice value: 1+Terry
key:Terry value: 2+Alice
mapper...............
child:Terry parent:Jesse
key:Jesse value: 1+Terry
key:Terry value: 2+Jesse
mapper...............
child:Philip parent:Terry
key:Terry value: 1+Philip
key:Philip value: 2+Terry
mapper...............
child:Philip parent:Alima
key:Alima value: 1+Philip
key:Philip value: 2+Alima
mapper...............
child:Mark parent:Terry
key:Terry value: 1+Mark
key:Mark value: 2+Terry
mapper...............
child:Mark parent:Alma
key:Alma value: 1+Mark
key:Mark value: 2+Alma
14/09/22 20:26:03 INFO mapred.MapTask: Starting flush of map output
14/09/22 20:26:03 INFO mapred.MapTask: Finished spill 0
14/09/22 20:26:03 INFO mapred.Task: Task:attempt_local_0001_m_000000_0 is done. And is in the process of commiting
14/09/22 20:26:04 INFO mapred.JobClient: map 0% reduce 0%
14/09/22 20:26:06 INFO mapred.LocalJobRunner:
14/09/22 20:26:06 INFO mapred.Task: Task 'attempt_local_0001_m_000000_0' done.
14/09/22 20:26:06 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@4eba27a5
14/09/22 20:26:06 INFO mapred.LocalJobRunner:
14/09/22 20:26:06 INFO mapred.Merger: Merging 1 sorted segments
14/09/22 20:26:06 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 414 bytes
14/09/22 20:26:06 INFO mapred.LocalJobRunner:
reduce.....................
key:Alice values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Jack
name: Jack
record: 1+Terry
name: Terry
reduce.....................
key:Alima values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Philip
name: Philip
reduce.....................
key:Alma values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Mark
name: Mark
reduce.....................
key:Ben values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Lucy
name: Lucy
reduce.....................
key:Jack values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Alice
name: Alice
record: 2+Jesse
name: Jesse
record: 1+Tom
name: Tom
record: 1+Jone
name: Jone
grandchild: Tom grandparent: Alice
grandchild: Tom grandparent: Jesse
grandchild: Jone grandparent: Alice
grandchild: Jone grandparent: Jesse
reduce.....................
key:Jesse values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Jack
name: Jack
record: 1+Terry
name: Terry
reduce.....................
key:Jone values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Lucy
name: Lucy
record: 2+Jack
name: Jack
reduce.....................
key:Lucy values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Tom
name: Tom
record: 1+Jone
name: Jone
record: 2+Mary
name: Mary
record: 2+Ben
name: Ben
grandchild: Tom grandparent: Mary
grandchild: Tom grandparent: Ben
grandchild: Jone grandparent: Mary
grandchild: Jone grandparent: Ben
reduce.....................
key:Mark values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Terry
name: Terry
record: 2+Alma
name: Alma
reduce.....................
key:Mary values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 1+Lucy
name: Lucy
reduce.....................
key:Philip values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Terry
name: Terry
record: 2+Alima
name: Alima
reduce.....................
key:Terry values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Alice
name: Alice
record: 2+Jesse
name: Jesse
record: 1+Philip
name: Philip
record: 1+Mark
name: Mark
grandchild: Philip grandparent: Alice
grandchild: Philip grandparent: Jesse
grandchild: Mark grandparent: Alice
grandchild: Mark grandparent: Jesse
reduce.....................
key:Tom values:org.apache.hadoop.mapreduce.ReduceContext$ValueIterable@5914dda1
record: 2+Jack
name: Jack
record: 2+Lucy
name: Lucy
14/09/22 20:26:06 INFO mapred.Task: Task:attempt_local_0001_r_000000_0 is done. And is in the process of commiting
14/09/22 20:26:06 INFO mapred.LocalJobRunner:
14/09/22 20:26:06 INFO mapred.Task: Task attempt_local_0001_r_000000_0 is allowed to commit now
14/09/22 20:26:06 INFO output.FileOutputCommitter: Saved output of task 'attempt_local_0001_r_000000_0' to hdfs://localhost:9000/user/hadoop/stjoin_output06
14/09/22 20:26:07 INFO mapred.JobClient: map 100% reduce 0%
14/09/22 20:26:09 INFO mapred.LocalJobRunner: reduce > reduce
14/09/22 20:26:09 INFO mapred.Task: Task 'attempt_local_0001_r_000000_0' done.
14/09/22 20:26:10 INFO mapred.JobClient: map 100% reduce 100%
14/09/22 20:26:10 INFO mapred.JobClient: Job complete: job_local_0001
14/09/22 20:26:10 INFO mapred.JobClient: Counters: 22
14/09/22 20:26:10 INFO mapred.JobClient: Map-Reduce Framework
14/09/22 20:26:10 INFO mapred.JobClient: Spilled Records=56
14/09/22 20:26:10 INFO mapred.JobClient: Map output materialized bytes=418
14/09/22 20:26:10 INFO mapred.JobClient: Reduce input records=28
14/09/22 20:26:10 INFO mapred.JobClient: Virtual memory (bytes) snapshot=0
14/09/22 20:26:10 INFO mapred.JobClient: Map input records=15
14/09/22 20:26:10 INFO mapred.JobClient: SPLIT_RAW_BYTES=117
14/09/22 20:26:10 INFO mapred.JobClient: Map output bytes=356
14/09/22 20:26:10 INFO mapred.JobClient: Reduce shuffle bytes=0
14/09/22 20:26:10 INFO mapred.JobClient: Physical memory (bytes) snapshot=0
14/09/22 20:26:10 INFO mapred.JobClient: Reduce input groups=13
14/09/22 20:26:10 INFO mapred.JobClient: Combine output records=0
14/09/22 20:26:10 INFO mapred.JobClient: Reduce output records=13
14/09/22 20:26:10 INFO mapred.JobClient: Map output records=28
14/09/22 20:26:10 INFO mapred.JobClient: Combine input records=0
14/09/22 20:26:10 INFO mapred.JobClient: CPU time spent (ms)=0
14/09/22 20:26:10 INFO mapred.JobClient: Total committed heap usage (bytes)=406847488
14/09/22 20:26:10 INFO mapred.JobClient: File Input Format Counters
14/09/22 20:26:10 INFO mapred.JobClient: Bytes Read=163
14/09/22 20:26:10 INFO mapred.JobClient: FileSystemCounters
14/09/22 20:26:10 INFO mapred.JobClient: HDFS_BYTES_READ=326
14/09/22 20:26:10 INFO mapred.JobClient: FILE_BYTES_WRITTEN=81502
14/09/22 20:26:10 INFO mapred.JobClient: FILE_BYTES_READ=762
14/09/22 20:26:10 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=149
14/09/22 20:26:10 INFO mapred.JobClient: File Output Format Counters
14/09/22 20:26:10 INFO mapred.JobClient: Bytes Written=149
Tom Alice
Tom Jesse
Jone Alice
Jone Jesse
Tom Mary
Tom Ben
Jone Mary
Jone Ben
Philip Alice
Philip Jesse
Mark Alice
Mark Jesse
MapReduce编程系列 — 5:单表关联的更多相关文章
- MapReduce应用案例--单表关联
1. 实例描述 单表关联这个实例要求从给出的数据中寻找出所关心的数据,它是对原始数据所包含信息的挖掘. 实例中给出child-parent 表, 求出grandchild-grandparent表. ...
- Hadoop on Mac with IntelliJ IDEA - 8 单表关联NullPointerException
简化陆喜恒. Hadoop实战(第2版)5.4单表关联的代码时遇到空指向异常,经分析是逻辑问题,在此做个记录. 环境:Mac OS X 10.9.5, IntelliJ IDEA 13.1.5, Ha ...
- Hadoop 单表关联
前面的实例都是在数据上进行一些简单的处理,为进一步的操作打基础.单表关联这个实例要求从给出的数据中寻找到所关心的数据,它是对原始数据所包含信息的挖掘.下面进入这个实例. 1.实例描述 实例中给出chi ...
- MapRedece(单表关联)
源数据:Child--Parent表 Tom Lucy Tom Jack Jone Lucy Jone Jack Lucy Marry Lucy Ben Jack Alice Jack Jesse T ...
- MR案例:单表关联查询
"单表关联"这个实例要求从给出的数据中寻找所关心的数据,它是对原始数据所包含信息的挖掘. 需求:实例中给出 child-parent(孩子—父母)表,要求输出 grandchild ...
- MySQL 性能优化系列之一 单表预处理
MySQL 性能优化系列之一 单表预处理 背景介绍 我们经常在写多表关联的SQL时,会想到 left jion(左关联),right jion(右关联),inner jion(内关联)等. 但是,当表 ...
- MySql系列之单表查询
单表查询的语法 SELECT 字段1,字段2... FROM 表名 WHERE 条件 GROUP BY field HAVING 筛选 ORDER BY field LIMIT 限制条数 关键字的执行 ...
- MapReduce编程系列 — 6:多表关联
1.项目名称: 2.程序代码: 版本一(详细版): package com.mtjoin; import java.io.IOException; import java.util.Iterator; ...
- 【原创】MapReduce编程系列之表连接
问题描述 需要连接的表如下:其中左边是child,右边是parent,我们要做的是找出grandchild和grandparent的对应关系,为此需要进行表的连接. Tom Lucy Tom Jim ...
随机推荐
- c 指针兼容性问题
指针兼容性问题: const指针不能赋值给非const指针. 非const指针可以赋值给const 指针,但前提是只是一层间接运算 Example: int *pt1; const *pt2; con ...
- linux总线、设备和设备驱动的关系
之一:bus_type 总线是处理器和一个或多个设备之间的通道,在设备模型中,所有的设备都通过总线相连,甚至是内部的虚拟"platform"总线.可以通过ls -l /sys/bu ...
- jQuery将字符串转换成json
_menus = eval('(' + json.data + ')'); _menus = parseJSON('(' + json.data + ')');
- nginx demo
server_names_hash_bucket_size 512;upstream node_app { server 127.0.0.1:3000; } server { listen 80; s ...
- 修改myeclipse的jsp模板
在myeclipse的安装目录下: C:\Users\Seeker\AppData\Local\MyEclipse Professional\plugins 找到com.genuitec.eclips ...
- 常用汉字的Unicode码表
\u96d5\u864e\u7684\u4e00\u4e86\u662f\u6211\u4e0d\u5728\u4eba\u4eec\u6709\u6765\u4ed6\u8fd9\u4e0a\u77 ...
- 动画(Animation) 之 (闪烁、左右摇摆、上下晃动等效果)
左右晃动的效果: (这边显示没那么流畅) 一.续播 (不知道取什么名字好,就是先播放动画A, 接着播放动画B) 有两种方式. 第一种,分别动画两个动画,A和B, 然后先播放动画A,设置A 的 Ani ...
- 文字沟通工具使用SignalR,跨域例子源代码
其他网站已经有很多关于SignalR的介绍了.这里不多介绍. 安装:Install-Package Microsoft.AspNet.SignalR -Version 1.1.4 参考自:http:/ ...
- 3563: DZY Loves Chinese - BZOJ
Description神校XJ之学霸兮,Dzy皇考曰JC.摄提贞于孟陬兮,惟庚寅Dzy以降.纷Dzy既有此内美兮,又重之以修能.遂降临于OI界,欲以神力而凌♂辱众生. 今Dzy有一魞歄图,其上有N座祭 ...
- Jquery 判断浏览器类型
$(function() { //FireFox2及以上 if ($.browser.MozillaSideBar && $.browser.version >= "1 ...