【Hadoop】Hadoop MR异常处理
1、代码示例
package com.ares.hadoop.mr.flowsort; import java.io.IOException; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger; import com.ares.hadoop.mr.exception.LineException; public class FlowSort extends Configured implements Tool {
private static final Logger LOGGER = Logger.getLogger(FlowSort.class);
enum Counter {
LINESKIP
} public static class FlowSortMapper extends Mapper<LongWritable, Text,
FlowBean, NullWritable> {
private String line;
private int length;
private final static char separator = '\t'; private String phoneNum;
private long upFlow;
private long downFlow;
private long sumFlow; private FlowBean flowBean = new FlowBean();
private NullWritable nullWritable = NullWritable.get(); @Override
protected void map(
LongWritable key,
Text value,
Mapper<LongWritable, Text, FlowBean, NullWritable>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
//super.map(key, value, context);
String errMsg;
try {
line = value.toString();
String[] fields = StringUtils.split(line, separator);
length = fields.length;
if (length != ) {
throw new LineException(key.get() + ", " + line + " LENGTH INVALID, IGNORE...");
} phoneNum = fields[];
upFlow = Long.parseLong(fields[]);
downFlow = Long.parseLong(fields[]);
sumFlow = Long.parseLong(fields[]); flowBean.setPhoneNum(phoneNum);
flowBean.setUpFlow(upFlow);
flowBean.setDownFlow(downFlow);
flowBean.setSumFlow(sumFlow); context.write(flowBean, nullWritable);
} catch (LineException e) {
// TODO: handle exception
LOGGER.error(e);
System.out.println(e);
context.getCounter(Counter.LINESKIP).increment();
return;
} catch (NumberFormatException e) {
// TODO: handle exception
errMsg = key.get() + ", " + line + " FLOW DATA INVALID, IGNORE...";
LOGGER.error(errMsg);
System.out.println(errMsg);
context.getCounter(Counter.LINESKIP).increment();
return;
} catch (Exception e) {
// TODO: handle exception
LOGGER.error(e);
System.out.println(e);
context.getCounter(Counter.LINESKIP).increment();
return;
}
}
} public static class FlowSortReducer extends Reducer<FlowBean, NullWritable,
FlowBean, NullWritable> {
@Override
protected void reduce(
FlowBean key,
Iterable<NullWritable> values,
Reducer<FlowBean, NullWritable, FlowBean, NullWritable>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
//super.reduce(arg0, arg1, arg2);
context.write(key, NullWritable.get());
}
} @Override
public int run(String[] args) throws Exception {
// TODO Auto-generated method stub
String errMsg = "FlowSort: TEST STARTED...";
LOGGER.debug(errMsg);
System.out.println(errMsg); Configuration conf = new Configuration();
//FOR Eclipse JVM Debug
//conf.set("mapreduce.job.jar", "flowsum.jar");
Job job = Job.getInstance(conf); // JOB NAME
job.setJobName("FlowSort"); // JOB MAPPER & REDUCER
job.setJarByClass(FlowSort.class);
job.setMapperClass(FlowSortMapper.class);
job.setReducerClass(FlowSortReducer.class); // MAP & REDUCE
job.setOutputKeyClass(FlowBean.class);
job.setOutputValueClass(NullWritable.class);
// MAP
job.setMapOutputKeyClass(FlowBean.class);
job.setMapOutputValueClass(NullWritable.class); // JOB INPUT & OUTPUT PATH
//FileInputFormat.addInputPath(job, new Path(args[0]));
FileInputFormat.setInputPaths(job, args[]);
FileOutputFormat.setOutputPath(job, new Path(args[])); // VERBOSE OUTPUT
if (job.waitForCompletion(true)) {
errMsg = "FlowSort: TEST SUCCESSFULLY...";
LOGGER.debug(errMsg);
System.out.println(errMsg);
return ;
} else {
errMsg = "FlowSort: TEST FAILED...";
LOGGER.debug(errMsg);
System.out.println(errMsg);
return ;
} } public static void main(String[] args) throws Exception {
if (args.length != ) {
String errMsg = "FlowSort: ARGUMENTS ERROR";
LOGGER.error(errMsg);
System.out.println(errMsg);
System.exit(-);
} int result = ToolRunner.run(new Configuration(), new FlowSort(), args);
System.exit(result);
}
}
2、参考资料:
Java异常处理:http://fluagen.blog.51cto.com/146595/304197
Hadoop获取出错行:http://www.aboutyun.com/thread-13962-1-1.html
http://blog.csdn.net/wuzhongju/article/details/8937893
http://blog.163.com/zhangxin0112zx@126/blog/static/161401319201501174156642/
【Hadoop】Hadoop MR异常处理的更多相关文章
- hadoop修改MR的提交的代码程序的副本数
hadoop修改MR的提交的代码程序的副本数 Under-Replicated Blocks的数量很多,有7万多个.hadoop fsck -blocks 检查发现有很多replica missing ...
- Hadoop【MR开发规范、序列化】
Hadoop[MR开发规范.序列化] 目录 Hadoop[MR开发规范.序列化] 一.MapReduce编程规范 1.Mapper阶段 2.Reducer阶段 3.Driver阶段 二.WordCou ...
- [Hadoop]Hadoop章2 HDFS原理及读写过程
HDFS(Hadoop Distributed File System )Hadoop分布式文件系统. HDFS有很多特点: ① 保存多个副本,且提供容错机制,副本丢失或宕机自动恢复.默认存3份. ② ...
- hadoop hadoop install (1)
vmuser@vmuser-VirtualBox:~$ sudo useradd -m hadoop -s /bin/bash[sudo] vmuser 的密码: vmuser@vmuser-Virt ...
- Hadoop hadoop 机架感知配置
机架感知脚本 使用python3编写机架感知脚本,报存到topology.py,给予执行权限 import sys import os DEFAULT_RACK="/default-rack ...
- Hadoop中MR程序的几种提交运行模式
本地模型运行 1:在windows的eclipse里面直接运行main方法,就会将job提交给本地执行器localjobrunner执行 ----输入输出数据可以放在本地路径下(c:/wc ...
- hadoop之 mr输出到hbase
1.注意问题: 1.在开发过程中一定要导入hbase源码中的lib库否则出现如下错误 TableMapReducUtil 找不到什么-- 2.编码: import java.io.IOExceptio ...
- spark为什么比hadoop的mr要快?
1.前言 Spark是基于内存的计算,而Hadoop是基于磁盘的计算:Spark是一种内存计算技术. 但是事实上,不光Spark是内存计算,Hadoop其实也是内存计算. Spark和Hadoop的根 ...
- Hadoop【MR的分区、排序、分组】
[toc] 一.分区 问题:按照条件将结果输出到不同文件中 自定义分区步骤 1.自定义继承Partitioner类,重写getPartition()方法 2.在job驱动Driver中设置自定义的Pa ...
随机推荐
- 洛谷 P2114 [NOI2014]起床困难综合症 解题报告
P2114 [NOI2014]起床困难综合症 题目描述 21世纪,许多人得了一种奇怪的病:起床困难综合症,其临床表现为:起床难,起床后精神不佳.作为一名青春阳光好少年,atm一直坚持与起床困难综合症作 ...
- json数据格式的简单案例
json数据是一种文本字符串,它是javascript的原生数据格式,在数据需要多次重复使用时,json数据是ajax请求的首先.(注:ajax返回的数据格式支持三种分别为:文本格式,json.和xm ...
- yum升级kernel
# uname -a Linux host -.el6.x86_64 # SMP Fri May :: BST x86_64 x86_64 x86_64 GNU/Linux # cat /etc/re ...
- jsp的九大内置对象及EL表达式的隐含对象
九大内置对象: request request对象具有请求域,即完成客户端的请求之前,该对象一直有效. response response对象具有页面作用域,即访问一个页面 ...
- bananahill(NOIP模拟赛Round 8)
题目描述 香蕉川由座香蕉山组成,第i座山有它的高度.小Z准备从左到右爬这里的恰好座香蕉山,但他不希望山的高度起伏太大,太过颠簸,会让本就体育不好的他过于劳累.所以他定义了爬山的劳累度是所有爬的相邻的两 ...
- mogadm修改硬盘状态
#查看主机序列mogadm host list #查看空间快满的分区 df -h#检查硬盘序列和状态ls -l /data/mogile_data/ #查看mogadm目录下软链接的目录对应的分区 # ...
- hdu 5162(水题)
题目链接: http://acm.hdu.edu.cn/showproblem.php?pid=5162 题解:看了半天以为测试用例写错了.这题玩文字游戏.它问的是当前第i名是原数组中的第几个. #i ...
- laravel按月/时间区间查询数据
public function payRollList($time) { $member_phone = Session::get(phone'); $engineer = Engineer::get ...
- 华农oj Problem K: 负2进制【有技巧构造/待补】
Problem K: 负2进制 Time Limit: 2 Sec Memory Limit: 128 MB Submit: 51 Solved: 6 [Submit][Status][Web Boa ...
- Python的程序结构[2] -> 类/Class[5] -> 内建类 bytes 和 bytearray
内建类 bytes 和 bytearray / Built-in Type bytes and bytearray 关于内建类 Python的内建类 bytes 主要有以下几点: class byte ...