【Hadoop】Hadoop MR异常处理
1、代码示例
package com.ares.hadoop.mr.flowsort; import java.io.IOException; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger; import com.ares.hadoop.mr.exception.LineException; public class FlowSort extends Configured implements Tool {
private static final Logger LOGGER = Logger.getLogger(FlowSort.class);
enum Counter {
LINESKIP
} public static class FlowSortMapper extends Mapper<LongWritable, Text,
FlowBean, NullWritable> {
private String line;
private int length;
private final static char separator = '\t'; private String phoneNum;
private long upFlow;
private long downFlow;
private long sumFlow; private FlowBean flowBean = new FlowBean();
private NullWritable nullWritable = NullWritable.get(); @Override
protected void map(
LongWritable key,
Text value,
Mapper<LongWritable, Text, FlowBean, NullWritable>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
//super.map(key, value, context);
String errMsg;
try {
line = value.toString();
String[] fields = StringUtils.split(line, separator);
length = fields.length;
if (length != ) {
throw new LineException(key.get() + ", " + line + " LENGTH INVALID, IGNORE...");
} phoneNum = fields[];
upFlow = Long.parseLong(fields[]);
downFlow = Long.parseLong(fields[]);
sumFlow = Long.parseLong(fields[]); flowBean.setPhoneNum(phoneNum);
flowBean.setUpFlow(upFlow);
flowBean.setDownFlow(downFlow);
flowBean.setSumFlow(sumFlow); context.write(flowBean, nullWritable);
} catch (LineException e) {
// TODO: handle exception
LOGGER.error(e);
System.out.println(e);
context.getCounter(Counter.LINESKIP).increment();
return;
} catch (NumberFormatException e) {
// TODO: handle exception
errMsg = key.get() + ", " + line + " FLOW DATA INVALID, IGNORE...";
LOGGER.error(errMsg);
System.out.println(errMsg);
context.getCounter(Counter.LINESKIP).increment();
return;
} catch (Exception e) {
// TODO: handle exception
LOGGER.error(e);
System.out.println(e);
context.getCounter(Counter.LINESKIP).increment();
return;
}
}
} public static class FlowSortReducer extends Reducer<FlowBean, NullWritable,
FlowBean, NullWritable> {
@Override
protected void reduce(
FlowBean key,
Iterable<NullWritable> values,
Reducer<FlowBean, NullWritable, FlowBean, NullWritable>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
//super.reduce(arg0, arg1, arg2);
context.write(key, NullWritable.get());
}
} @Override
public int run(String[] args) throws Exception {
// TODO Auto-generated method stub
String errMsg = "FlowSort: TEST STARTED...";
LOGGER.debug(errMsg);
System.out.println(errMsg); Configuration conf = new Configuration();
//FOR Eclipse JVM Debug
//conf.set("mapreduce.job.jar", "flowsum.jar");
Job job = Job.getInstance(conf); // JOB NAME
job.setJobName("FlowSort"); // JOB MAPPER & REDUCER
job.setJarByClass(FlowSort.class);
job.setMapperClass(FlowSortMapper.class);
job.setReducerClass(FlowSortReducer.class); // MAP & REDUCE
job.setOutputKeyClass(FlowBean.class);
job.setOutputValueClass(NullWritable.class);
// MAP
job.setMapOutputKeyClass(FlowBean.class);
job.setMapOutputValueClass(NullWritable.class); // JOB INPUT & OUTPUT PATH
//FileInputFormat.addInputPath(job, new Path(args[0]));
FileInputFormat.setInputPaths(job, args[]);
FileOutputFormat.setOutputPath(job, new Path(args[])); // VERBOSE OUTPUT
if (job.waitForCompletion(true)) {
errMsg = "FlowSort: TEST SUCCESSFULLY...";
LOGGER.debug(errMsg);
System.out.println(errMsg);
return ;
} else {
errMsg = "FlowSort: TEST FAILED...";
LOGGER.debug(errMsg);
System.out.println(errMsg);
return ;
} } public static void main(String[] args) throws Exception {
if (args.length != ) {
String errMsg = "FlowSort: ARGUMENTS ERROR";
LOGGER.error(errMsg);
System.out.println(errMsg);
System.exit(-);
} int result = ToolRunner.run(new Configuration(), new FlowSort(), args);
System.exit(result);
}
}
2、参考资料:
Java异常处理:http://fluagen.blog.51cto.com/146595/304197
Hadoop获取出错行:http://www.aboutyun.com/thread-13962-1-1.html
http://blog.csdn.net/wuzhongju/article/details/8937893
http://blog.163.com/zhangxin0112zx@126/blog/static/161401319201501174156642/
【Hadoop】Hadoop MR异常处理的更多相关文章
- hadoop修改MR的提交的代码程序的副本数
hadoop修改MR的提交的代码程序的副本数 Under-Replicated Blocks的数量很多,有7万多个.hadoop fsck -blocks 检查发现有很多replica missing ...
- Hadoop【MR开发规范、序列化】
Hadoop[MR开发规范.序列化] 目录 Hadoop[MR开发规范.序列化] 一.MapReduce编程规范 1.Mapper阶段 2.Reducer阶段 3.Driver阶段 二.WordCou ...
- [Hadoop]Hadoop章2 HDFS原理及读写过程
HDFS(Hadoop Distributed File System )Hadoop分布式文件系统. HDFS有很多特点: ① 保存多个副本,且提供容错机制,副本丢失或宕机自动恢复.默认存3份. ② ...
- hadoop hadoop install (1)
vmuser@vmuser-VirtualBox:~$ sudo useradd -m hadoop -s /bin/bash[sudo] vmuser 的密码: vmuser@vmuser-Virt ...
- Hadoop hadoop 机架感知配置
机架感知脚本 使用python3编写机架感知脚本,报存到topology.py,给予执行权限 import sys import os DEFAULT_RACK="/default-rack ...
- Hadoop中MR程序的几种提交运行模式
本地模型运行 1:在windows的eclipse里面直接运行main方法,就会将job提交给本地执行器localjobrunner执行 ----输入输出数据可以放在本地路径下(c:/wc ...
- hadoop之 mr输出到hbase
1.注意问题: 1.在开发过程中一定要导入hbase源码中的lib库否则出现如下错误 TableMapReducUtil 找不到什么-- 2.编码: import java.io.IOExceptio ...
- spark为什么比hadoop的mr要快?
1.前言 Spark是基于内存的计算,而Hadoop是基于磁盘的计算:Spark是一种内存计算技术. 但是事实上,不光Spark是内存计算,Hadoop其实也是内存计算. Spark和Hadoop的根 ...
- Hadoop【MR的分区、排序、分组】
[toc] 一.分区 问题:按照条件将结果输出到不同文件中 自定义分区步骤 1.自定义继承Partitioner类,重写getPartition()方法 2.在job驱动Driver中设置自定义的Pa ...
随机推荐
- BZOJ1055[HAOI2008]玩具取名 【区间dp + 记忆化搜索】
题目 某人有一套玩具,并想法给玩具命名.首先他选择WING四个字母中的任意一个字母作为玩具的基本名字.然后 他会根据自己的喜好,将名字中任意一个字母用“WING”中任意两个字母代替,使得自己的名字能够 ...
- 方伯伯的OJ ( onlinejudge )
方伯伯的OJ 题目描述 方伯伯正在做他的OJ.现在他在处理OJ 上的用户排名问题. OJ 上注册了n 个用户,编号为1 ∼ n,一开始他们按照编号排名.方伯伯会按照心情对这些用户做以下四种操作,修改用 ...
- JSP、JSTL、EF学习笔记
JSP 1)Java Server Page,在html中嵌入java代码 2)九个内置(隐式)对象 request response out page pageContext config sess ...
- SSM+redis整合(mybatis整合redis做二级缓存)
SSM:是Spring+Struts+Mybatis ,另外还使用了PageHelper 前言: 这里主要是利用redis去做mybatis的二级缓存,mybaits映射文件中所有的select都会刷 ...
- 编译gdb 报错 No module named gdb.frames
将源码目录下的python模块拷贝到指定目录即可 cd /root/gdb-7.12/ cp -rp gdb/python/lib/gdb /usr/local/share/gdb/python/ 编 ...
- Cocos2D研究院之CCNode详解(三)
http://www.xuanyusong.com/archives/950 上一章我们了解了cocos2d的项目路径以及工作原理,这次作者要真刀真枪地讲解代码了,咱们先来看看cocos2d最常用.也 ...
- ectouch 微信支付成功后订单状态未改变的解决办法 (转载)
原文地址: 微信支付支付成功后,返回到mobile/wx_native_callback.php 之前代码 define('IN_ECS', true); require(dirname(__FILE ...
- Appium+python自动化18-brew、carthage和appium-doctor【转载】
前言 本篇安装brew.carthage,解决启动appium时的报错问题,另外安装appium-doctor检查appium的环境 1.brew 2.carthage 3.appium-doctor ...
- VirtualBox虚拟机安装CentOS 7
新建虚拟机 因为比较简单,所以对于VirtualBox就不做过多介绍了,直接下载安装即可,安装好之后打开Oracle VM VirtualBox管理器,点击新建,选择Red Hat(根据windows ...
- 漂亮的弹窗口插件——sweetAlert的使用
想必你已经受够了单调的alert弹窗吧? 为了更好的用户体验性,现在介绍一款漂亮的弹窗口插件——sweetAlert,现在就来介绍它的使用 1.首先在官网下载它的CSS和JavaScript文件:ht ...