MapReduce-读取文件写入HBase

MapReduce直接写入HBase

代码如下

package com.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.cli.CommandLine;

import org.apache.commons.cli.CommandLineParser;

import org.apache.commons.cli.HelpFormatter;

import org.apache.commons.cli.Option;

import org.apache.commons.cli.Options;

import org.apache.commons.cli.ParseException;

import org.apache.commons.cli.PosixParser;

import org.apache.commons.codec.digest.DigestUtils;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.KeyValue;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;

import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.io.Writable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

/**

* @author:FengZhen

* @create:2018年9月14日

*/

public class ImportFromFile extends Configured implements Tool{

	private static String addr="HDP233,HDP232,HDP231";

	private static String port="2181";

	public static final String NAME = "ImportFromFile";

	public enum Counters { LINES }

	static class ImportMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

		private byte[] family = null;

		private byte[] qualifier = null;

		@Override

		protected void setup(Mapper<LongWritable, Text, ImmutableBytesWritable, Put>.Context context)

				throws IOException, InterruptedException {

			String column = context.getConfiguration().get("conf.column");

			byte[][] colkey = KeyValue.parseColumn(Bytes.toBytes(column));

			family = colkey[0];

			if (colkey.length > 1) {

				qualifier = colkey[1];

			}

		}

		@Override

		protected void map(LongWritable key, Text value,

				Mapper<LongWritable, Text, ImmutableBytesWritable, Put>.Context context)

				throws IOException, InterruptedException {

			try {

				String lineString = value.toString();

				//行键是经过MD5散列之后随机生成的键值

				byte[] rowkey = DigestUtils.md5(lineString);

				Put put = new Put(rowkey);

				//存储原始数据到给定的表中的一列

				put.addColumn(family, qualifier, Bytes.toBytes(lineString));

				context.write(new ImmutableBytesWritable(rowkey), put);

				context.getCounter(Counters.LINES).increment(1L);

			} catch (Exception e) {

				e.printStackTrace();

			}

		}

	}

	/**

	 * 使用Apache Commons CLI类解析命令行参数。

	 * @param args

	 * @return

	 */

	private static CommandLine parseArgs(String[] args) {

		Options options = new Options();

		Option option = new Option("t", "table", true, "table to import into -must exist");

		option.setArgName("table-name");

		option.setRequired(true);

		options.addOption(option);

		option = new Option("c", "column", true, "column to store row data into -must exit");

		option.setArgName("family:qualifier");

		option.setRequired(true);

		options.addOption(option);

		option = new Option("i", "input", true, "the directory or file to read from");

		option.setArgName("path-in-HDFS");

		option.setRequired(true);

		options.addOption(option);

		options.addOption("d", "debug", false, "switch on DEBUG log level");

		CommandLineParser parser = new PosixParser();

		CommandLine cmd = null;

		try {

			cmd = parser.parse(options, args);

		} catch (ParseException e) {

			e.printStackTrace();

			System.err.println("ERROR: " + e.getMessage() + "\n");

			HelpFormatter formatter = new HelpFormatter();

			formatter.printHelp(NAME + " ", options, true);

			System.exit(1);

		}

		return cmd;

	}

	public int run(String[] arg0) throws Exception {

		Configuration configuration = HBaseConfiguration.create();

		configuration.set("hbase.zookeeper.quorum",addr);

		configuration.set("hbase.zookeeper.property.clientPort", port);

		//String[] otherArgs = new GenericOptionsParser(configuration, arg0).getRemainingArgs();

		//CommandLine commandLine = parseArgs(arg0);

//		String table = commandLine.getOptionValue("t");

//		String input = commandLine.getOptionValue("i");

//		String column = commandLine.getOptionValue("c");

		String table = arg0[0];

		String input = arg0[1];

		String column = arg0[2];

		configuration.set("conf.column", column);

		Job job = Job.getInstance(configuration);

		job.setJobName("ImportFromFile");

		job.setJarByClass(ImportFromFile.class);

		job.setMapperClass(ImportMapper.class);

		job.setOutputFormatClass(TableOutputFormat.class);

		job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, table);

		job.setOutputKeyClass(ImmutableBytesWritable.class);

		job.setOutputValueClass(Writable.class);

		//这是一个只包含map阶段的作业，框架会直接跳过reduce阶段

		job.setNumReduceTasks(0);

		FileInputFormat.addInputPath(job, new Path(input));

		return job.waitForCompletion(true) ? 0 : 1;

	}

	public static void main(String[] args) throws Exception {

		String[] params = new String[] {"test_table_mr", "hdfs://fz/data/fz/input/hbase", "data:info"};

		int exitCode = ToolRunner.run(new ImportFromFile(), params);

		System.exit(exitCode);

	}

}

MapReduce-读取文件写入HBase的更多相关文章

Mapreduce的文件和hbase共同输入
Mapreduce的文件和hbase共同输入 package duogemap; import java.io.IOException; import org.apache.hadoop.co ...
MapReduce和Spark写入Hbase多表总结
作者:Syn良子出处:http://www.cnblogs.com/cssdongl 转载请注明出处大家都知道用mapreduce或者spark写入已知的hbase中的表时,直接在mapreduc ...
shell读取文件写入新文件
#!/bin/sh #系统简称 SYST="HVPS" #发送行号 SEND1234SEND=" #接收行号 RECV1234RECV=" cd /home/w ...
python小练习之读取文件写入excel
文件是个json文件内容为: 导入excel后的格式为捋一下思路一步步怎么实现: 1 首先需要读取json文件然后将读取的内容转为字典 2 将excel的列名写入一个list中然后遍历执行写 ...
Python学习笔记五(读取提取写入文件)
#Python打开读取一个文件内容,然后写入一个新的文件中,并对某些字段进行提取,写入新的字段的脚本,与大家共同学习. import os import re def get_filelist(dir ...
【HBase】HBase与MapReduce集成——从HDFS的文件读取数据到HBase
目录需求步骤一.创建maven工程,导入jar包二.开发MapReduce程序三.结果需求将HDFS路径 /hbase/input/user.txt 文件的内容读取并写入到HBase 表 ...
使用MapReduce读取HBase数据存储到MySQL
Mapper读取HBase数据 package MapReduce; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hba ...
用mapreduce读取hdfs数据到hbase上
hdfs数据到hbase过程将HDFS上的文件中的数据导入到hbase中实现上面的需求也有两种办法,一种是自定义mr,一种是使用hbase提供好的import工具 hbase先创建好表 cre ...
MapReduce-从HBase读取数据处理后再写入HBase
MapReduce-从HBase读取处理后再写入HBase 代码如下 package com.hbase.mapreduce; import java.io.IOException; import o ...

随机推荐

Python简单分布式爬虫
分布式爬虫采用主从模式.主从模式是指由一台主机作为控制节点,负责管理所有运行网络爬虫的主机(url管理器,数据存储器,控制调度器),爬虫只需要从控制节点那里接收任务,并把新生成任务提交给控制节点.此次 ...
hoj 2543 (费用流拆边）
http://acm.hit.edu.cn/hoj/problem/view?id=2543 1.将原图中的每条边(u, v)拆成两条:(u, v, Ci, 0), (u, v, ∞, Ei) 2.购 ...
MySQL中redo日志
重做日志用来实现事务的持久性,即ACID中的D,由两部分组成: 一是内存中的重做日志缓冲(redo log buffer) 易丢失二是重做日志文件(redo log file) 持久的 InnoD ...
我的Android进阶之旅------>Android中如何高效率的进行简繁体转换
因为APP要做国际化适配,所以就需要顾及到香港和台湾都是使用繁体字,怎样快速便捷高效的把简体字转换成繁体字呢? 说实话我之前用的方法比较呆板,把每个需要转换的字符串进行在线翻译.今天突然发现word或 ...
我的Android进阶之旅------>Android的ListView数据更新后，如何使最新的条目可以自动滚动到可视范围内？
在ListView的layout配置中添加 android:transcriptMode="alwaysScroll" <ListView android:id=" ...
Hadoop权威指南读书笔记
本书中提到的Hadoop项目简述 Common:一组分布式文件系统和通用I/O的组件与接口(序列化.javaRPC和持久化数据结构). Avro:一种支持高效.跨语言的RPC以及永久存储数据的序列化系 ...
JavaWeb:实现文件上传与下载
JavaWeb:实现文件上传与下载文件上传前端处理本模块使用到的前端Ajax库为Axio,其地址为GitHub官网. 关于文件上传上传文件就是把客户端的文件发送给服务器端. 在常见情况(不包含文 ...
Capslock and Esc
将Caps Lock转换成Esc(windows and linux) 1. linux 下将Caps Lock 转换成Esc 作为一个vimer,Caps Lock对我(还有其他很多人)来说根本就是 ...
hadoop02---高可用网站架构
tomcat每个请求都会占用内存cpu,tomcat没有代理功能.nginx是俄国人写的,nginx是静态资源服务器,既可以自己返回请求,也可以做代理进行转发,和负载均衡.Tomcat是动态资源jav ...
【Head First Servlets and JSP】笔记6：什么是响应首部 & 快速搭建一个简单的测试环境
搭建简单的测试环境什么是响应首部最简单的响应首部——Content-Type 设置响应首部请求重定向与响应首部在浏览器中查看Response Headers 1.先快速搭建一个简单的测试环境, ...

MapReduce-读取文件写入HBase

MapReduce直接写入HBase

代码如下

MapReduce-读取文件写入HBase的更多相关文章

随机推荐

热门专题