Hbase（七）hbase高级编程

一、Hbase结合mapreduce

为什么需要用 mapreduce 去访问 hbase 的数据？
——加快分析速度和扩展分析能力
Mapreduce 访问 hbase 数据作分析一定是在离线分析的场景下应用

1、HbaseToHDFS

从 hbase 中读取数据，分析之后写入 hdfs，代码实现：

package com.ghgj.hbase.hbase2hdfsmr;

import java.io.IOException;

import java.util.List;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.hbase.Cell;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.client.Result;

import org.apache.hadoop.hbase.client.Scan;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

import org.apache.hadoop.hbase.mapreduce.TableMapper;

import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**

 * 作用：从hbase中读取user_info这个表的数据，然后写出到hdfs

 */

public class HBaseToHDFSMR {

	private static final String ZK_CONNECT = "hadoop03:2181,hadoop04:2181,hadoop05:2181";

	public static void main(String[] args) throws Exception {

		Configuration conf = HBaseConfiguration.create();

		conf.set("hbase.zookeeper.quorum", ZK_CONNECT);

		System.setProperty("HADOOP_USER_NAME", "hadoop");

//		conf.set("fs.defaultFS", "hdfs://myha01/");

		Job job = Job.getInstance(conf);

		job.setJarByClass(HBaseToHDFSMR.class);

		Scan scan = new Scan();

		scan.addColumn(Bytes.toBytes("base_info"), Bytes.toBytes("name"));

		/**

		 * TableMapReduceUtil：以util结尾：工具

		 * MapReduceFactory：以factory结尾，它是工厂类，最大作用就是管理对象的生成

		 */

		TableMapReduceUtil.initTableMapperJob("user_info", scan,

				HBaseToHDFSMRMapper.class, Text.class, NullWritable.class, job);

		job.setReducerClass(HBaseToHDFSMRReducer.class);

		job.setOutputKeyClass(Text.class);

		job.setOutputValueClass(NullWritable.class);

		Path outputPath = new Path("/hbase2hdfs/output");

		FileSystem fs = FileSystem.get(conf);

		if(fs.exists(outputPath)){

			fs.delete(outputPath);

		}

		FileOutputFormat.setOutputPath(job, outputPath);

		boolean waitForCompletion = job.waitForCompletion(true);

		System.exit(waitForCompletion ? 0 : 1);

	}

	static class HBaseToHDFSMRMapper extends TableMapper<Text, NullWritable>{

		/**

		 * key:rowkey

		 * value:map方法每执行一次接收到的一个参数，这个参数就是一个Result实例

		 * 这个Result里面存的东西就是rowkey, family, qualifier, value, timestamp

		 */

		@Override

		protected void map(ImmutableBytesWritable key, Result value, Mapper<ImmutableBytesWritable, Result, Text, NullWritable>.Context context) throws IOException, InterruptedException {

			String rowkey = Bytes.toString(key.copyBytes());

			System.out.println(rowkey);

			List<Cell> cells = value.listCells();

			for (int i = 0; i < cells.size(); i++) {

				Cell cell = cells.get(i);

				String rowkey_result = Bytes.toString(cell.getRow()) + "\t"

						+ Bytes.toString(cell.getFamily()) + "\t"

						+ Bytes.toString(cell.getQualifier()) + "\t"

						+ Bytes.toString(cell.getValue()) + "\t"

						+ cell.getTimestamp();

				context.write(new Text(rowkey_result), NullWritable.get());

			}

		}

	}

	static class HBaseToHDFSMRReducer extends Reducer<Text, NullWritable, Text, NullWritable>{

		@Override

		protected void reduce(Text key, Iterable<NullWritable> arg1, Reducer<Text, NullWritable, Text, NullWritable>.Context context) throws IOException, InterruptedException {

			context.write(key, NullWritable.get());

		}

	}

}

　　2、HDFSToHbase

从 hdfs 中读入数据，处理之后写入 hbase，代码实现：

package com.ghgj.hbase.hbase2hdfsmr;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.HColumnDescriptor;

import org.apache.hadoop.hbase.HTableDescriptor;

import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.HBaseAdmin;

import org.apache.hadoop.hbase.client.Mutation;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

import org.apache.hadoop.hbase.mapreduce.TableReducer;

import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class HDFSToHBaseMR {

	private static final String ZK_CONNECT = "hadoop03:2181,hadoop04:2181,hadoop05:2181";

	private static final String TABLE_NAME = "person_info";

	public static void main(String[] args) throws Exception {

		Configuration conf = HBaseConfiguration.create();

		conf.set("hbase.zookeeper.quorum", ZK_CONNECT);

		System.setProperty("HADOOP_USER_NAME", "hadoop");

		Job job = Job.getInstance(conf);

		job.setJarByClass(HDFSToHBaseMR.class);

		// 以下这一段代码是为了创建一张hbase表叫做 person_info

		HBaseAdmin admin = new HBaseAdmin(conf);

		HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE_NAME));

		htd.addFamily(new HColumnDescriptor("base_info"));

		if (admin.tableExists(TABLE_NAME)) {

			admin.disableTable(TABLE_NAME);

			admin.deleteTable(TABLE_NAME);

		}

		admin.createTable(htd);

		// 给job指定mapperclass 和  reducerclass

		job.setMapperClass(HDFSToHBaseMRMapper.class);

		TableMapReduceUtil.initTableReducerJob(TABLE_NAME, HDFSToHBaseMRReducer.class, job);

		// 给mapper和reducer指定输出的key-value的类型

		job.setMapOutputKeyClass(Text.class);

		job.setMapOutputValueClass(NullWritable.class);

		job.setOutputKeyClass(ImmutableBytesWritable.class);

		job.setOutputValueClass(Mutation.class);

		// 指定输入数据的路径

		FileInputFormat.setInputPaths(job, new Path("/hbase2hdfs/output"));

		// job提交

		boolean boo = job.waitForCompletion(true);

		System.exit(boo ? 0 :1);

	}

	static class HDFSToHBaseMRMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

		@Override

		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context) throws IOException, InterruptedException {

			context.write(value, NullWritable.get());

		}

	}

	/**

	 * TableReducer extends Reducer 这么做的唯一效果就是把valueout的类型确定为Mutation

	 */

	static class HDFSToHBaseMRReducer extends TableReducer<Text, NullWritable, ImmutableBytesWritable> {

		/**

		 * baiyc_20150716_0001 base_info name baiyc1 1488348387443

		 */

		@Override

		protected void reduce(Text key, Iterable<NullWritable> values, Reducer<Text, NullWritable, ImmutableBytesWritable, Mutation>.Context context) throws IOException, InterruptedException {

			String[] splits = key.toString().split("\t");

			String rowkeyStr = splits[0];

			ImmutableBytesWritable rowkey = new ImmutableBytesWritable(Bytes.toBytes(rowkeyStr));

			Put put = new Put(Bytes.toBytes(rowkeyStr));

			String family = splits[1];

			String qualifier = splits[2];

			String value = splits[3];

			String ts = splits[4];

			put.add(Bytes.toBytes(family), Bytes.toBytes(qualifier), Long.parseLong(ts), Bytes.toBytes(value));

			context.write(rowkey, put);

		}

	}

}

二、Hbase和mysql数据库数据进行互导

1、mysql数据导入到hbase（用sqoop）

命令：

sqoop import --connect jdbc:mysql://hadoop01/mytest --username root --password root \
--table student --hbase-create-table --hbase-table studenttest --column-family name \
--hbase-row-key id

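执行上面的命令会报错：Exception in thread "main" java.lang.NoSuchMethodError: org.apache.hadoop.hbase.HTableDescriptor.addFamily(Lorg/apache/hadoop/hbase/HColumnDescriptor;)V。这是由 sqoop 与 hbase 的版本不兼容引起的。解决办法是事先在 hbase 中创建好目标表（例如在 hbase shell 中执行 create 'studenttest1','name'），然后去掉 --hbase-create-table 参数再导入。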
请使用下面的命令：

sqoop import --connect jdbc:mysql://hadoop01/mytest --username root --password root \
--table student --hbase-table studenttest1 --column-family name --hbase-row-key id

--hbase-create-table 自动在 hbase 中创建表
--column-family name 指定列簇名字
--hbase-row-key id 指定 rowkey 对应的 mysql 当中的键

2、hbase数据导入到mysql

目前没有直接的命令将 Hbase 中的数据导出到 mysql，但是可以先将 hbase 中的数据导出到 hdfs 中，再将数据导出 mysql

替代方案：
先将 hbase 的数据导入到 hdfs 或者 hive，然后再将数据导入到 mysql

三、hbase整合hive

原理：

Hive 与 HBase 利用两者本身对外的 API 来实现整合，主要是靠 HBaseStorageHandler 进行通信，利用 HBaseStorageHandler， Hive 可以获取到 Hive 表对应的 HBase 表名，列簇以及列， InputFormat 和 OutputFormat 类，创建和删除 HBase 表等。

Hive 访问 HBase 中表数据，实质上是通过 MapReduce 读取 HBase 表数据，其实现是在 MR 中，使用 HiveHBaseTableInputFormat 完成对 HBase 表的切分，获取 RecordReader 对象来读取数据。

对 HBase 表的切分原则是一个 Region 切分成一个 Split,即表中有多少个 Regions,MR 中就有多少个 Map。

读取 HBase 表数据都是通过构建 Scanner，对表进行全表扫描，如果有过滤条件，则转化为 Filter。当过滤条件为 rowkey 时，则转化为对 rowkey 的过滤， Scanner 通过 RPC 调用 RegionServer 的 next()来获取数据。

1、准备hbase表数据

create 'mingxing',{NAME => 'base_info',VERSIONS => 1},{NAME => 'extra_info',VERSIONS => 1}

插入数据：

put 'mingxing','rk001','base_info:name','huangbo'
put 'mingxing','rk001','base_info:age','33'
put 'mingxing','rk001','extra_info:math','44'
put 'mingxing','rk001','extra_info:province','beijing'
put 'mingxing','rk002','base_info:name','xuzheng'
put 'mingxing','rk002','base_info:age','44'
put 'mingxing','rk003','base_info:name','wangbaoqiang'
put 'mingxing','rk003','base_info:age','55'
put 'mingxing','rk003','base_info:gender','male'
put 'mingxing','rk004','extra_info:math','33'
put 'mingxing','rk004','extra_info:province','tianjin'
put 'mingxing','rk004','extra_info:children','3'
put 'mingxing','rk005','base_info:name','liutao'
put 'mingxing','rk006','extra_info:name','liujialing'

2、hive端操作

四、HbaseToHbase（用 MapReduce 把一张 hbase 表的数据写入另一张 hbase 表）

package com.ghgj.hbase.hbase2hdfsmr;

import java.io.IOException;

import java.util.List;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.hbase.Cell;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.HColumnDescriptor;

import org.apache.hadoop.hbase.HTableDescriptor;

import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.HBaseAdmin;

import org.apache.hadoop.hbase.client.Mutation;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.hbase.client.Result;

import org.apache.hadoop.hbase.client.Scan;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

import org.apache.hadoop.hbase.mapreduce.TableMapper;

import org.apache.hadoop.hbase.mapreduce.TableReducer;

import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

/**
 * MapReduce job that copies one column from one HBase table to another.
 *
 * <p>The {@code base_info:age} column of {@code user_info} is scanned; every cell is serialized
 * by the mapper as {@code rowkey<TAB>family<TAB>qualifier<TAB>value<TAB>timestamp} and turned
 * back into a {@link Put} against {@code person_info2} by the reducer. The target table is
 * dropped and recreated before the job is submitted.
 */
public class HBaseToHBaseByMR {

	private static final String ZK_CONNECT = "hadoop03:2181,hadoop04:2181,hadoop05:2181";

	private static final String OLD_TABLE_NAME = "user_info";

	private static final String NEW_TABLE_NAME = "person_info2";

	private static final String FAMILY = "base_info";

	private static final String QUALIFIER = "age";

	/**
	 * Recreates the target table, configures and submits the job, then exits the JVM with
	 * status 0 on success and 1 on failure.
	 *
	 * @param args unused
	 * @throws Exception if the table cannot be recreated or the job fails to run
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum", ZK_CONNECT);
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		// NOTE(review): the original carried a disabled
		// conf.set("fs.defaultFS", "hdfs://myha01/"); presumably it is only needed when no
		// core-site.xml is on the classpath - confirm for your cluster.

		Job job = Job.getInstance(conf);
		// Must name this class: the original referenced HBaseToHDFSMR, a copy-paste slip that
		// ties the job jar lookup (and compilation) to an unrelated class.
		job.setJarByClass(HBaseToHBaseByMR.class);

		recreateTargetTable(conf);

		Scan scan = new Scan();
		scan.addColumn(Bytes.toBytes(FAMILY), Bytes.toBytes(QUALIFIER));
		// The HBase reference guide recommends disabling block caching for MapReduce scans.
		scan.setCacheBlocks(false);

		// Naming note: a class ending in "Util" (TableMapReduceUtil) is a helper class, whereas a
		// class ending in "Factory" exists to manage object creation.
		TableMapReduceUtil.initTableMapperJob(OLD_TABLE_NAME, scan, HBaseToHBaseByMRMapper.class,
				Text.class, NullWritable.class, job);
		TableMapReduceUtil.initTableReducerJob(NEW_TABLE_NAME, HBaseToHBaseByMRReducer.class, job);

		// Key/value types emitted by the mapper and by the reducer.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(NullWritable.class);
		job.setOutputKeyClass(ImmutableBytesWritable.class);
		job.setOutputValueClass(Mutation.class);

		boolean succeeded = job.waitForCompletion(true);
		System.exit(succeeded ? 0 : 1);
	}

	/**
	 * Drops {@code person_info2} if it exists and creates it again with the {@code base_info}
	 * column family. The admin handle is always closed, even when an operation fails.
	 */
	private static void recreateTargetTable(Configuration conf) throws IOException {
		HBaseAdmin admin = new HBaseAdmin(conf);
		try {
			if (admin.tableExists(NEW_TABLE_NAME)) {
				// A table must be disabled before deletion; skip the call when it already is
				// (e.g. after an interrupted earlier run), because disabling twice fails.
				if (admin.isTableEnabled(NEW_TABLE_NAME)) {
					admin.disableTable(NEW_TABLE_NAME);
				}
				admin.deleteTable(NEW_TABLE_NAME);
			}
			HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(NEW_TABLE_NAME));
			htd.addFamily(new HColumnDescriptor(FAMILY));
			admin.createTable(htd);
		} finally {
			admin.close();
		}
	}

	/**
	 * Emits one tab-separated text line per cell of each scanned row.
	 */
	static class HBaseToHBaseByMRMapper extends TableMapper<Text, NullWritable> {

		/**
		 * @param key     the row key of the current row
		 * @param value   the {@link Result} holding the cells of the current row
		 * @param context used to emit one {@code (line, null)} pair per cell
		 */
		@Override
		protected void map(ImmutableBytesWritable key, Result value, Context context)
				throws IOException, InterruptedException {
			List<Cell> cells = value.listCells();
			if (cells == null) {
				// listCells() returns null for a Result without cells.
				return;
			}
			for (Cell cell : cells) {
				context.write(new Text(formatCell(cell)), NullWritable.get());
			}
		}

		/** Renders a cell as {@code rowkey \t family \t qualifier \t value \t timestamp}. */
		private static String formatCell(Cell cell) {
			// Read straight from the cell's backing arrays instead of the deprecated
			// getRow()/getFamily()/getQualifier()/getValue() getters.
			return Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()) + "\t"
					+ Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()) + "\t"
					+ Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()) + "\t"
					+ Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()) + "\t"
					+ cell.getTimestamp();
		}
	}

	/**
	 * Turns each distinct serialized cell back into a {@link Put}.
	 *
	 * <p>Extending {@code TableReducer} instead of {@code Reducer} only fixes the output value
	 * type to {@link Mutation}.
	 */
	static class HBaseToHBaseByMRReducer extends TableReducer<Text, NullWritable, ImmutableBytesWritable> {

		/** Number of tab-separated fields required in a key. */
		private static final int FIELD_COUNT = 5;

		private static final String COUNTER_GROUP = "HBaseToHBaseByMR";

		private static final String MALFORMED_LINES = "MALFORMED_LINES";

		/**
		 * Example key: {@code baiyc_20150716_0001 base_info name baiyc1 1488348387443}
		 * (fields separated by tabs).
		 *
		 * <p>Keys with fewer than five fields, an empty row key, or a non-numeric timestamp are
		 * counted under {@code HBaseToHBaseByMR/MALFORMED_LINES} and skipped rather than failing
		 * the whole job.
		 */
		@Override
		protected void reduce(Text key, Iterable<NullWritable> values, Context context)
				throws IOException, InterruptedException {
			String[] fields = key.toString().split("\t");
			if (fields.length < FIELD_COUNT || fields[0].isEmpty()) {
				context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
				return;
			}

			long timestamp;
			try {
				timestamp = Long.parseLong(fields[4]);
			} catch (NumberFormatException e) {
				// Not a usable cell timestamp; treat the key like any other malformed input.
				context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
				return;
			}

			byte[] row = Bytes.toBytes(fields[0]);
			Put put = new Put(row);
			// addColumn replaces the deprecated Put.add(family, qualifier, ts, value).
			put.addColumn(Bytes.toBytes(fields[1]), Bytes.toBytes(fields[2]), timestamp, Bytes.toBytes(fields[3]));
			context.write(new ImmutableBytesWritable(row), put);
		}
	}
}

Hbase（七）hbase高级编程的更多相关文章

HBase(七): HBase体系结构剖析（下)
目录: write Compaction splite read Write: 当客户端发起一个Put请求时,首先根据RowKey寻址,从hbase:meta表中查出该Put数据最终需要去的HRegi ...
(七) 一起学 Unix 环境高级编程(APUE) 之进程关系和守护进程
. . . . . 目录 (一) 一起学 Unix 环境高级编程 (APUE) 之标准IO (二) 一起学 Unix 环境高级编程 (APUE) 之文件 IO (三) 一起学 Unix 环境高级编 ...
HBase Coprocessor 剖析与编程实践（转载http://www.cnblogs.com/ventlam/archive/2012/10/30/2747024.html）
HBase Coprocessor 剖析与编程实践 1.起因(Why HBase Coprocessor) HBase作为列族数据库最经常被人诟病的特性包括:无法轻易建立“二级索引”,难以执行求和. ...
解读经典《C#高级编程》第七版 Page79-93.对象和类型.Chapter3
前言本篇我们继续讲解本章其余的部分:构造函数.只读字段.匿名类型.结构详解.部分类.静态类.Object类.扩展方法,等. 01 类构造函数构造函数是一种特殊的方法: 与类同名没有返回值,甚至 ...
解读经典《C#高级编程》第七版 Page68-79.对象和类型.Chapter3
前言新年好,本篇开始进入第三章,<对象和类型>,深刻理解C#的对象,对于使用好.Net类库非常重要. 01 类和结构从使用角度看,结构和类的区别很小,比如,将结构定义转换为类,只需要将 ...
解读经典《C#高级编程》第七版 Page50-68.核心C#.Chapter2
前言本篇讲述Main方法,控制台,注释,预处理指令,编程规范等.这些概念比较琐碎,为避免长篇大论,主要以列举要点的方式来说明. 01 Main方法 Main方法并不是所有应用类型的入口方法,它只是控 ...
解读经典《C#高级编程》第七版 Page45-50.核心C#.Chapter2
前言本篇讲述枚举和名称空间. 01 枚举首先需要明确枚举的概念:枚举是用户定义的整数类型.使用枚举的目标是,使用一组容易记忆的名称,来使得代码更容易编写和维护. 我们对比枚举的定义和类的定义,会发 ...
解读经典《C#高级编程》第七版 Page38-45.核心C#.Chapter2
前言控制流是语言中最基础的部分,我们不谈具体的细节,只讲讲一些关键和有趣的点. 01 流控制条件语句:if, else if, else if语句的使用非常值得细讲,如何是好的使用习惯.有一点非常 ...
解读经典《C#高级编程》第七版 Page32-38.核心C#.Chapter2
前言接下来讲讲预定义数据类型.关于数据类型,其实是非常值得透彻研究的. 01 预定义数据类型值类型和引用类型 C#将把数据类型分为两种,值类型和引用类型,值类型存储在堆栈上,引用类型存储在托管堆上 ...
解读经典《C#高级编程》第七版 Page20-32.核心C#.Chapter2
前言讲到核心C#的语法,其实很难讲,因为大部分是基础知识.如果只讲入门的基础知识,那细节又多,意义也不大.我们就不讲一般性的内容,而是找一些有趣的点,展开讲讲. 01 Hello World Hel ...

随机推荐

使用json.dumps转换django queryset的datatime报错问题解决
转换成json时使用的方法如下: json.dumps(list(models.userlist.objects.values("vu"))) 报错信息如下: Traceback ...
yum安装lnmp
python其他知识目录 1.安装LNMP之前要安装EPEL,以便安装源以外的软件,如Nginx,phpMyAdmin等. yum install epel-release 提示:EPEL,即Extr ...
JSBridge的原理
前言参考来源前人栽树,后台乘凉,本文参考了以下来源 github-WebViewJavascriptBridge JSBridge-Web与Native交互之iOS篇 Ios Android Hy ...
Scrum Meeting 10.29
成员今日活动明日计划用时徐越配置tomcat+eclipse 将上届后端代码迁移到服务器 4h 赵庶宏与数据库连接的java代码学习及编写,测试代码进行数据库的建立并学习数据库方面的知识 ...
08慕课网《进击Node.js基础（一）》事件events
引用events模块中的EventEmitter 事件的监听和发射相同的事件发射数量有限,可以通过setMaxListeners设置峰值 var EventEmitter = require('ev ...
JavaScript实现弹出层（以layer.open为例）
首先,引用layer,自行下载. 添加如下两行 <script src=" ../layer/jquery.min.js"></script> <sc ...
根据C#编程经验思考编程核心
程序是对数据的各种操作.数据的表示,数据的组织结构,数据的存储,数据的处理,数据的传输等. 程序是由具体的编程语言编写的,不同的编程语言有编写,编译检查,解释执行等过程. 具体的编程语言都有: 1,变 ...
团队作业8——测试与发布（Beta阶段）之展示博客
展示博客 1. 团队成员的简介和个人博客地址,团队的源码仓库地址. a.陈福鹏擅长技术:java.web等网站方面技术: 博客:http://www.cnblogs.com/royalchen/b. ...
AWS EC2安装docker时的问题
在AWS EC2的实例(Ubuntu)里面安装docker时,使用通常的安装步骤 :~$ sudo apt-get update :~$ sudo apt-get install docker 安装完 ...
性能测试问题_tomcat占用内存很高，响应速度很慢
Cronolog 1. 问题描述 Tomcat占用服务器内存过大导致访问变慢 2. 问题原因查看catalina.out文件过大,写日志时占用内存过大 3. 解决 ...

Hbase（七）hbase高级编程

Hbase（七）hbase高级编程的更多相关文章

随机推荐

热门专题