给HBase添加一二级索引,HBase协处理器结合solr

代码如下

package com.hbase.coprocessor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.UUID; import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory; /**
* @author:FengZhen
* @create:2018年7月9日
*/
public class HbaseDataSyncSolrObserver extends BaseRegionObserver{
public static Logger log = LoggerFactory.getLogger(HbaseDataSyncSolrObserver.class);
/**
* start
* @param e
* @throws IOException
*/
@Override
public void start(CoprocessorEnvironment e) throws IOException {
} /**
* stop
* @param e
* @throws IOException
*/
@Override
public void stop(CoprocessorEnvironment e) throws IOException {
} /**
* Called after the client stores a value
* after data put to hbase then prepare update builder to bulk Solr
*
* @param e
* @param put
* @param edit
* @param durability
* @throws IOException
*/
@Override
public void postPut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit, Durability durability) throws IOException {
NavigableMap<byte[], List<Cell>> familyMap = put.getFamilyCellMap();
for (Map.Entry<byte[], List<Cell>> entry : familyMap.entrySet()) {
String id = UUID.randomUUID().toString();
String rowkey = Bytes.toString(CellUtil.cloneRow(entry.getValue().get(0)));
List<String> tags = new ArrayList<String>();
for (Cell cell : entry.getValue()) {
String key = Bytes.toString(CellUtil.cloneQualifier(cell));
if (key.contains("tb_") || key.contains("tm_")) {
tags.add(key);
}
}
if (null == tags || tags.size() <= 0) {
continue;
}
VmMemory vmMemory = new VmMemory();
vmMemory.setId(id);
vmMemory.setRowkey(rowkey);
vmMemory.setTags(tags);
SolrWriter.addDocToCache(vmMemory);
}
}
}

 Solr代码处理如下

package com.hbase.coprocessor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.Vector;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory; /**
* @author:FengZhen
* @create:2018年7月9日
*/
public class SolrWriter {
public static Logger log = LoggerFactory.getLogger(SolrWriter.class); public static String urlSolr = "node244.qt:2181,node245.qt:2181,node246.qt:2181"; //solr地址 192.168.1.232:2181
public static String defaultCollection = "socialSecurity"; //默认collection tagCollectionHDFS socialSecurity
public static int zkClientTimeOut =20000; //zk客户端请求超时间
public static int zkConnectTimeOut =10000; //zk客户端连接超时间
public static CloudSolrClient cloudSolrClient = null; public static int maxCacheCount = 200; //缓存大小,当达到该上限时提交
public static Vector<VmMemory> cache = null; //缓存
public static Vector<String> cacheRowkey = null;
public static Lock commitLock =new ReentrantLock(); //在添加缓存或进行提交时加�? public static int maxCommitTime = 60*1; //�?大提交时�? static {
Configuration conf = HBaseConfiguration.create();
urlSolr = conf.get("hbase.solr.zklist", "node244.qt:2181,node245.qt:2181,node246.qt:2181"); // 192.168.1.231:2181,192.168.1.232:2181,192.168.1.233:2181
defaultCollection = conf.get("hbase.solr.collection","socialSecurity");
zkClientTimeOut = conf.getInt("hbase.solr.zkClientTimeOut", 10000);
zkConnectTimeOut = conf.getInt("hbase.solr.zkConnectTimeOut", 10000);
maxCacheCount = conf.getInt("hbase.solr.maxCacheCount", 200);
maxCommitTime = conf.getInt("hbase.solr.maxCommitTime", 60*1); log.info("solr init param"+urlSolr+" "+defaultCollection+" "+zkClientTimeOut+" "+zkConnectTimeOut+" "+maxCacheCount+" "+maxCommitTime);
try {
cache=new Vector<VmMemory>(maxCacheCount);
cacheRowkey = new Vector<String>(maxCacheCount);
cloudSolrClient = new CloudSolrClient(urlSolr);
cloudSolrClient.setDefaultCollection(defaultCollection);
cloudSolrClient.setZkClientTimeout(zkClientTimeOut);
cloudSolrClient.setZkConnectTimeout(zkConnectTimeOut);
//启动定时任务,第�?次延�?10执行,之后每隔指定时间执行�?�?
Timer timer=new Timer();
timer.schedule(new CommitTimer(),10*1000,maxCommitTime*1000);
} catch (Exception ex){
ex.printStackTrace();
}
}
/**
* 批量提交
*/
public void inputDoc(List<VmMemory> vmMoneyList) throws IOException, SolrServerException {
if (vmMoneyList == null || vmMoneyList.size() == 0) {
log.info("==========inputDoc:return========");
return;
}
List<SolrInputDocument> doclist= new ArrayList<SolrInputDocument>(vmMoneyList.size());
for (VmMemory vm : vmMoneyList) {
String id = vm.getId();
String rowkey = vm.getRowkey();
List<String> tags = vm.getTags();
log.info("===id={}===rowkey={}=======",id,rowkey);
Set<String> tagSet = new HashSet<String>();
SolrQuery solrQuery = new SolrQuery();
solrQuery.setQuery("rowkey:"+rowkey);
QueryResponse queryResponse = cloudSolrClient.query(solrQuery);
List<SolrDocument> rowkeys = queryResponse.getResults();
SolrInputDocument document = new SolrInputDocument(); if (null != rowkeys && rowkeys.size() > 0) {
for(SolrDocument solrDocument : rowkeys) {
id = (String)solrDocument.get("id");
rowkey = (String)solrDocument.get("rowkey");
List<String> solrTags = (List<String>)solrDocument.get("tags");
tagSet.addAll(solrTags);
}
}
tagSet.addAll(tags);
document.addField("id", id);
document.addField("rowkey", rowkey);
List<String> tagIds = new ArrayList<String>(tagSet);
for (String tagId : tagIds) {
document.addField("tags", tagId);
}
doclist.add(document);
}
cloudSolrClient.add(doclist);
cloudSolrClient.commit(true, true, true);
} /**
* 单条提交
*/
public void inputDoc(VmMemory vm) throws IOException, SolrServerException {
if (vm == null) {
return;
}
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", vm.getId());
doc.addField("rowkey", vm.getRowkey());
List<String> tags = vm.getTags();
for (String tag:tags) {
doc.addField("tags", tag);
}
cloudSolrClient.add(doc);
cloudSolrClient.commit(true, true, true);
} public void deleteDoc(List<String> rowkeys) throws IOException, SolrServerException {
if (rowkeys == null || rowkeys.size() == 0) {
return;
}
cloudSolrClient.deleteById(rowkeys);
cloudSolrClient.commit(true, true, true);
} public void deleteDoc(String rowkey) throws IOException, SolrServerException {
cloudSolrClient.deleteById(rowkey);
cloudSolrClient.commit(true, true, true);
} /**
* 添加记录到cache,如果cache达到maxCacheCount,则提交
* addDocToCache会在hbase每次插入数据时将记录插入缓存�?
* 并且判断是否达到上限,如果达到则将缓存内�?用数据提交到solr
*/
public static void addDocToCache(VmMemory vmMemory) {
commitLock.lock();
try {
//判断cache中是否有重复的rowkey,有则先提交
if (cacheRowkey.contains(vmMemory.getRowkey())) {
new SolrWriter().inputDoc(cache);
cache.clear();
cacheRowkey.clear();
}
cache.add(vmMemory);
cacheRowkey.add(vmMemory.getRowkey());
if (cache.size() >= maxCacheCount) {
new SolrWriter().inputDoc(cache);
cache.clear();
cacheRowkey.clear();
}
} catch (Exception ex) {
log.info(ex.getMessage());
} finally {
commitLock.unlock();
}
} /**
* 提交定时�?
* CommitTimer 则会每隔�?段时间提交一次,
* 以保证缓存内�?有数据最终写入solr
*/
static class CommitTimer extends TimerTask {
@Override
public void run() {
commitLock.lock();
try {
if (cache.size() > 0) { //大于0则提�?
log.info("timer commit count:"+cache.size());
new SolrWriter().inputDoc(cache);
cache.clear();
cacheRowkey.clear();
}
} catch (Exception ex) {
log.info(ex.getMessage());
} finally {
commitLock.unlock();
}
}
}
}

协处理器使用步骤如下

1.代码打jar包,并上传至HDFS

2.创建HBase表并添加协处理器,如下

hbase(main):002:0> create 'socialSecurityTest','tags','userInfo'
hbase(main):004:0> disable 'socialSecurityTest'
hbase(main):010:0> alter 'socialSecurityTest',METHOD=>'table_att','coprocessor'=>'hdfs://nameservice/user/solr/hbase/observer/HBaseCoprocessor.jar|com.hbase.coprocessor.HbaseDataSyncSolrObserver|1001|collection=tagCollection'
hbase(main):027:0> enable 'socialSecurityTest'

3.测试

hbase(main):016:0> put 'socialSecurityTest','rowkey-1','tags:0_1','1'

此时,可通过HBase日志查看协处理器的处理情况。

没错误的情况下,Solr中应该已经也有数据了

使用过程中出现的问题

2018-07-11 17:06:14,054 INFO  [LruBlockCacheStatsExecutor] hfile.LruBlockCache: totalSize=417.42 KB, freeSize=395.89 MB, max=396.30 MB, blockCount=0, accesses=0, hits=0, hitRatio=0, cachingAccesses=0, cachingHits=0, cachingHitsRatio=0,evictions=8069, evicted=0, evictedPerRun=0.0
2018-07-11 17:06:23,523 ERROR [RpcServer.FifoWFPBQ.priority.handler=19,queue=1,port=16000] master.MasterRpcServices: Region server node231.qt,16020,1531219308266 reported a fatal error:
ABORTING region server node231.qt,16020,1531219308266: The coprocessor com.hbase.coprocesser.HbaseDataSyncEsObserver threw java.lang.NoClassDefFoundError: org/apache/http/entity/mime/content/ContentBody
Cause:
java.lang.NoClassDefFoundError: org/apache/http/entity/mime/content/ContentBody
at com.hbase.coprocesser.SolrUtil.insert(SolrUtil.java:53)
at com.hbase.coprocesser.HbaseDataSyncEsObserver.postPut(HbaseDataSyncEsObserver.java:79)
at org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost$32.call(RegionCoprocessorHost.java:923)
at org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost$RegionOperation.call(RegionCoprocessorHost.java:1660)
at org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost.execOperation(RegionCoprocessorHost.java:1734)
at org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost.execOperation(RegionCoprocessorHost.java:1692)
at org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost.postPut(RegionCoprocessorHost.java:919)
at org.apache.hadoop.hbase.regionserver.HRegion.doMiniBatchMutation(HRegion.java:3413)
at org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2986)
at org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2928)
at org.apache.hadoop.hbase.regionserver.RSRpcServices.doBatchOp(RSRpcServices.java:748)
at org.apache.hadoop.hbase.regionserver.RSRpcServices.doNonAtomicRegionMutation(RSRpcServices.java:708)
at org.apache.hadoop.hbase.regionserver.RSRpcServices.multi(RSRpcServices.java:2124)
at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:32393)
at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2141)
at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:112)
at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:187)
at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:167)
Caused by: java.lang.ClassNotFoundException: org.apache.http.entity.mime.content.ContentBody
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:338)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 18 more

两种解决方式

一、将缺少的jar包放入HBase的lib下

二、添加依赖重新打包即可,依赖如下

<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpmime</artifactId>
<version>4.3.2</version>
</dependency> 

pom添加一下内容

<build>
<finalName>SolrTest</finalName> <plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass></mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

<dependency>

<groupId>org.apache.httpcomponents</groupId>

<artifactId>httpmime</artifactId>

<version>4.3.2</version>

</dependency>

HBase协处理器的使用(添加Solr二级索引)的更多相关文章

  1. Hbase 学习(九) 华为二级索引(原理)

    这个是华为的二级索引方案,已经开放源代码了,下面是网上的一篇讲解原理的帖子,发出来和大家共享一下. 经过本人认真阅读了一下代码,发现这个源码仅供参考,想要集成到原有的集群当中是有点儿难度的,它对hba ...

  2. HBase协处理器同步二级索引到Solr

    一. 背景二. 什么是HBase的协处理器三. HBase协处理器同步数据到Solr四. 添加协处理器五. 测试六. 协处理器动态加载 一. 背景 在实际生产中,HBase往往不能满足多维度分析,我们 ...

  3. 基于Solr实现HBase的二级索引

    文章来源:http://www.open-open.com/lib/view/open1421501717312.html 实现目的: 由于hbase基于行健有序存储,在查询时使用行健十分高效,然后想 ...

  4. CDH使用Solr实现HBase二级索引

      一.为什么要使用Solr做二级索引二.实时查询方案三.部署流程3.1 安装HBase.Solr3.2 增加HBase复制功能3.3创建相应的 SolrCloud 集合3.4 创建 Lily HBa ...

  5. CDH版本Hbase二级索引方案Solr key value index

    概述 在Hbase中,表的RowKey 按照字典排序, Region按照RowKey设置split point进行shard,通过这种方式实现的全局.分布式索引. 成为了其成功的最大的砝码. 然而单一 ...

  6. HBase协处理器同步二级索引到Solr(续)

    一. 已知的问题和不足二.解决思路三.代码3.1 读取config文件内容3.2 封装SolrServer的获取方式3.3 编写提交数据到Solr的代码3.4 拦截HBase的Put和Delete操作 ...

  7. HBase + Solr Cloud实现HBase二级索引

    1. 执行流程 2. Solr Cloud实现 http://blog.csdn.net/u011462328/article/details/53008344 3. HBase实现 1) 自定义Ob ...

  8. hbase基于solr配置二级索引

    一.概述 Hbase适用于大表的存储,通过单一的RowKey查询虽然能快速查询,但是对于复杂查询,尤其分页.查询总数等,实现方案浪费计算资源,所以可以针对hbase数据创建二级索引(Hbase Sec ...

  9. Hbase(三) hbase协处理器与二级索引

    一.协处理器—Coprocessor 1. 起源Hbase 作为列族数据库最经常被人诟病的特性包括:无法轻易建立“二级索引”,难以执 行求和.计数.排序等操作.比如,在旧版本的(<0.92)Hb ...

随机推荐

  1. ADO.NET详细学习笔记《一》

    目录 ADO.NET和ADO的区别 ADO.NET的五大核心对象 Connection对象 Command对象 DataAdapter对象,DataSet对象 DataReader对象 [1]ADO. ...

  2. 2016-06-19 NOIP模拟赛

          2016-06-19 NOIP模拟赛 by coolyangzc 共3道题目,时间3小时 题目名 高级打字机 不等数列 经营与开发 源文件 type.cpp/c/pas num.cpp/c ...

  3. 【BZOJ2730】[HNOI2012]矿场搭建 Tarjan

    [BZOJ2730][HNOI2012]矿场搭建 Description 煤矿工地可以看成是由隧道连接挖煤点组成的无向图.为安全起见,希望在工地发生事故时所有挖煤点的工人都能有一条出路逃到救援出口处. ...

  4. outlook撤回已发送邮件

    官方教程参考: https://support.office.com/zh-cn/article/%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6%E5%90%8E%E6%92 ...

  5. ActiveMQ5.10.2版本配置JMX

    ActiveMQ的特性之一是很好的支持JMX.通过JMX MBeans可以很方便的监听和控制ActiveMQ的broker. 鉴于官方网站提供的JMX特性说明对于远程访问的配置流程不是很完整,笔者在实 ...

  6. JavaScript确定一个字符串是否包含在另一个字符串中的四种方法

    一.indexOf() 1.定义 indexOf()方法返回String对象第一次出现指定字符串的索引,若未找到指定值,返回-1.(数组同一个概念) 2.语法 str.indexOf(searchVa ...

  7. Vue.js之组件嵌套小demo

    Vue.js之组件嵌套的小demo项目 第一步:初始化一个wabpack项目,这里不在复述.第二步:在components文件夹下新建Header.vue Footer.vue和Users.vue三个 ...

  8. MYSQL中case when then else end 用法

    条件语句 delimiter \\CREATE PROCEDURE proc_if ()BEGIN      declare i int default 0;   if i = 1 THEN      ...

  9. python基础:while循环,for循环

    ---恢复内容开始--- 1.使用while循环输出1 2 3 4 5 6     8 9 10 2.求1-100的所有数的和 3.输出 1-100 内的所有奇数 4.输出 1-100 内的所有偶数 ...

  10. 斯坦福大学Andrew Ng - 机器学习笔记(2) -- 逻辑回归 & 正则化

    大概用了一个月,Andrew Ng老师的机器学习视频断断续续看完了,以下是个人学习笔记,入门级别,权当总结.笔记难免有遗漏和误解,欢迎讨论. 鸣谢:中国海洋大学黄海广博士提供课程视频和个人笔记,在此深 ...