通过Python操作hbase api
# coding=utf-8
# Author: ruin
"""
discrible: """
from thrift.transport import TSocket
from thrift.protocol import TBinaryProtocol
from thrift.transport import TTransport
from hbase import Hbase import struct # Method for encoding ints with Thrift's string encoding
def encode(n):
return struct.pack("i", n) # Method for decoding ints with Thrift's string encoding
def decode(s):
return int(s) if s.isdigit() else struct.unpack('i', s)[0]
class HBaseApi(object): def __init__(self,table='fr_test_hbase:test_api',host='10.2.46.240',port=9090):
self.table = table.encode('utf-8')
self.host = host
self.port = port
# Connect to HBase Thrift server
self.transport = TTransport.TBufferedTransport(TSocket.TSocket(host, port))
self.protocol = TBinaryProtocol.TBinaryProtocolAccelerated(self.transport) # Create and open the client connection
self.client = Hbase.Client(self.protocol)
self.transport.open()
# set type and field of column families
self.set_column_families([bytes],['info'])
self._build_column_families() def set_column_families(self,type_list,col_list=['info']):
self.columnFamiliesType = type_list self.columnFamilies = col_list def _build_column_families(self):
"""
give all column families name list,create a table
:return:
"""
tables = self.client.getTableNames()
if self.table not in tables:
self.__create_table(self.table) def __create_table(self,table):
"""
create table in hbase with column families
:param table: fr_test_hbase:fr_test
:return:
""" columnFamilies = []
for columnFamily in self.columnFamilies:
name = Hbase.ColumnDescriptor(name = columnFamily)
columnFamilies.append(name)
table = table.encode('utf-8')
print(type(table),type(columnFamilies)) self.client.createTable(table,columnFamilies) def __del__(self):
self.transport.close() def __del_table(self,table):
"""
delete a table,first need to disable it
"""
self.client.disableTable(table)
self.client.deleteTable(table) def getColumnDescriptors(self):
return self.client.getColumnDescriptors(self.table) def put(self, rowKey, qualifier, value):
"""
put one row
column is column name,value is column value
:param rowKey: rowKey
:param column: column name
:param value: column value
:description: HbaseApi(table).put('rowKey','column','value')
""" rowKey = rowKey.encode('utf-8')
mutations = []
# for j, column in enumerate(column):
if isinstance(value, str):
value = value.encode('utf-8')
m_name = Hbase.Mutation(column=(self.columnFamilies[0]+':'+qualifier).encode('utf-8'), value=value)
elif isinstance(value, int):
m_name = Hbase.Mutation(column=(self.columnFamilies[0]+':'+qualifier).encode('utf-8'), value=encode(value))
mutations.append(m_name)
self.client.mutateRow(self.table, rowKey, mutations, {}) def puts(self,rowKeys,qualifier,values):
""" put sevel rows, `qualifier` is autoincrement :param rowKeys: a single rowKey
:param values: values is a 2-dimension list, one piece element is [name, sex, age]
:param qualifier: column family qualifier Usage:: >>> HBaseTest('table').puts(rowKeys=[1,2,3],qualifier="name",values=[1,2,3]) """ mutationsBatch = []
if not isinstance(rowKeys,list):
rowKeys = [rowKeys] * len(values) for i, value in enumerate(values):
mutations = []
# for j, column in enumerate(value):
if isinstance(value, str):
value = value.encode('utf-8')
m_name = Hbase.Mutation(column=(self.columnFamilies[0]+':'+qualifier).encode('utf-8'), value=value)
elif isinstance(value, int):
m_name = Hbase.Mutation(column=(self.columnFamilies[0]+':'+qualifier).encode('utf-8'), value=encode(value))
mutations.append(m_name)
mutationsBatch.append(Hbase.BatchMutation(row = rowKeys[i].encode('utf-8'),mutations=mutations))
self.client.mutateRows(self.table, mutationsBatch, {}) def getRow(self,row, qualifier='name'):
"""
get one row from hbase table
:param row:
:param qualifier:
:return:
"""
# res = []
row = self.client.getRow(self.table, row.encode('utf-8'),{})
for r in row:
rd = {}
row = r.row.decode('utf-8')
value = (r.columns[b'info:name'].value).decode('utf-8')
rd[row] = value
# res.append(rd)
# print ('the row is ',r.row.decode('utf-8'))
# print ('the value is ',(r.columns[b'info:name'].value).decode('utf-8'))
return rd def getRows(self, rows, qualifier='name'):
"""
get rows from hbase,all the row sqecify the same 'qualifier'
:param rows: a list of row key
:param qualifier: column
:return: None
"""
# grow = True if len(rows) == 1 else False
res = []
for r in rows:
res.append(self.getRow(r,qualifier))
return res def scanner(self, numRows=100, startRow=None, stopRow=None):
""" :param numRows:
:param startRow:
:param stopRow:
:return:
"""
scan = Hbase.TScan(startRow, stopRow)
scannerId = self.client.scannerOpenWithScan(self.table,scan, {}) ret = []
rowList = self.client.scannerGetList(scannerId, numRows) for r in rowList:
rd = {}
row = r.row.decode('utf-8')
value = (r.columns[b'info:name'].value).decode('utf-8')
rd[row] = value
# print ('the row is ',r.row.decode('utf-8'))
# print ('the value is ',(r.columns[b'info:name'].value).decode('utf-8'))
ret.append(rd) return ret def demo():
ha = HBaseApi('fr_test_hbase:test_log1')
# ha.put('0002','age','23')
rowKeys = [str(key) for key in range(10001,10010)]
values = ['fr'+str(val) for val in range(10001,10010)]
ha.puts(rowKeys,'name',values)
print(ha.scanner())
# print(ha.getRow('0001'))
# print(ha.getRows(rowKeys))
if __name__ == "__main__":
demo()
通过Python操作hbase api的更多相关文章
- python 操作 hbase
python 是万能的,当然也可以通过api去操作big database 的hbase了,python是通过thrift去访问操作hbase 以下是在centos7 上安装操作,前提是hbase已经 ...
- 【Hbase三】Java,python操作Hbase
Java,python操作Hbase 操作Hbase python操作Hbase 安装Thrift之前所需准备 安装Thrift 产生针对Python的Hbase的API 启动Thrift服务 执行p ...
- 使用IDEA操作Hbase API 报错:org.apache.hadoop.hbase.client.RetriesExhaustedException的解决方法:
使用IDEA操作Hbase API 报错:org.apache.hadoop.hbase.client.RetriesExhaustedException的解决方法: 1.错误详情: Excepti ...
- Hbase理论&&hbase shell&&python操作hbase&&python通过mapreduce操作hbase
一.Hbase搭建: 二.理论知识介绍: 1Hbase介绍: Hbase是分布式.面向列的开源数据库(其实准确的说是面向列族).HDFS为Hbase提供可靠的底层数据存储服务,MapReduce为Hb ...
- Python操作HBase之happybase
安装Thrift 安装Thrift的具体操作,请点击链接 pip install thrift 安装happybase pip install happybase 连接(happybase.Conne ...
- python操作Hbase
本地操作 启动thrift服务:./bin/hbase-daemon.sh start thrift hbase模块产生: 下载thrfit源码包:thrift-0.8.0.tar.gz 解压安装 . ...
- python 操作Hbase 详解
博文参考:https://www.cnblogs.com/tashanzhishi/p/10917956.html 如果你们学习过Python,可以用Python来对Hbase进行操作. happyb ...
- python操作ansible api示例
#!/usr/bin/env python # -*- coding:utf-8 -*- import json import shutil from collections import named ...
- Python 操作 GA API 指南
因为需要写一个 Blog Feature 的缘故,所以接触了下 GA 的 Python API,发现 G 家的 API 不是那么直观,比较绕,但是,在使用过程中发现其实 G 家的 API 设计挺有意思 ...
随机推荐
- js Json操作
JSON字符串: var jsonStr='{"fname":"json","fage":1}' JSON对象: var jsonOb ...
- Java实现XSS防御
XSS概述 跨站脚本攻击(Cross Site Scripting),缩写为XSS.恶意攻击者往Web页面里插入恶意Script代码,当用户浏览该页之时,嵌入其中Web里面的Script代码会被执行, ...
- 初识Quartz(二)
简单作业: ? 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 package quartz_pr ...
- nc在centos7上的安装和简单使用
下载 http://vault.centos.org/6.6/os/x86_64/Packages/nc-1.84-22.el6.x86_64.rpm rpm -iUv nc-1.84-22. ...
- IOS设计模式的六大设计原则之迪米特法则(LOD,Law Of Demeter)
定义 狭义的迪米特法则定义:也叫最少知识原则(LKP,Least Knowledge Principle).如果两个类不必彼此直接通信,那么这两个类就不应当发生直接的相互作用.如果其中的一个类需要调用 ...
- csla 与高cpu
在项目中一直使用csla 4.13. 项目一直正常,但是偶尔会出现iis占用的cpu 突然100%, 后面客户量大的情况,加入了缓存的机制.100%的情况出现的更多了. 当时有数据库死锁的原因.cpu ...
- Death to Binary? 分析模拟
/** 题目:Death to Binary? 链接:https://vjudge.net/contest/154246#problem/T 题意:略. 思路: 注意事项: 给的字符串存在前导0: 存 ...
- python 动态语言 __slots__
python 是动态语言,就是说可以动态的创建属性, 别的语言不行,再创建类的时候已经规定好了 使用__slots__,注意要用tuple定义同意绑定的属性名称,仅对当前类起作用,对继承的子类是不起作 ...
- tcp/iP协议族——IP工作原理及实例具体解释(下)
IP协议具体解释 上一篇文章文章主要介绍了IP服务的特点,IPv4头部结构IP分片.并用tcpdump抓取数据包,来观察IP数据报传送过程中IP的格式,以及分片的过程.本文主要介绍IP路由,IP ...
- Android无线测试之—UiAutomator UiDevice API介绍三
获取坐标与坐标点击 一.坐标相关的知识: 1)手机屏幕坐标:左上角开始到右下角结束 2)DP:设备独立像素,例如320像素显示到640像素上要拉伸一倍 3)Point:代表一个点(x,y),左上角的坐 ...