通过Python操作hbase api
# coding=utf-8
# Author: ruin
"""
discrible: """
import struct

from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket
from thrift.transport import TTransport

from hbase import Hbase


# Method for encoding ints with Thrift's string encoding
def encode(n):
    """Pack the integer ``n`` into a byte string using ``struct`` format ``"i"``.

    The ``"i"`` format is a signed C ``int`` in the platform's native byte
    order and size, so the result is only guaranteed to round-trip through
    :func:`decode` on a machine with the same layout.

    :param n: integer to pack; must fit into a signed C ``int``.
    :return: the packed ``bytes`` value.
    :raises struct.error: if ``n`` is not an integer or is out of range.
    """
    # NOTE(review): native byte order is used here; if these values must be
    # readable by non-Python HBase clients, confirm the expected endianness.
    return struct.pack("i", n)


# Method for decoding ints with Thrift's string encoding
def decode(s):
    """Turn a stored cell value back into an ``int``.

    Two representations are accepted:

    * a value made only of digit characters (``b"123"`` or ``"123"``), which
      is parsed with ``int()``;
    * anything else, which is unpacked with ``struct`` format ``"i"`` (the
      layout produced by ``struct.pack("i", n)``).

    Note the inherent ambiguity: a packed value whose bytes all happen to be
    ASCII digits is interpreted as decimal text, not as a packed integer.

    :param s: ``bytes`` (or digit-only ``str``) value to decode.
    :return: the decoded integer.
    :raises struct.error: if ``s`` is not digit-only and does not have the
        size required by the ``"i"`` format.
    :raises TypeError: if ``s`` is a non-digit ``str`` (``struct.unpack``
        needs a bytes-like object).
    """
    if s.isdigit():
        return int(s)
    return struct.unpack('i', s)[0]
class HBaseApi(object):
    """Convenience wrapper around the HBase Thrift client for a single table.

    On construction the wrapper opens a buffered Thrift transport to the
    given host/port, registers the column families it works with (``info``
    by default) and creates the table if the server does not list it yet.

    All reads and writes go to the *first* configured column family; the
    ``qualifier`` arguments select the column inside that family.
    """

    def __init__(self, table='fr_test_hbase:test_api', host='10.2.46.240', port=9090):
        """Connect to the Thrift server and make sure ``table`` exists.

        :param table: table name (``str`` or ``bytes``), optionally prefixed
            with a namespace as in ``'namespace:table'``.
        :param host: host name or IP address of the HBase Thrift server.
        :param port: TCP port of the HBase Thrift server.
        """
        self.table = self._to_bytes(table)
        self.host = host
        self.port = port
        # Connect to HBase Thrift server
        self.transport = TTransport.TBufferedTransport(TSocket.TSocket(host, port))
        self.protocol = TBinaryProtocol.TBinaryProtocolAccelerated(self.transport)
        # Create and open the client connection
        self.client = Hbase.Client(self.protocol)
        self.transport.open()
        # set type and field of column families
        self.set_column_families([bytes], ['info'])
        self._build_column_families()

    @staticmethod
    def _to_bytes(value):
        """Return ``value`` as UTF-8 bytes (bytes pass through unchanged)."""
        if isinstance(value, bytes):
            return value
        if isinstance(value, str):
            return value.encode('utf-8')
        # Anything else (e.g. an int row key) is sent as its text form.
        return str(value).encode('utf-8')

    def _column(self, qualifier):
        """Build the ``b'family:qualifier'`` column name for the first family."""
        return (self.columnFamilies[0] + ':' + qualifier).encode('utf-8')

    def _build_mutation(self, qualifier, value):
        """Create the ``Hbase.Mutation`` that stores ``value`` under ``qualifier``.

        ``bytes`` are stored as-is, ``str`` is UTF-8 encoded and ``int`` is
        packed with the module-level ``encode`` helper.

        :raises TypeError: for any other value type. This used to surface as
            an unbound-variable error in ``put`` and, in ``puts``, silently
            reused the previous row's mutation.
        """
        # Resolve the payload first so bad input fails before any Thrift
        # object is built.
        if isinstance(value, bytes):
            payload = value
        elif isinstance(value, str):
            payload = value.encode('utf-8')
        elif isinstance(value, int):
            payload = encode(value)
        else:
            raise TypeError(
                'unsupported value type for HBase cell: %s' % type(value).__name__
            )
        return Hbase.Mutation(column=self._column(qualifier), value=payload)

    def set_column_families(self, type_list, col_list=None):
        """Record the column families (and their value types) used by this wrapper.

        :param type_list: list of Python types, one per column family.
        :param col_list: list of column family names; defaults to a fresh
            ``['info']`` list so instances never share one default object.
        """
        self.columnFamiliesType = type_list
        self.columnFamilies = ['info'] if col_list is None else col_list

    def _build_column_families(self):
        """Create the table with the configured column families if it is missing.

        :return: None
        """
        tables = self.client.getTableNames()
        if self.table not in tables:
            self.__create_table(self.table)

    def __create_table(self, table):
        """Create ``table`` in HBase with one descriptor per column family.

        :param table: table name, e.g. ``fr_test_hbase:fr_test`` (``str`` or
            ``bytes``; ``self.table`` is already bytes, so it must not be
            encoded a second time).
        :return: None
        """
        # NOTE(review): family names are passed through exactly as configured;
        # confirm whether the generated Thrift code expects bytes here.
        descriptors = [
            Hbase.ColumnDescriptor(name=family) for family in self.columnFamilies
        ]
        self.client.createTable(self._to_bytes(table), descriptors)

    def close(self):
        """Close the Thrift transport if one was created."""
        transport = getattr(self, 'transport', None)
        if transport is not None:
            transport.close()

    def __del__(self):
        # The instance may be only partially initialised (e.g. __init__
        # raised before the transport existed), so go through close().
        self.close()

    def __del_table(self, table):
        """Delete a table; HBase requires it to be disabled first."""
        self.client.disableTable(table)
        self.client.deleteTable(table)

    def getColumnDescriptors(self):
        """Return the server's column descriptors for this wrapper's table."""
        return self.client.getColumnDescriptors(self.table)

    def put(self, rowKey, qualifier, value):
        """Write one cell.

        :param rowKey: row key (``str`` or ``bytes``).
        :param qualifier: column qualifier inside the first column family.
        :param value: cell value; ``str``, ``bytes`` or ``int``.
        :raises TypeError: if ``value`` has an unsupported type.
        :description: HBaseApi(table).put('rowKey', 'column', 'value')
        """
        mutation = self._build_mutation(qualifier, value)
        self.client.mutateRow(self.table, self._to_bytes(rowKey), [mutation], {})

    def puts(self, rowKeys, qualifier, values):
        """Write the same column for several rows in one batch call.

        :param rowKeys: list of row keys, one per value; a single non-list
            key is repeated for every value.
        :param qualifier: column qualifier inside the first column family.
        :param values: iterable of cell values (``str``, ``bytes`` or ``int``).
        :raises IndexError: if fewer row keys than values are supplied.
        :raises TypeError: if a value has an unsupported type.

        Usage::

            >>> HBaseApi('table').puts(rowKeys=['1', '2', '3'], qualifier="name", values=['a', 'b', 'c'])
        """
        values = list(values)
        if not isinstance(rowKeys, list):
            rowKeys = [rowKeys] * len(values)
        if len(rowKeys) < len(values):
            raise IndexError(
                'puts() got %d row keys for %d values' % (len(rowKeys), len(values))
            )
        # zip() stops at the shorter sequence, so surplus row keys are ignored
        # exactly as before.
        batch = [
            Hbase.BatchMutation(
                row=self._to_bytes(key),
                mutations=[self._build_mutation(qualifier, value)],
            )
            for key, value in zip(rowKeys, values)
        ]
        self.client.mutateRows(self.table, batch, {})

    def getRow(self, row, qualifier='name'):
        """Read one column of one row.

        :param row: row key (``str`` or ``bytes``).
        :param qualifier: column qualifier inside the first column family.
        :return: ``{row_key: value}`` with both decoded as UTF-8 text, or an
            empty dict when the row or the column does not exist.
        """
        column = self._column(qualifier)
        found = {}
        for result in self.client.getRow(self.table, self._to_bytes(row), {}):
            cell = result.columns.get(column)
            if cell is not None:
                found[result.row.decode('utf-8')] = cell.value.decode('utf-8')
        return found

    def getRows(self, rows, qualifier='name'):
        """Read the same column for several rows.

        :param rows: list of row keys.
        :param qualifier: column qualifier shared by all lookups.
        :return: list with one ``getRow`` result dict per requested key, in
            the same order.
        """
        return [self.getRow(key, qualifier) for key in rows]

    def scanner(self, numRows=100, startRow=None, stopRow=None, qualifier='name'):
        """Scan the table and collect one column from up to ``numRows`` rows.

        :param numRows: maximum number of rows to fetch.
        :param startRow: first row key of the scan, or None for no lower bound.
        :param stopRow: row key at which the scan stops, or None for no upper bound.
        :param qualifier: column qualifier inside the first column family.
        :return: list of ``{row_key: value}`` dicts (UTF-8 decoded); rows that
            do not contain the column are skipped.
        """
        start = None if startRow is None else self._to_bytes(startRow)
        stop = None if stopRow is None else self._to_bytes(stopRow)
        scan = Hbase.TScan(start, stop)
        scanner_id = self.client.scannerOpenWithScan(self.table, scan, {})
        try:
            row_list = self.client.scannerGetList(scanner_id, numRows)
        finally:
            # Always release the server-side scanner, even if the fetch fails.
            self.client.scannerClose(scanner_id)

        column = self._column(qualifier)
        ret = []
        for result in row_list:
            cell = result.columns.get(column)
            if cell is not None:
                ret.append({result.row.decode('utf-8'): cell.value.decode('utf-8')})
        return ret


def demo():
    """Write a handful of sample rows to a test table and print a scan of it."""
    ha = HBaseApi('fr_test_hbase:test_log1')
    rowKeys = [str(key) for key in range(10001, 10010)]
    values = ['fr' + str(val) for val in range(10001, 10010)]
    ha.puts(rowKeys, 'name', values)
    print(ha.scanner())


if __name__ == "__main__":
    demo()
通过Python操作hbase api的更多相关文章
- python 操作 hbase
python 是万能的,当然也可以通过api去操作big database 的hbase了,python是通过thrift去访问操作hbase 以下是在centos7 上安装操作,前提是hbase已经 ...
- 【Hbase三】Java,python操作Hbase
Java,python操作Hbase 操作Hbase python操作Hbase 安装Thrift之前所需准备 安装Thrift 产生针对Python的Hbase的API 启动Thrift服务 执行p ...
- 使用IDEA操作Hbase API 报错:org.apache.hadoop.hbase.client.RetriesExhaustedException的解决方法:
使用IDEA操作Hbase API 报错:org.apache.hadoop.hbase.client.RetriesExhaustedException的解决方法: 1.错误详情: Excepti ...
- Hbase理论&&hbase shell&&python操作hbase&&python通过mapreduce操作hbase
一.Hbase搭建: 二.理论知识介绍: 1Hbase介绍: Hbase是分布式.面向列的开源数据库(其实准确的说是面向列族).HDFS为Hbase提供可靠的底层数据存储服务,MapReduce为Hb ...
- Python操作HBase之happybase
安装Thrift 安装Thrift的具体操作,请点击链接 pip install thrift 安装happybase pip install happybase 连接(happybase.Conne ...
- python操作Hbase
本地操作 启动thrift服务:./bin/hbase-daemon.sh start thrift hbase模块产生: 下载thrfit源码包:thrift-0.8.0.tar.gz 解压安装 . ...
- python 操作Hbase 详解
博文参考:https://www.cnblogs.com/tashanzhishi/p/10917956.html 如果你们学习过Python,可以用Python来对Hbase进行操作. happyb ...
- python操作ansible api示例
#!/usr/bin/env python # -*- coding:utf-8 -*- import json import shutil from collections import named ...
- Python 操作 GA API 指南
因为需要写一个 Blog Feature 的缘故,所以接触了下 GA 的 Python API,发现 G 家的 API 不是那么直观,比较绕,但是,在使用过程中发现其实 G 家的 API 设计挺有意思 ...
随机推荐
- android自定义View&&简单布局&&回调方法
一.内容描述 根据“慕课网”上的教程,实现一个自定义的View,且该View中使用自定义的属性,同时为该自定义的View定义点击事件的回调方法. 二.定义自定义的属性 在res/valus/ 文件夹下 ...
- 构建基于Javascript的移动web CMS入门——简单介绍
看到项目上的移动框架,网上寻找了一下,发现原来这些一開始都有. 于是,找了个演示样例開始构建一个移动平台的CMS--墨颀 CMS,方便项目深入理解的同一时候.也能够自己维护一个CMS系统. 构建框架 ...
- windows操作系统记事本保存操作时间、字符映射表的打开、步骤记录器使用
记事本自动记录修改时间 你有用记事本记账或写日记的习惯吗?其实在记事本的文档开头输入".LOG"(无引号,字母为大写),之后记录内容并保存,这样以后打开就会看到之前每次修改的时间了 ...
- wget 命令
wget是在Linux下开发的开放源代码的软件,作者是Hrvoje Niksic,后来被移植到包括Windows在内的各个平台上.它有以下功能和特点:(1)支持断点下传功能:这一点,也是网络蚂蚁和Fl ...
- 125. Valid Palindrome【easy】
125. Valid Palindrome[easy] Given a string, determine if it is a palindrome, considering only alphan ...
- win7安装RabbitMQ
1.下载并安装erlang http://www.erlang.org/downloads 2.下载并安装RabbitMQ http://www.rabbitmq.com/install-window ...
- hive分桶 与保存数据的方式
创建分桶的表 create table t_buck(id int ,name string) clustered by (id ) sorted by (id) into 4 buckets ; ...
- libpcap丢包原理分析及Fedora 9 内核2.6.25.14下安装PF-RING的详细过程
看到网上有人讲解fedora 9下安装PF-RING的过程,都是几年前的了,比较老了,我安装PF-RING就是为了使用libpcap库,libpcap的原理是通过socket 将数据包从网卡 捕获数据 ...
- UVa 12563 劲歌金曲 刘汝佳第二版例题9-5;
Problem J Jin Ge Jin Qu [h]ao (If you smiled when you see the title, this problem is for you ^_^) Fo ...
- NDK 安装小结
建议先看后面的错误,可以先执行那些命令以免出现相关问题. chmod a+x android-ndk-r10c-linux-x86_64.bin ./android-ndk-r10c-linux-x8 ...