python Hbase Thrift pycharm 及引入包

cp -r hbase/ /usr/lib/python2.7/site-packages/

官方示例
http://code.google.com/p/hbase-thrift/source/browse/trunk/python/test/tables.py
http://yannramin.com/2008/07/19/using-facebook-thrift-with-python-and-hbase/
http://wiki.apache.org/hadoop/Hbase/ThriftApi

将生成的hbase目录copy到python的包下

cp -r hbase /usr/lib/python2.7/site-packages/

启动hbase和thrift服务

./bin/start-hbase.sh

./bin/hbase-daemon.sh start thrift

好像需要源码，我反正没找到src目录，忘记了  。。。。。。 忘记当初自己怎么装的了。

# --*-- coding:utf-8 --*--

import sys

import time

# 所有thrift编程都需要的

from thrift import Thrift

from thrift.transport import TSocket, TTransport

from thrift.protocol import TBinaryProtocol

# HBase的客户端代码

from hbase import ttypes

from hbase.Hbase import Client, ColumnDescriptor, Mutation

# End-to-end demo against the HBase Thrift gateway: recreate table `t`,
# write a few rows, then scan them back.
#
# The socket points at the Thrift server (started with
# `hbase-daemon.sh start thrift`), not at ZooKeeper or the HBase master.
# Written for Python 2; the print()/`except ... as` forms used below are
# also valid Python 3 syntax.
transport = TSocket.TSocket('localhost', 9090)
# Buffering is critical: raw sockets are very slow.
transport = TTransport.TBufferedTransport(transport)
# Wrap the transport in a protocol.
protocol = TBinaryProtocol.TBinaryProtocol(transport)
# Create a client that uses the protocol encoder.
client = Client(protocol)
# Connect.
transport.open()

t = 'tab2'

try:
    # Walk all table names and drop a stale copy of the demo table.
    print('scanning tables ......')
    for table in client.getTableNames():
        print('found:%s' % table)
        # Only the demo table may be touched; without this guard every
        # enabled table on the server would be disabled and deleted.
        if table != t:
            continue
        if client.isTableEnabled(table):
            print(' disabling table: %s' % t)
            # A table has to be disabled before it can be deleted.
            client.disableTable(table)
        print('deleting table: %s' % t)
        client.deleteTable(table)

    # Create the table with two column families.
    columns = []
    col = ColumnDescriptor()
    col.name = 'entry:'
    col.maxVersions = 10
    columns.append(col)
    col = ColumnDescriptor()
    col.name = 'unused:'
    columns.append(col)

    try:
        print('creating table : % s' % t)
        client.createTable(t, columns)
    except Exception as ae:
        # Best effort: the table may already exist. Fall back to str(ae)
        # for exceptions that carry no `message` attribute.
        print('Warn:' + str(getattr(ae, 'message', ae)))

    # Insert data.
    invalid = 'foo-\xfc\xa1\xa1\xa1\xa1\xa1'
    valid = 'foo-\xE7\x94\x9F\xE3\x83\x93\xE3\x83\xBC\xE3\x83\xAB'

    # Non-UTF-8 bytes are fine for cell data.
    mutations = [Mutation(column='entry:foo', value=invalid)]
    print(str(mutations))
    client.mutateRow(t, 'foo', mutations)  # 'foo' is the row key

    # Try empty strings: an empty cell value with an empty row key.
    mutations = [Mutation(column='entry:foo', value='')]
    try:
        client.mutateRow(t, '', mutations)
    except (ttypes.IOError, ttypes.IllegalArgument) as e:
        # NOTE(review): the upstream demo expects the server to reject an
        # empty row key; report it instead of aborting the whole script.
        print('expected error: %s' % e.message)

    # This row key is valid UTF-8.
    mutations = [Mutation(column='entry:foo', value=valid)]
    client.mutateRow(t, valid, mutations)

    # Full-table scan over the rows just created.
    print('starting scanner...')
    scanner = client.scannerOpen(t, '', ['entry:'])
    try:
        r = client.scannerGet(scanner)
        while r:
            # printRow(r[0])
            r = client.scannerGet(scanner)
    finally:
        # Release the server-side scanner even if the scan fails.
        client.scannerClose(scanner)
    print('scanner finished ')

    # Range scan over every column family of the table.
    columnNames = []
    for desc in client.getColumnDescriptors(t).values():
        print('column with name: %s' % desc.name)
        print(desc)
        columnNames.append(desc.name + ':')

    print('stating scanner...')
    scanner = client.scannerOpenWithStop(t, '', '', columnNames)
    try:
        r = client.scannerGet(scanner)
        while r:
            # printRow(r[0])
            r = client.scannerGet(scanner)
    finally:
        client.scannerClose(scanner)
    print('scanner finished')
finally:
    # Always close the socket, including when a step above raised.
    transport.close()



现在我们就可以用python来和hbase通信了

#-*-coding:utf-8 -*-

#!/usr/bin/python

from thrift import Thrift

from thrift.transport import TSocket

from thrift.transport import TTransport

from thrift.protocol import TBinaryProtocol

from hbase import Hbase

from hbase.ttypes import ColumnDescriptor,Mutation,BatchMutation

class HbaseWriter(object):
    """Small HBase Thrift client bound to a single table.

    Constructor arguments:
        address: host name or IP address of the HBase Thrift server.
        port: TCP port of the Thrift server.
        table: name of the table to work with (defaults to ``'user'``).

    Constructing an instance opens the connection, creates the table when
    it is missing, writes one demo row and scans the table once.
    """

    def __init__(self, address, port, table='user'):
        self.tableName = table
        # Connect to HBase through a buffered Thrift transport.
        self.transport = TTransport.TBufferedTransport(
            TSocket.TSocket(address, port))
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)
        self.transport.open()

        tables = self.client.getTableNames()
        if self.tableName not in tables:
            print("not in tables")
            self.__createTable()

        self.write("hell,babay!!!")
        self.read()

    def __del__(self):
        """Close the transport, if the constructor got far enough to set it."""
        transport = getattr(self, 'transport', None)
        if transport is not None:
            transport.close()

    def __createTable(self):
        """Create the table with the person:, contents: and info: families."""
        # NOTE(review): the original passed an explicit maxVersions to each
        # descriptor, but the values were lost from the source
        # (`maxVersions=)` does not compile). Leaving it unset lets the
        # Thrift-generated default apply -- confirm the intended value.
        families = [
            ColumnDescriptor(name=family)
            for family in ("person:", "contents:", "info:")
        ]
        self.client.createTable(self.tableName, families)

    def write(self, content):
        """Store ``content`` in the person: and info: columns of row "abc"."""
        row = "abc"
        mutations = [
            Mutation(column="person:", value=content),
            Mutation(column="info:", value=content),
        ]
        self.client.mutateRow(self.tableName, row, mutations)

    def read(self):
        """Scan the table and collect every ``contents:`` cell value.

        Returns:
            A list with the value of each ``contents:`` cell found, in scan
            order. Rows without that column are skipped.
        """
        values = []
        scannerId = self.client.scannerOpen(self.tableName, "", ["contents:"])
        try:
            while True:
                try:
                    batch = self.client.scannerGet(scannerId)
                except Exception:
                    # Kept from the original best-effort loop: a failing
                    # scannerGet ends the scan. NOTE(review): presumably
                    # older servers signal exhaustion this way -- confirm.
                    break
                if not batch:
                    # An empty result means the scanner is exhausted.
                    break
                if not isinstance(batch, (list, tuple)):
                    # Tolerate a single row result as well as a list.
                    batch = [batch]
                for result in batch:
                    cell = result.columns.get("contents:")
                    if cell is not None:
                        values.append(cell.value)
        finally:
            # Release the server-side scanner even when the scan fails.
            self.client.scannerClose(scannerId)
        return values

if __name__ == "__main__":
    # The port was an empty string in the original, which cannot connect;
    # 9090 is the Thrift port used by the other examples in this file.
    client = HbaseWriter("192.168.239.135", 9090, "person")

我们看下使用thrift生成的代码中都提供了哪些方法

提供的方法有：

void enableTable(Bytes tableName)

enable表

void disableTable(Bytes tableName)

disable表

bool isTableEnabled(Bytes tableName)

查看表状态

void compact(Bytes tableNameOrRegionName)

void majorCompact(Bytes tableNameOrRegionName)

getTableNames()

getColumnDescriptors(Text tableName)

getTableRegions(Text tableName)

void createTable(Text tableName, columnFamilies)

void deleteTable(Text tableName)

get(Text tableName, Text row, Text column)

getVer(Text tableName, Text row, Text column, i32 numVersions)

getVerTs(Text tableName, Text row, Text column, i64 timestamp, i32 numVersions)

getRow(Text tableName, Text row)

getRowWithColumns(Text tableName, Text row,  columns)

getRowTs(Text tableName, Text row, i64 timestamp)

getRowWithColumnsTs(Text tableName, Text row,  columns, i64 timestamp)

getRows(Text tableName,  rows)

getRowsWithColumns(Text tableName,  rows,  columns)

getRowsTs(Text tableName,  rows, i64 timestamp)

getRowsWithColumnsTs(Text tableName,  rows,  columns, i64 timestamp)

void mutateRow(Text tableName, Text row,  mutations)

void mutateRowTs(Text tableName, Text row,  mutations, i64 timestamp)

void mutateRows(Text tableName,  rowBatches)

void mutateRowsTs(Text tableName,  rowBatches, i64 timestamp)

i64 atomicIncrement(Text tableName, Text row, Text column, i64 value)

void deleteAll(Text tableName, Text row, Text column)

void deleteAllTs(Text tableName, Text row, Text column, i64 timestamp)

void deleteAllRow(Text tableName, Text row)

void deleteAllRowTs(Text tableName, Text row, i64 timestamp)

ScannerID scannerOpenWithScan(Text tableName, TScan scan)

ScannerID scannerOpen(Text tableName, Text startRow,  columns)

ScannerID scannerOpenWithStop(Text tableName, Text startRow, Text stopRow,  columns)

ScannerID scannerOpenWithPrefix(Text tableName, Text startAndPrefix,  columns)

ScannerID scannerOpenTs(Text tableName, Text startRow,  columns, i64 timestamp)

ScannerID scannerOpenWithStopTs(Text tableName, Text startRow, Text stopRow,  columns, i64 timestamp)

scannerGet(ScannerID id)

scannerGetList(ScannerID id, i32 nbRows)

void scannerClose(ScannerID id)

http://blog.csdn.net/poechant/article/details/6618264

http://mmicky.blog.163.com/blog/static/150290154201311801519681/ 按照这个配置python hbase开发环境

编程前切换到/usr/program/python/hbase 然后运行python

>>>from thrift.transport import TSocket
>>>from thrift.protocol import TBinaryProtocol
>>>from hbase import Hbase

都不报错，但是到pycharm报错，原因是python默认搜索当前目录。

到pycharm 需要把 /usr/program/python/hbase 添加到pycharm的path

操作步骤：File>>setting>>project interpreter>>python interpreter>>>paths>>>+ 把/usr/program/python/hbase 文件夹添加进去就好了。

__author__ = 'root'

from thrift.transport import TSocket

from thrift.protocol import TBinaryProtocol

from hbase import Hbase

# Connect to the local HBase Thrift server and print the table names.
transport = TSocket.TSocket("localhost", 9090)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()
try:
    tabs = client.getTableNames()
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(tabs)
finally:
    # Close the socket even if the call above fails.
    transport.close()

python Hbase Thrift pycharm 及引入包的更多相关文章

python中引入包的时候报错AttributeError: module 'sys' has no attribute 'setdefaultencoding'解决方法？
python中引入包的时候报错:import unittestimport smtplibimport timeimport osimport sysimp.reload(sys)sys.setdef ...
Python入门之PyCharm的快捷键与常用设置和扩展(Win系统)
1. PyCharm的快捷键 2 . PyCharm的常用设置和扩展 ---------------------------------------------------------------- ...
Python入门之PyCharm的快捷键与常用设置和扩展(Mac系统)
1. 快捷键 2 . PyCharm的常用设置和扩展 ------------------------------------------------------------------------- ...
（转载）Python 的 JPype 模块调用 Jar 包
Python 的 JPype 模块调用 Jar 包背景与需求最近学习并安装使用了HttpRunner框架去尝试做接口测试,并有后续在公司推广的打算. HttpRunner由Python开发,调用接 ...
PyCharm导入tensorflow包报错的问题
[注]PyCharm导入tensorflow包报错的问题若是你也遇到这个问题,说明你也没有理解tensorflow到底在哪里. 当安装了anaconda3.6后,在PyCharm中设置interpr ...
【Python】Java程序员学习Python（十）— 类、包和模块
我觉得学习到现在应该得掌握Python的OOP编程了,但是现在还没有应用到,先留一个坑. 一.类和对象说到类和对象其实就是在说面向对象编程,学完Java以后我觉得面向对象编程还是很不错的,首先封装了 ...
[Python开发工具] Pycharm之快捷键
[Python开发工具] Pycharm之快捷键 1 全局搜索: Ctrl+Shift+F,不过PyCharm的更强大, 你可以点选左侧某个目录后再按Ctrl+Shift+F, 这样默认会搜索改目录; ...
HBase & thrift & C++编程
目录目录 1 1. 前言 1 2. 启动和停止thrift2 1 2.1. 启动thrift2 1 2.2. 停止thrift2 1 2.3. 启动参数 2 3. hbase.thrift 2 3. ...
Golang&Python测试thrift
接上篇,安装好之后,就开始编写IDL生成然后测试. 一.生成运行参考 http://www.aboutyun.com/thread-8916-1-1.html 来个添加,查询. namespace ...

随机推荐

JS方法在iframe父子窗口间的调用
本文向大家简单介绍一下iframe父子窗口间JS方法调用,JavaScript 被数百万计的网页用来改进设计.验证表单.检测浏览器.创建cookies,以及更多的应用,希望本文介绍对你有所帮助. if ...
Qt on Android: Qt Quick 之 Hello World 图文具体解释
在上一篇文章,<Qt on Android:QML 语言基础>中,我们介绍了 QML 语言的语法,在最后我们遗留了一些问题没有展开,这篇呢,我们就正式開始撰写 Qt Quick 程序,而那 ...
thinkphp实现短信验证注册
前言注册时经常需要用到短信验证码,本文记录一下思路和具体实现. 短信验证平台使用云片,短信验证码的生成使用thinkphp. 思路 1.用户输入手机号,请求获取短信验证码. 2.thinkphp生成 ...
[转] json in javascript
JavaScript is a general purpose programming language that was introduced as the page scripting langu ...
PL/SQL 批量SQL
批量SQL包括: FORALL语句 BULK COLLECT子句 FORALL语句 FORALL具有如下结构: FORALL loop_counter IN bounds_clause [SAVE E ...
关于XML（一）。
关于XML 什么是XML? XML是可扩展标记语言.类似于HTML,XML的宗旨是旨在传输数据,而非显示数据.其标签没有预定义,您需要自行定义标签.XML具有自我描述性,是W3C的推荐标准. XML与 ...
HTML5 WebAudioAPI(三)--绘制频谱图
HTML <style> #canvas { background: black; } </style> <div class="container" ...
HTML5 <Audio>标签API整理(一)
简单实例: <audio id="myAudio"></audio> <script> var myAudio = document.getEl ...
JAVA HashMap详细介绍和示例
http://www.jb51.net/article/42769.htm 我们先对HashMap有个整体认识,然后再学习它的源码,最后再通过实例来学会使用HashMap. 第1部分 HashMa ...
Dictionary 总结
foreach (KeyValuePair<int, string> kvp in myDictionary) {...} Dictionary<string, string> ...

python Hbase Thrift pycharm 及引入包

python Hbase Thrift pycharm 及引入包的更多相关文章

随机推荐

热门专题