Flink 批处理与 HBase 的读写

source-hbase

父类

下面的父类是模仿 Flink 官方的 HBase 输入格式（InputFormat）实现编写的。

import org.apache.flink.api.common.io.LocatableInputSplitAssigner;

import org.apache.flink.api.common.io.RichInputFormat;

import org.apache.flink.api.common.io.statistics.BaseStatistics;

import org.apache.flink.api.java.utils.ParameterTool;

import org.apache.flink.configuration.Configuration;

import org.apache.flink.core.io.InputSplitAssigner;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.*;

import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.hbase.util.Pair;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

/**
 * Base class for Flink batch input formats that read rows from an HBase table.
 *
 * <p>Life cycle as implemented here: {@link #configure(Configuration)} opens the HBase
 * connection and table and lets the subclass build the {@link Scan};
 * {@link #createInputSplits(int)} produces one split per region that overlaps the scan range;
 * {@link #open(MyTableInputSplit)} starts a scanner restricted to one split and
 * {@link #nextRecord(Object)} maps each scanned {@link Result} to a record.
 *
 * @param <T> type of the records produced from each HBase {@link Result}
 * @author WeiJiQian
 */
public abstract class SourceHBaseInputBase<T> extends RichInputFormat<T, MyTableInputSplit>{

    protected static final Logger LOG = LoggerFactory.getLogger(SourceHBaseInputBase.class);

    /** Set to {@code true} once the scanner of the current split returns no more rows. */
    protected boolean endReached = false;

    protected transient HTable table = null;

    protected transient Scan scan = null;

    protected transient Connection connection = null;

    /** HBase iterator wrapper. */
    protected ResultScanner resultScanner = null;

    /**
     * Start row of the current split until the first row has been read; afterwards the key of
     * the last row that was returned.
     */
    protected byte[] currentRow;

    /** Number of rows returned for the current split. */
    protected long scannedRows;

    protected ParameterTool parameterTool;

    /**
     * Converts one scanned HBase row into an output record.
     *
     * @param r the row returned by the scanner (never {@code null} when called from this class)
     * @return the record to emit
     */
    protected abstract T mapResultToOutType(Result r);

    /**
     * Builds the scan. Implementations must assign the {@link #scan} field; {@link #open} and
     * {@link #createInputSplits} fail with an {@link IOException} while it is {@code null}.
     */
    protected abstract void getScan();

    /**
     * @return the name of the HBase table to read; called after {@link #parameterTool} is set
     */
    protected abstract TableName getTableName();

    /**
     * Creates the HBase connection and opens the table named by {@link #getTableName()}.
     *
     * <p>Connection settings (ZooKeeper quorum and client port, RPC / operation / scanner
     * timeouts) are copied from {@code PropertiesUtil.PARAMETER_TOOL}.
     *
     * @throws IOException if the connection or the table cannot be opened
     */
    protected void getTable() throws IOException {
        parameterTool = PropertiesUtil.PARAMETER_TOOL;

        org.apache.hadoop.conf.Configuration configuration = HBaseConfiguration.create();
        configuration.set(HBASE_ZOOKEEPER_QUORUM, parameterTool.get(HBASE_ZOOKEEPER_QUORUM));
        configuration.set(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT, parameterTool.get(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT));
        configuration.set(HBASE_RPC_TIMEOUT, parameterTool.get(HBASE_RPC_TIMEOUT));
        configuration.set(HBASE_CLIENT_OPERATION_TIMEOUT, parameterTool.get(HBASE_CLIENT_OPERATION_TIMEOUT));
        configuration.set(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, parameterTool.get(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD));

        connection = ConnectionFactory.createConnection(configuration);
        table = (HTable) connection.getTable(getTableName());
    }

    /**
     * Opens the HBase table and builds the scan. Any {@link IOException} raised while
     * connecting is rethrown unchanged (via {@code @SneakyThrows}).
     *
     * @param parameters Flink configuration; not read by this implementation
     */
    @SneakyThrows
    @Override
    public void configure(Configuration parameters) {
        getTable();
        getScan();
    }

    /**
     * Starts scanning the row range described by {@code split}.
     *
     * @param split the split to read
     * @throws IOException if the table or scan was not initialised in {@code configure()},
     *                     if {@code split} is {@code null}, or if the scanner cannot be created
     */
    @Override
    public void open(MyTableInputSplit split) throws IOException {
        if (table == null) {
            throw new IOException("The HBase table has not been opened! " +
                    "This needs to be done in configure().");
        }
        if (scan == null) {
            throw new IOException("Scan has not been initialized! " +
                    "This needs to be done in configure().");
        }
        if (split == null) {
            throw new IOException("Input split is null!");
        }

        logSplitInfo("opening", split);

        // set scan range
        currentRow = split.getStartRow();
        scan.setStartRow(currentRow);
        scan.setStopRow(split.getEndRow());

        resultScanner = table.getScanner(scan);
        endReached = false;
        scannedRows = 0;
    }

    /**
     * Returns the next record of the current split, or {@code null} when the split is exhausted.
     *
     * <p>If the scanner fails (for example because of a scanner timeout) a new scanner is opened
     * once, resuming from the last position, and the read is retried.
     *
     * @param reuse ignored; the record comes from {@link #mapResultToOutType(Result)}
     * @return the mapped record, or {@code null} at the end of the split
     * @throws IOException if no scanner is open or the retry fails as well
     */
    @Override
    public T nextRecord(T reuse) throws IOException {
        if (resultScanner == null) {
            throw new IOException("No table result scanner provided!");
        }

        Result res;
        try {
            res = resultScanner.next();
        } catch (Exception e) {
            resultScanner.close();
            //workaround for timeout on scan
            LOG.warn("Error after scan of " + scannedRows + " rows. Retry with a new scanner...", e);
            // Until the first row has been returned, currentRow is still the split's start row
            // and must be included; afterwards it is the last row already emitted and must be
            // excluded so it is not emitted twice.
            scan.withStartRow(currentRow, scannedRows == 0);
            resultScanner = table.getScanner(scan);
            res = resultScanner.next();
        }

        if (res == null) {
            endReached = true;
            return null;
        }

        scannedRows++;
        currentRow = res.getRow();
        return mapResultToOutType(res);
    }

    /**
     * Logs id, hosts and row range of a split; empty or missing boundaries are shown as "-".
     */
    private void logSplitInfo(String action, MyTableInputSplit split) {
        int splitId = split.getSplitNumber();
        // Bytes.toString returns null for a null array, so guard before calling isEmpty().
        String splitStart = Bytes.toString(split.getStartRow());
        String splitEnd = Bytes.toString(split.getEndRow());
        String splitStartKey = (splitStart == null || splitStart.isEmpty()) ? "-" : splitStart;
        String splitStopKey = (splitEnd == null || splitEnd.isEmpty()) ? "-" : splitEnd;
        String[] hostnames = split.getHostnames();
        LOG.info("{} split (this={})[{}|{}|{}|{}]", action, this, splitId, hostnames, splitStartKey, splitStopKey);
    }

    /**
     * @return {@code true} once {@link #nextRecord(Object)} has hit the end of the current split
     */
    @Override
    public boolean reachedEnd() throws IOException {
        return endReached;
    }

    /**
     * Closes the scanner of the current split. The table and connection stay open; they are
     * released in {@link #closeInputFormat()}.
     */
    @Override
    public void close() throws IOException {
        LOG.info("Closing split (scanned {} rows)", scannedRows);
        currentRow = null;
        try {
            if (resultScanner != null) {
                resultScanner.close();
            }
        } finally {
            resultScanner = null;
        }
    }

    /**
     * Releases the table and then the connection it was obtained from. The connection is closed
     * even if closing the table throws.
     */
    @Override
    public void closeInputFormat() throws IOException {
        try {
            if (table != null) {
                table.close();
            }
        } finally {
            table = null;
            try {
                if (connection != null) {
                    connection.close();
                }
            } finally {
                connection = null;
            }
        }
    }

    /**
     * Creates one input split for every region that is accepted by
     * {@link #includeRegionInScan(byte[], byte[])} and overlaps the scan's start/stop rows.
     * Each split's range is the intersection of the region range and the scan range.
     *
     * @param minNumSplits hint from Flink; not used to size or limit the result
     * @return the splits, numbered consecutively from 0
     * @throws IOException if the table or scan was not initialised in {@code configure()},
     *                     if the table reports no regions, or if a region lookup fails
     */
    @Override
    public MyTableInputSplit[] createInputSplits(final int minNumSplits) throws IOException {
        if (table == null) {
            throw new IOException("The HBase table has not been opened! " +
                    "This needs to be done in configure().");
        }
        if (scan == null) {
            throw new IOException("Scan has not been initialized! " +
                    "This needs to be done in configure().");
        }

        // Get the starting and ending row keys for every region in the currently open table
        final Pair<byte[][], byte[][]> keys = table.getRegionLocator().getStartEndKeys();
        if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
            throw new IOException("Expecting at least one region.");
        }
        final byte[][] regionStartKeys = keys.getFirst();
        final byte[][] regionEndKeys = keys.getSecond();

        final byte[] startRow = scan.getStartRow();
        final byte[] stopRow = scan.getStopRow();
        final boolean scanWithNoLowerBound = startRow.length == 0;
        final boolean scanWithNoUpperBound = stopRow.length == 0;

        // There can be at most one split per region.
        final List<MyTableInputSplit> splits = new ArrayList<MyTableInputSplit>(regionStartKeys.length);

        for (int i = 0; i < regionStartKeys.length; i++) {
            final byte[] startKey = regionStartKeys[i];
            final byte[] endKey = regionEndKeys[i];

            // Test if the given region is to be included in the InputSplit while splitting the regions of a table
            if (!includeRegionInScan(startKey, endKey)) {
                continue;
            }

            // Determine if the region contains keys used by the scan
            final boolean isLastRegion = endKey.length == 0;
            final boolean startsBeforeRegionEnd =
                    scanWithNoLowerBound || isLastRegion || Bytes.compareTo(startRow, endKey) < 0;
            final boolean stopsAfterRegionStart =
                    scanWithNoUpperBound || Bytes.compareTo(stopRow, startKey) > 0;
            if (!startsBeforeRegionEnd || !stopsAfterRegionStart) {
                continue;
            }

            // Find the server hosting the region; only needed for regions that become a split
            final String regionLocation = table.getRegionLocator().getRegionLocation(startKey, false).getHostnamePort();
            final String[] hosts = new String[]{regionLocation};

            final byte[] splitStart = scanWithNoLowerBound || Bytes.compareTo(startKey, startRow) >= 0 ? startKey : startRow;
            final byte[] splitStop = (scanWithNoUpperBound || Bytes.compareTo(endKey, stopRow) <= 0)
                    && !isLastRegion ? endKey : stopRow;

            final int id = splits.size();
            splits.add(new MyTableInputSplit(id, hosts, table.getName().getName(), splitStart, splitStop));
        }

        LOG.info("Created " + splits.size() + " splits");
        for (MyTableInputSplit split : splits) {
            logSplitInfo("created", split);
        }
        return splits.toArray(new MyTableInputSplit[splits.size()]);
    }

    /**
     * Test if the given region is to be included in the scan while splitting the regions of a table.
     *
     * @param startKey Start key of the region
     * @param endKey   End key of the region
     * @return true, if this region needs to be included as part of the input (default).
     */
    protected boolean includeRegionInScan(final byte[] startKey, final byte[] endKey) {
        return true;
    }

    /**
     * @return an assigner that takes the host names stored in the splits into account
     */
    @Override
    public InputSplitAssigner getInputSplitAssigner(MyTableInputSplit[] inputSplits) {
        return new LocatableInputSplitAssigner(inputSplits);
    }

    /**
     * @return always {@code null}; this format does not provide statistics
     */
    @Override
    public BaseStatistics getStatistics(BaseStatistics cachedStatistics) {
        return null;
    }
}

子类

import org.apache.flink.configuration.Configuration;

import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.Result;

import org.apache.hadoop.hbase.client.Scan;

import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;

import org.apache.hadoop.hbase.filter.CompareFilter;

import org.apache.hadoop.hbase.filter.FilterList;

import org.apache.hadoop.hbase.filter.RowFilter;

import org.apache.hadoop.hbase.util.Bytes;

import javax.swing.*;

import java.util.List;

import static org.apache.hadoop.hbase.filter.FilterList.Operator.MUST_PASS_ONE;

/**
 * Input format that scans the HBase table configured under
 * {@code HBaseConstant.HBASE_TABLE_NAME_PERSONA_DATA} and turns every row key into a
 * {@link UsersBean}.
 *
 * @author WeiJiQian
 */
public class SourceDaysHbase extends SourceHBaseInputBase<UsersBean> {

    // NOTE(review): the dates are stored but not applied to the scan anywhere in this class —
    // confirm whether a row filter on them was intended.
    private final List<String> dates;

    /**
     * @param dates dates associated with this source (currently only stored)
     */
    public SourceDaysHbase(List<String> dates){
        this.dates = dates;
    }

    /**
     * Builds a new bean from the row key of {@code r}.
     *
     * <p>A fresh {@link UsersBean} is created for every row: handing out one shared, mutable
     * instance would let later rows overwrite records that downstream operators still hold.
     *
     * @param r the scanned row
     * @return a bean carrying the phone number derived from the row key (via
     *         {@code CustomizeUtils.getPhoneOfPersonaDataRowKey}) and the value
     *         {@code CustomizeUtils.getPhone8} derives from it
     */
    @Override
    protected UsersBean mapResultToOutType(Result r) {
        UsersBean bean = new UsersBean();
        bean.setPhone11(CustomizeUtils.getPhoneOfPersonaDataRowKey(Bytes.toString(r.getRow())));
        bean.setPhone8(CustomizeUtils.getPhone8(bean.getPhone11()));
        return bean;
    }

    /**
     * Creates a scan restricted to the single column
     * {@code HBASE_PERSONA_FAMILY_MONTH_DAY:HBASE_PERSONA_ACTIVITE_DATE}.
     */
    @Override
    protected void getScan() {
        scan = new Scan();
        scan.addColumn(HBaseConstant.HBASE_PERSONA_FAMILY_MONTH_DAY, HBaseConstant.HBASE_PERSONA_ACTIVITE_DATE);
    }

    /**
     * @return the table name read from the job parameters
     */
    @Override
    protected TableName getTableName() {
        return TableName.valueOf(parameterTool.get(HBaseConstant.HBASE_TABLE_NAME_PERSONA_DATA));
    }
}

sink-hbase

import lombok.extern.slf4j.Slf4j;

import org.apache.flink.api.common.io.OutputFormat;

import org.apache.flink.api.java.utils.ParameterTool;

import org.apache.flink.configuration.Configuration;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.*;

import org.apache.hadoop.hbase.util.Bytes;

import org.mortbay.util.MultiPartWriter;

import java.io.IOException;

import static com.hecaiyun.common.bean.HBaseConstant.*;

/**
 * Base class for Flink batch output formats that write to an HBase table through a
 * {@link BufferedMutator}.
 *
 * <p>{@link #configure(Configuration)} prepares the HBase client configuration,
 * {@link #open(int, int)} creates the connection, table and buffered mutator, subclasses
 * write records with {@link #putData(String, byte[], byte[], String)}, and {@link #close()}
 * flushes pending mutations and releases all resources.
 *
 * @param <T> type of the records written by the subclass
 * @author WeiJiQian
 */
@Slf4j
public abstract class HBaseOutputFormatBase<T> implements OutputFormat<T> {

    protected final String valueString = "1";

    protected String date ;

    protected Table table ;

    protected Connection connection;

    protected BufferedMutatorParams params;

    protected BufferedMutator mutator;

    protected org.apache.hadoop.conf.Configuration configuration;

    protected ParameterTool parameterTool;

    /**
     * @return the name of the HBase table to write to
     */
    public abstract TableName getTableName();

    /**
     * Builds the HBase client configuration (ZooKeeper quorum and client port, RPC / operation /
     * scanner timeouts) from {@code PropertiesUtil.PARAMETER_TOOL}. No connection is opened here.
     *
     * @param parameters Flink configuration; not read by this implementation
     */
    public void configure(Configuration parameters) {
        parameterTool = PropertiesUtil.PARAMETER_TOOL;
        configuration = HBaseConfiguration.create();
        configuration.set(HBASE_ZOOKEEPER_QUORUM, parameterTool.get(HBASE_ZOOKEEPER_QUORUM));
        configuration.set(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT, parameterTool.get(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT));
        configuration.set(HBASE_RPC_TIMEOUT, parameterTool.get(HBASE_RPC_TIMEOUT));
        configuration.set(HBASE_CLIENT_OPERATION_TIMEOUT, parameterTool.get(HBASE_CLIENT_OPERATION_TIMEOUT));
        configuration.set(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, parameterTool.get(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD));
    }

    /**
     * Opens the connection, the table and a buffered mutator for it.
     *
     * @param taskNumber index of this parallel task; not used
     * @param numTasks   total number of parallel tasks; not used
     * @throws IOException if the connection, table or mutator cannot be created
     */
    public void open(int taskNumber, int numTasks) throws IOException {
        connection =  ConnectionFactory.createConnection(configuration);
        table = connection.getTable(getTableName());
        params = new BufferedMutatorParams(table.getName());
        // Client-side write buffer size, taken from the HBASE_WRITEBUFFER_SIZE job parameter
        // (the original note mentions 100M as the intended value).
        params.writeBufferSize(parameterTool.getLong(HBASE_WRITEBUFFER_SIZE));
        mutator = connection.getBufferedMutator(params);
    }

    /**
     * Queues a put of a single cell; an existing value of that cell is overwritten.
     *
     * @param rowKey row key of the cell
     * @param family column family
     * @param colum  column qualifier
     * @param value  cell value, stored as the bytes of the string
     * @throws IOException if the mutator rejects the mutation
     */
    public void putData(String rowKey,byte[] family, byte[] colum,String value ) throws IOException {
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(family,colum,Bytes.toBytes(value));
        // NOTE(review): SKIP_WAL bypasses the write-ahead log, so these edits can be lost if a
        // region server fails before its memstore is flushed — confirm this is acceptable.
        put.setDurability(Durability.SKIP_WAL);
        mutator.mutate(put);
    }

    /**
     * Flushes buffered mutations and releases the mutator, the table and the connection.
     *
     * <p>Every resource is closed even if an earlier step throws, so a failed flush can no
     * longer leak the table or the connection.
     *
     * @throws IOException if flushing or closing fails
     */
    public void close() throws IOException {
        try {
            if (mutator != null) {
                try {
                    mutator.flush();
                } finally {
                    mutator.close();
                }
            }
        } finally {
            mutator = null;
            try {
                if (table != null) {
                    table.close();
                }
            } finally {
                table = null;
                try {
                    if (connection != null) {
                        connection.close();
                    }
                } finally {
                    connection = null;
                }
            }
        }
    }
}

Flink连接器-批处理-读写Hbase的更多相关文章

Spark读写Hbase的二种方式对比
作者:Syn良子出处:http://www.cnblogs.com/cssdongl 转载请注明出处一.传统方式这种方式就是常用的TableInputFormat和TableOutputForm ...
【原创】大叔经验分享（25）hive通过外部表读写hbase数据
在hive中创建外部表: CREATE EXTERNAL TABLE hive_hbase_table(key string, name string,desc string) STORED BY ' ...
Spark读写HBase
Spark读写HBase示例 1.HBase shell查看表结构 hbase(main)::> desc 'SDAS_Person' Table SDAS_Person is ENABLED ...
Flink批处理读写Hive
import org.apache.flink.table.api.*; import org.apache.flink.table.catalog.hive.HiveCatalog; /** * @ ...
flink连接器-流处理-读写redis
写入redis resultStream.addSink(new RedisSink(FlinkUtils.getRedisSinkConfig(parameters),new MyRedisMapp ...
Spark读写Hbase中的数据
def main(args: Array[String]) { val sparkConf = new SparkConf().setMaster("local").setAppN ...
Spark实战之读写HBase
1 配置 1.1 开发环境: HBase:hbase-1.0.0-cdh5.4.5.tar.gz Hadoop:hadoop-2.6.0-cdh5.4.5.tar.gz ZooKeeper:zooke ...
spark读写hbase性能对比
一.spark写入hbase hbase client以put方式封装数据,并支持逐条或批量插入.spark中内置saveAsHadoopDataset和saveAsNewAPIHadoopDatas ...
Spark学习笔记——读写Hbase
1.首先在Hbase中建立一张表,名字为student 参考 Hbase学习笔记——基本CRUD操作一个cell的值,取决于Row,Column family,Column Qualifier和Ti ...

随机推荐

深度分析：Java 静态方法/变量，非静态方法/变量的区别，今天一并帮你解决！
静态/非静态方法/变量的写法大家应该都明白静态方法/字段比普通方法/字段的写法要多一个static关键字,简单写下他们的写法吧,了解的可以直接略过 class Test{ // 静态变量 publ ...
数学分析理论（rudin版）笔记：实数系和复数系.1
导引有理数集是"稀疏的"和"稠密的". 选择公理考虑以下问题:容易找到两个无理数 a, b 使 a + b 为有理数,或者使 ab 为有理数,但是能否使得 ...
基于Vue、Springboot网站实现第三方登录之QQ登录，以及邮件发送
基于Vue.Springboot实现第三方登录之QQ登录前言一.前提(准备) 二.QQ登录实现 1.前端 2.后端 1.application.yml 和工具类QQHttpClient 2.QQL ...
python应用（3）：启用集成开发工具pycharm
之前写了个python程序给自己用,写代码时用的是macvim(vim的一种),macvim是个编辑工具,由于我已经设置过对python等各种语言的支持特性,所以什么缩进.对齐.高亮之类的表现都有,写 ...
C++高级程序员进阶之路
一.自学成为高级程序员推荐看的书: 1.c语言基础 <c primer Plus>.<c和指针>.<C专家编程> 2.C++语言基础 <C++ Primer& ...
用FL Studio制作反向人声音效（Vocal Chops）
人声切片在各类电子音乐中都被广泛运用,在FL Studio20中我们也可以运用其自带的插件来制作属于我们自己的人声切片效果.在学完这篇文章后你就可以动手做出如Kygo.Martin Garrix等大牌 ...
Hibernate的配置跟简单创建一个表并插入一条数据
首先官网下载一个hibernate的架包,由于时间关系,博主已经分享到了百度网盘:https://pan.baidu.com/s/1Mw0Og3EKnouf84Njz9UicQ,提取码lens Hib ...
关于transition动画效果中，滚动条会闪一下就消失的问题
具体问题说明: 我在通过transition来改变width的长度,在transition变化过程中,底下的滚动条会闪烁一下. 问题原理:因为是里面容器没办法完全被装下,并且容器的宽度被限制住了. 解 ...
C#（二）基础篇—操作符
2020-12-02 本随笔为个人复习巩固知识用,多从书上总结与理解得来,如有错误麻烦指正 1.数学操作符 int a=2,b=3,c=0; float d=0; c=a+b; //c=5 c++; ...
《高并发下的.NET》第2季 - 故障公告：高并发下全线崩溃
大家好,非常抱歉,在昨天下午(12月3日)的访问高峰,园子迎来更高的并发,在这样的高并发下,突发的数据库连接故障造成博客站点全线崩溃,由此给您带来很大的麻烦,请您谅解. 最近,我们一边在忙于AWS合作 ...

Flink连接器-批处理-读写Hbase

source-hbase

父类

子类

sink-hbase

Flink连接器-批处理-读写Hbase的更多相关文章

随机推荐

热门专题