HBase之缓存扫描加快读取速度

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.*;

import org.apache.hadoop.hbase.client.metrics.ScanMetrics;

import java.io.IOException;

/**

 * Created by similarface on 16/8/23.

 */

/**
 * Demo that scans the HBase table {@code testtable} with different
 * caching / batch / small-scan settings and prints, for each run, the number
 * of {@link Result} instances returned and the RPC count reported by the
 * scan metrics.
 *
 * <p>The connection and table handle are opened lazily on first use, shared
 * by all scans, and released when {@link #main(String[])} finishes. This class
 * makes no attempt to be thread-safe.
 */
public class ScanDataUseCache {

    /** Shared connection backing {@link #table}; {@code null} until first opened. */
    private static Connection connection = null;

    /** Shared handle to {@code testtable}; {@code null} until first opened. */
    private static Table table = null;

    /**
     * Returns the shared handle to {@code testtable}, opening it on first use.
     *
     * @return the table handle, or {@code null} if the connection or table
     *         could not be opened (the failure is printed to standard output)
     */
    public static Table getTable() {
        try {
            return openTable();
        } catch (IOException e) {
            // Keep the historical "null on failure" contract, but do not hide the cause.
            System.out.println(e);
            return null;
        }
    }

    /**
     * Opens the connection and table once and caches both in static fields so
     * that repeated calls reuse them instead of creating a new connection each time.
     *
     * @return the cached table handle, never {@code null}
     * @throws IOException if the connection or the table cannot be opened
     */
    private static Table openTable() throws IOException {
        if (table == null) {
            Configuration configuration = HBaseConfiguration.create();
            Connection opened = ConnectionFactory.createConnection(configuration);
            try {
                // Establish the handle to the table.
                table = opened.getTable(TableName.valueOf("testtable"));
            } catch (IOException | RuntimeException e) {
                // Do not leak the freshly created connection when the table lookup fails.
                try {
                    opened.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
            connection = opened;
        }
        return table;
    }

    /**
     * Closes the cached table and connection, if any, and resets the cache.
     *
     * @throws IOException if closing the table or the connection fails
     */
    private static void closeResources() throws IOException {
        try {
            if (table != null) {
                table.close();
            }
        } finally {
            table = null;
            try {
                if (connection != null) {
                    connection.close();
                }
            } finally {
                connection = null;
            }
        }
    }

    /**
     * Runs one full scan of the table with the given settings and prints a
     * summary line with the result count and the RPC count from the scan metrics.
     * If the table or scanner cannot be opened, the exception and the line
     * {@code Error} are printed instead.
     *
     * @param caching value passed to {@code Scan.setCaching}
     * @param batch   value passed to {@code Scan.setBatch}
     * @param small   value passed to {@code Scan.setSmall}
     */
    private static void scan(int caching, int batch, boolean small) {
        // Notes from the original author (defaults may differ between HBase versions - verify):
        //  - setCaching: number of rows fetched per RPC (stated default: 1). A larger cache
        //    improves throughput, but a very large one makes each transfer take long.
        //  - setBatch: number of columns returned per Result; very wide rows are split and
        //    shipped to the client a few columns at a time.
        //  - setSmall: whether this is a "small" scan.
        //  - setScanMetricsEnabled: enables collection of scan metrics.
        Scan scan = new Scan()
                .setCaching(caching)
                .setBatch(batch)
                .setSmall(small)
                .setScanMetricsEnabled(true);

        int count = 0;
        // try-with-resources guarantees the scanner is closed even if iteration fails.
        try (ResultScanner scanner = openTable().getScanner(scan)) {
            for (Result ignored : scanner) {
                count++;
            }
        } catch (IOException e) {
            System.out.println(e);
            System.out.println("Error");
            return;
        }

        // Metrics are read only after the scanner has been closed, as in the original code.
        ScanMetrics metrics = scan.getScanMetrics();
        System.out.println("Caching: " + caching + ", Batch: " + batch + ", Small: " + small + ", Results: " + count + ", RPCs: " + metrics.countOfRPCcalls);
    }

    /**
     * Runs the scan with a series of caching/batch/small combinations.
     * The comment above each call is the sample output the original author
     * recorded for that call.
     *
     * @param args unused
     * @throws IOException if releasing the shared table or connection fails
     */
    public static void main(String[] args) throws IOException {
        try {
            // Caching: 1, Batch: 1, Small: false, Results: 9, RPCs: 12
            scan(1, 1, false);

            // Caching: 1, Batch: 0, Small: false, Results: 4, RPCs: 7
            scan(1, 0, false);

            // Caching: 1, Batch: 0, Small: true, Results: 4, RPCs: 0
            scan(1, 0, true);

            // Caching: 200, Batch: 1, Small: false, Results: 9, RPCs: 3
            scan(200, 1, false);

            // Caching: 200, Batch: 0, Small: false, Results: 4, RPCs: 3
            scan(200, 0, false);

            // Caching: 200, Batch: 0, Small: true, Results: 4, RPCs: 0
            scan(200, 0, true);

            // Caching: 2000, Batch: 100, Small: false, Results: 4, RPCs: 3
            scan(2000, 100, false);

            // Caching: 2, Batch: 100, Small: false, Results: 4, RPCs: 5
            scan(2, 100, false);

            // Caching: 2, Batch: 10, Small: false, Results: 4, RPCs: 5
            scan(2, 10, false);

            // Caching: 5, Batch: 100, Small: false, Results: 4, RPCs: 3
            scan(5, 100, false);

            // (no sample output was recorded for this combination)
            scan(5, 20, false);

            // Caching: 10, Batch: 10, Small: false, Results: 4, RPCs: 3
            scan(10, 10, false);
        } finally {
            closeResources();
        }
    }

}

/**

 Caching: 1, Batch: 0, Small: false, Results: 5, RPCs: 8

 Caching: 1, Batch: 0, Small: true, Results: 5, RPCs: 0

 Caching: 200, Batch: 1, Small: false, Results: 1009, RPCs: 8

 Caching: 200, Batch: 0, Small: false, Results: 5, RPCs: 3

 Caching: 200, Batch: 0, Small: true, Results: 5, RPCs: 0

 Caching: 2000, Batch: 100, Small: false, Results: 14, RPCs: 3

 Caching: 2, Batch: 100, Small: false, Results: 14, RPCs: 10

 Caching: 2, Batch: 10, Small: false, Results: 104, RPCs: 55

 Caching: 5, Batch: 100, Small: false, Results: 14, RPCs: 5

 Caching: 5, Batch: 20, Small: false, Results: 54, RPCs: 13

 Caching: 10, Batch: 10, Small: false, Results: 104, RPCs: 13

 **/

这是一个包含9行数据的表，每行包含若干列。使用缓存（caching）为6、批量（batch）为3的扫描器扫描该表，共需要3次RPC：每3个列装入一个Result实例，每6个Result填满缓存后组成一次RPC。

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.*;

import org.apache.hadoop.hbase.client.metrics.ScanMetrics;

import java.io.IOException;

/**

 * Created by similarface on 16/8/24.

 */

/**
 * Demo that scans the HBase table {@code testtable} with different
 * combinations of caching, batch, per-column-family row offset, per-column-family
 * result limit and maximum result size, printing the number of {@link Result}
 * instances returned and the RPC count reported by the scan metrics.
 *
 * <p>The connection and table handle are opened lazily on first use, shared
 * by all scans, and released when {@link #main(String[])} finishes. This class
 * makes no attempt to be thread-safe.
 */
public class ScanWithOffsetAndLimit {

    /** Shared connection backing {@link #table}; {@code null} until first opened. */
    private static Connection connection = null;

    /** Shared handle to {@code testtable}; {@code null} until first opened. */
    private static Table table = null;

    /**
     * Returns the shared handle to {@code testtable}, opening it on first use.
     *
     * @return the table handle, or {@code null} if the connection or table
     *         could not be opened (the failure is printed to standard output)
     */
    public static Table getTable() {
        try {
            return openTable();
        } catch (IOException e) {
            // Keep the historical "null on failure" contract, but do not hide the cause.
            System.out.println(e);
            return null;
        }
    }

    /**
     * Opens the connection and table once and caches both in static fields so
     * that repeated calls reuse them instead of creating a new connection each time.
     *
     * @return the cached table handle, never {@code null}
     * @throws IOException if the connection or the table cannot be opened
     */
    private static Table openTable() throws IOException {
        if (table == null) {
            Configuration configuration = HBaseConfiguration.create();
            Connection opened = ConnectionFactory.createConnection(configuration);
            try {
                // Establish the handle to the table.
                table = opened.getTable(TableName.valueOf("testtable"));
            } catch (IOException | RuntimeException e) {
                // Do not leak the freshly created connection when the table lookup fails.
                try {
                    opened.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
            connection = opened;
        }
        return table;
    }

    /**
     * Closes the cached table and connection, if any, and resets the cache.
     *
     * @throws IOException if closing the table or the connection fails
     */
    private static void closeResources() throws IOException {
        try {
            if (table != null) {
                table.close();
            }
        } finally {
            table = null;
            try {
                if (connection != null) {
                    connection.close();
                }
            } finally {
                connection = null;
            }
        }
    }

    /**
     * Runs one scan over the table with the given settings, optionally dumping
     * every result, and prints a summary line with the result count and the
     * RPC count from the scan metrics.
     *
     * @param num           sequence number of this run, used only in the progress message
     * @param caching       value passed to {@code Scan.setCaching}
     * @param batch         value passed to {@code Scan.setBatch}
     * @param offset        value passed to {@code Scan.setRowOffsetPerColumnFamily}
     * @param maxResults    value passed to {@code Scan.setMaxResultsPerColumnFamily}
     * @param maxResultSize value passed to {@code Scan.setMaxResultSize}
     * @param dump          when {@code true}, each result is printed as it is read
     * @throws IOException if the table or the scanner cannot be opened
     */
    private static void scan(int num, int caching, int batch, int offset, int maxResults, int maxResultSize, boolean dump
    ) throws IOException {
        Scan scan = new Scan()
                .setCaching(caching)
                .setBatch(batch)
                .setRowOffsetPerColumnFamily(offset)
                .setMaxResultsPerColumnFamily(maxResults)
                .setMaxResultSize(maxResultSize)
                .setScanMetricsEnabled(true);

        int count = 0;
        // try-with-resources guarantees the scanner is closed even if iteration fails.
        try (ResultScanner scanner = openTable().getScanner(scan)) {
            System.out.println("Scan #" + num + " running...");
            for (Result result : scanner) {
                count++;
                if (dump) {
                    System.out.println("Result [" + count + "]:" + result);
                }
            }
        }

        // Metrics are read only after the scanner has been closed, as in the original code.
        ScanMetrics metrics = scan.getScanMetrics();
        System.out.println("Caching: " + caching + ", Batch: " + batch +
                ", Offset: " + offset + ", maxResults: " + maxResults +
                ", maxSize: " + maxResultSize + ", Results: " + count +
                ", RPCs: " + metrics.countOfRPCcalls);
    }

    /**
     * Runs the scan with a series of offset/limit/size combinations.
     *
     * @param args unused
     * @throws IOException if a scan fails to open or the shared resources cannot be released
     */
    public static void main(String[] args) throws IOException {
        try {
            // Offset 0, at most 2 cells: per the original author, columns 1 and 2 are scanned.
            scan(1, 11, 0, 0, 2, -1, true);

            // Offset 4, at most 2 cells: per the original author, columns 5 and 6 are scanned.
            scan(2, 11, 0, 4, 2, -1, true);

            scan(3, 5, 0, 0, 2, -1, false);

            scan(4, 11, 2, 0, 5, -1, true);

            scan(5, 11, -1, -1, -1, 1, false);

            scan(6, 11, -1, -1, -1, 10000, false);
        } finally {
            closeResources();
        }
    }

}

/**

 Caching: 11, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 458

 Caching: 11, Batch: 0, Offset: 4, maxResults: 2, maxSize: -1, Results: 1, RPCs: 3

 Caching: 5, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 1004

 Caching: 11, Batch: 2, Offset: 0, maxResults: 5, maxSize: -1, Results: 5009, RPCs: 458

 Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 1, Results: 5005, RPCs: 11012

 Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 10000, Results: 5005, RPCs: 469

**/

Hbase之缓存扫描加快读取速度的更多相关文章

ASP.NET状缓存Cache的应用-提高数据库读取速度
原文:ASP.NET状缓存Cache的应用-提高数据库读取速度一. Cache概述既然缓存中的数据其实是来自数据库的,那么缓存中的数据如何和数据库进行同步呢?一般来说,缓存中应该存放改 ...
优化SQLServer数据库加快查询速度
查询速度慢的原因很多,常见如下几种: 1.没有索引或者没有用到索引(这是查询慢最常见的问题,是程序设计的缺陷) 2.I/O吞吐量小,形成了瓶颈效应. 3.没有创建计算列导致查询不优化. 4.内存不足 ...
使用Openresty加快网页速度
新年快乐~~~ 上一篇文章讲到使用多级缓存来减少数据库的访问来加快网页的速度,只是,仍旧没有"嗖"一下就加载出来的感觉,想再优化一下,优化代码什么的已经到了极限.上周无意中看到了o ...
mysql千万级数据库插入速度和读取速度的调整记录
一般情况下mysql上百万数据读取和插入更新是没什么问题了,但到了上千万级就会出现很慢,下面我们来看mysql千万级数据库插入速度和读取速度的调整记录吧. 1)提高数据库插入性能中心思想:尽量将数据一 ...
数据读取速度达1.5G/s，UFS 2.1存储技术曝光
目前最快的是苹果NVME,当然UFS2.1也不差 iPhone6s与iPhone6s Plus在硬件的规格上有了很大的提升,但是它们身上的变化远没有苹果在发布会上所提到的A9处理器.1200万摄像头以 ...
160304-01、mysql数据库插入速度和读取速度的调整记录
需求:由于项目变态,需要在一个比较短时间段急剧增加数据库记录(两三天内,由于0增加至5亿).在整个过程调优过程非常艰辛思路: (1)提高数据库插入性能中心思想:尽量将数据一次性写入到Data Fil ...
mysql千万级数据库插入速度和读取速度的调整
mysql上百万数据读取和插入更新一般没什么问题,但上千万后速度会很慢,如何调整配置,提高效率.如下: 1.尽量将数据一次性写入DataFile和减少数据库的checkpoint操作,调整如下参数: ...
Linux检测硬盘读取速度
1. 清空缓存 > /proc/sys/vm/drop_caches 2. 测试读取速度 a. 将/dev/zero中数据按1M的数据单位写入testfile,共写512个单位,并不通过缓存 c ...
Android开发之制作圆形头像自定义View,直接引用工具类，加快开发速度。带有源代码学习
作者:程序员小冰,CSDN博客:http://blog.csdn.net/qq_21376985 QQ986945193 博客园主页:http://www.cnblogs.com/mcxiaobing ...

随机推荐

Thinkphp3.2.3如何加载自定义函数库
方法一:将自定义函数库放在Common文件夹下的Common文件夹下,命名为function.php. 方法二:项目配置文件中定义LOAD_EXT_FILE参数.这个方法在3.1的开发手册中有. 参考 ...
Mac OS下应用Python+Selenium实现web自动化测试
在Mac环境下的自动化测试环境搭建这里有一篇亲测通过的文<mac下怎么搭建selenium python环境?>. 不过在这个过程中要注意两点: 1.在终端联网执行命令“sudo pip ...
浅谈Apache性能调优
做了很多WEB系统性能测试,都知道了解测试环境,服务器硬件配置,web服务器参数配置是我们开始测试前首先要做的事情. 针对并发数量来说,不同数量级的用户并发,需求的服务器和web服务参数肯定是不同的. ...
lua中特殊用法
th> a=torch.zeros(,) [.0001s] th> a [torch.DoubleTensor of size 1x5] [.0001s] th> a[{,floor ...
华东交通大学2016年ACM“双基”程序设计竞赛 1007
Problem Description ACM小学妹在今天的暑假训练结束后,想看球赛放松一下.当他打开电脑时查询到联盟今天直播N场球赛,每场球赛的起止时间(S1,E1),(S2,E2),...,(SN ...
WebForm分页浏览
1.封装类 //封装类 using System; using System.Collections.Generic; using System.Web; /// <summary> // ...
(4)Redis 资料
Redis是一种面向“键/值”对类型数据的分布式NoSQL数据库系统,特点是高性能,持久存储,适应高并发的应用场景. Redis Home http://redis.io/ MSOpenTech/re ...
uTenux——重新整理底层驱动库
重新整理底层驱动库 1. 整理chip.h 在chip.h文件中的07----13的宏定义设置位如下,这样我们就不用在工程配中定义sam3s4c这个宏了,为我们以后通用少了一件麻烦事. //#if d ...
FZU 2148 Moon Game
Moon Game Time Limit:1000MS Memory Limit:32768KB 64bit IO Format:%I64d & %I64u Submit St ...
FZU 2146
Easy Game Time Limit:1000MS Memory Limit:32768KB 64bit IO Format:%I64d & %I64u Submit St ...

Hbase之缓存扫描加快读取速度

Hbase之缓存扫描加快读取速度的更多相关文章

随机推荐

热门专题