/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Arrays;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.io.BytesWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser;

/**
* Creates an HFile with random key/value pairs.
*/
public class CreateRandomStoreFile {

/**
 * At most this many bytes can be added to or subtracted from the configured
 * key/value lengths.
 */
private static final int LEN_VARIATION = 5;

private static final Logger LOG =
LoggerFactory.getLogger(CreateRandomStoreFile.class);
private static final String OUTPUT_DIR_OPTION = "o";
private static final String NUM_KV_OPTION = "n";
private static final String HFILE_VERSION_OPTION = "h";
private static final String KEY_SIZE_OPTION = "k";
private static final String VALUE_SIZE_OPTION = "v";
private static final String COMPRESSION_OPTION = "c";
private static final String BLOOM_FILTER_OPTION = "bf";
private static final String BLOCK_SIZE_OPTION = "bs";
private static final String BLOOM_BLOCK_SIZE_OPTION = "bfbs";
private static final String INDEX_BLOCK_SIZE_OPTION = "ibs";

/** The exit code this command-line tool returns on failure */
private static final int EXIT_FAILURE = 1;

/** The number of valid key types in a store file (excludes the Minimum and Maximum markers) */
private static final int NUM_VALID_KEY_TYPES =
KeyValue.Type.values().length - 2;

private Options options = new Options();

private int keyPrefixLen, keyLen, rowLen, cfLen, valueLen;

private Random rand;

/**
 * Runs the tool.
*
* @param args command-line arguments
* @return true in case of success
* @throws IOException
*/
public boolean run(String[] args) throws IOException {
options.addOption(OUTPUT_DIR_OPTION, "output_dir", true,
"Output directory");
options.addOption(NUM_KV_OPTION, "num_kv", true,
"Number of key/value pairs");
options.addOption(KEY_SIZE_OPTION, "key_size", true, "Average key size");
options.addOption(VALUE_SIZE_OPTION, "value_size", true,
"Average value size");
options.addOption(HFILE_VERSION_OPTION, "hfile_version", true,
"HFile version to create");
options.addOption(COMPRESSION_OPTION, "compression", true,
" Compression type, one of "
+ Arrays.toString(Compression.Algorithm.values()));
options.addOption(BLOOM_FILTER_OPTION, "bloom_filter", true,
"Bloom filter type, one of "
+ Arrays.toString(BloomType.values()));
options.addOption(BLOCK_SIZE_OPTION, "block_size", true,
"HFile block size");
options.addOption(BLOOM_BLOCK_SIZE_OPTION, "bloom_block_size", true,
"Compound Bloom filters block size");
options.addOption(INDEX_BLOCK_SIZE_OPTION, "index_block_size", true,
"Index block size"); if (args.length == 0) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp(CreateRandomStoreFile.class.getSimpleName(), options,
true);
return false;
}

CommandLineParser parser = new PosixParser();
CommandLine cmdLine;
try {
cmdLine = parser.parse(options, args);
} catch (ParseException ex) {
LOG.error(ex.toString(), ex);
return false;
}

if (!cmdLine.hasOption(OUTPUT_DIR_OPTION)) {
LOG.error("Output directory is not specified");
return false;
}

if (!cmdLine.hasOption(NUM_KV_OPTION)) {
LOG.error("The number of keys/values not specified");
return false;
}

if (!cmdLine.hasOption(KEY_SIZE_OPTION)) {
LOG.error("Key size is not specified");
return false;
}

if (!cmdLine.hasOption(VALUE_SIZE_OPTION)) {
LOG.error("Value size not specified");
return false;
}

Configuration conf = HBaseConfiguration.create();

Path outputDir = new Path(cmdLine.getOptionValue(OUTPUT_DIR_OPTION));

long numKV = Long.parseLong(cmdLine.getOptionValue(NUM_KV_OPTION));
configureKeyValue(numKV,
Integer.parseInt(cmdLine.getOptionValue(KEY_SIZE_OPTION)),
Integer.parseInt(cmdLine.getOptionValue(VALUE_SIZE_OPTION)));

FileSystem fs = FileSystem.get(conf);

Compression.Algorithm compr = Compression.Algorithm.NONE;
if (cmdLine.hasOption(COMPRESSION_OPTION)) {
compr = Compression.Algorithm.valueOf(
cmdLine.getOptionValue(COMPRESSION_OPTION));
}

BloomType bloomType = BloomType.NONE;
if (cmdLine.hasOption(BLOOM_FILTER_OPTION)) {
bloomType = BloomType.valueOf(cmdLine.getOptionValue(
BLOOM_FILTER_OPTION));
}

int blockSize = HConstants.DEFAULT_BLOCKSIZE;
if (cmdLine.hasOption(BLOCK_SIZE_OPTION))
blockSize = Integer.valueOf(cmdLine.getOptionValue(BLOCK_SIZE_OPTION));

if (cmdLine.hasOption(BLOOM_BLOCK_SIZE_OPTION)) {
conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
Integer.valueOf(cmdLine.getOptionValue(BLOOM_BLOCK_SIZE_OPTION)));
}

if (cmdLine.hasOption(INDEX_BLOCK_SIZE_OPTION)) {
conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY,
Integer.valueOf(cmdLine.getOptionValue(INDEX_BLOCK_SIZE_OPTION)));
}

HFileContext meta = new HFileContextBuilder().withCompression(compr)
.withBlockSize(blockSize).build();
StoreFileWriter sfw = new StoreFileWriter.Builder(conf,
new CacheConfig(conf), fs)
.withOutputDir(outputDir)
.withBloomType(bloomType)
.withMaxKeyCount(numKV)
.withFileContext(meta)
.build();

rand = new Random();
LOG.info("Writing " + numKV + " key/value pairs");
for (long i = 0; i < numKV; ++i) {
sfw.append(generateKeyValue(i));
}

int numMetaBlocks = rand.nextInt(10) + 1;
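// Between 1 and 10 meta blocks with random names and random byte contents
// are appended below, in addition to the regular key/value data.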
LOG.info("Writing " + numMetaBlocks + " meta blocks");
for (int metaI = 0; metaI < numMetaBlocks; ++metaI) {
sfw.getHFileWriter().appendMetaBlock(generateString(),
new BytesWritable(generateValue()));
}
sfw.close();

Path storeFilePath = sfw.getPath();
long fileSize = fs.getFileStatus(storeFilePath).getLen();
LOG.info("Created {}, {} bytes, compression={}", storeFilePath, fileSize, compr.toString()); return true;
}

private void configureKeyValue(long numKV, int keyLen, int valueLen) {
numKV = Math.abs(numKV);
keyLen = Math.abs(keyLen);
keyPrefixLen = 0;
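// keyPrefixLen becomes the number of bytes needed to represent numKV,
// so every generated key gets a unique fixed-width, big-endian prefix.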
while (numKV != 0) {
numKV >>>= 8;
++keyPrefixLen;
}

this.keyLen = Math.max(keyPrefixLen, keyLen);
this.valueLen = valueLen;

// Arbitrarily split the key into row, column family, and qualifier.
rowLen = keyPrefixLen / 3;
cfLen = keyPrefixLen / 4;
}

private int nextInRange(int range) {
return rand.nextInt(2 * range + 1) - range;
}

public KeyValue generateKeyValue(long i) {
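// A single generated key buffer is sliced into row, column family, and
// qualifier using the rowLen/cfLen offsets computed in configureKeyValue().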
byte[] k = generateKey(i);
byte[] v = generateValue();

return new KeyValue(
k, 0, rowLen,
k, rowLen, cfLen,
k, rowLen + cfLen, k.length - rowLen - cfLen,
rand.nextLong(),
generateKeyType(rand),
v, 0, v.length);
}

public static KeyValue.Type generateKeyType(Random rand) {
if (rand.nextBoolean()) {
// Let's make half of KVs puts.
return KeyValue.Type.Put;
} else {
KeyValue.Type keyType =
KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum)
{
throw new RuntimeException("Generated an invalid key type: " + keyType
+ ". " + "Probably the layout of KeyValue.Type has changed.");
}
return keyType;
}
}

private String generateString() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < rand.nextInt(10); ++i) {
sb.append((char) ('A' + rand.nextInt(26)));
}
return sb.toString();
}

private byte[] generateKey(long i) {
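// The first keyPrefixLen bytes encode the sequence number i in big-endian
// order, so keys sort in generation order; the remaining bytes are random.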
byte[] k = new byte[Math.max(keyPrefixLen, keyLen
+ nextInRange(LEN_VARIATION))];
for (int pos = keyPrefixLen - 1; pos >= 0; --pos) {
k[pos] = (byte) (i & 0xFF);
i >>>= 8;
}
for (int pos = keyPrefixLen; pos < k.length; ++pos) {
k[pos] = (byte) rand.nextInt(256);
}
return k;
}

private byte[] generateValue() {
byte[] v = new byte[Math.max(1, valueLen + nextInRange(LEN_VARIATION))];
for (int i = 0; i < v.length; ++i) {
v[i] = (byte) rand.nextInt(256);
}
return v;
}

public static void main(String[] args) {
CreateRandomStoreFile app = new CreateRandomStoreFile();
try {
if (!app.run(args))
System.exit(EXIT_FAILURE);
} catch (IOException ex) {
LOG.error(ex.toString(), ex);
System.exit(EXIT_FAILURE);
}
}
}
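Below is a minimal sketch of how the tool can be driven programmatically. The output directory, pair count, sizes, compression codec, and Bloom filter type are illustrative assumptions, not values taken from the source; the option letters and the run() signature come from the class above, and an HBase/Hadoop configuration must be available on the classpath.

import java.io.IOException;

import org.apache.hadoop.hbase.regionserver.CreateRandomStoreFile;

public class CreateRandomStoreFileExample {

  public static void main(String[] args) throws IOException {
    CreateRandomStoreFile tool = new CreateRandomStoreFile();
    // Equivalent to the command line:
    //   CreateRandomStoreFile -o /tmp/random-storefile -n 100000 -k 20 -v 1000 -c GZ -bf ROW
    // (paths and sizes are made-up example values)
    boolean ok = tool.run(new String[] {
        "-o", "/tmp/random-storefile", // output directory (required)
        "-n", "100000",                // number of key/value pairs (required)
        "-k", "20",                    // average key size in bytes (required)
        "-v", "1000",                  // average value size in bytes (required)
        "-c", "GZ",                    // optional compression algorithm
        "-bf", "ROW"                   // optional Bloom filter type
    });
    if (!ok) {
      System.exit(1);
    }
  }
}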
