读取 Hive 文件并将数据导入 HBase

package cn.tansun.bd.hbase;

import java.io.IOException;

import java.net.URI;

import java.util.List;

import java.util.Map;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.hbase.KeyValue;

import org.apache.hadoop.hbase.client.HTable;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;

import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

import cn.tansun.bd.utils.JDBCUtils;

/**
 * MapReduce driver that reads tab-separated Hive text files from HDFS and writes
 * them out as HFiles laid out for an existing HBase table.
 *
 * <p>The column family, the row-key field indexes and the (field index, qualifier)
 * pairs are looked up in MySQL through {@link JDBCUtils} on the client, then handed
 * to the map tasks through the job {@link Configuration}. Static fields set in the
 * client JVM are not visible inside remote task JVMs, so the mapper must not rely
 * on them.
 *
 * <p>This class only generates the HFiles; no bulk-load step is performed here.
 *
 * @author zgl
 * @version created 2016-07-05 19:57:17
 */
public class HiveMySQl2HBaseMR extends Configured implements Tool {

    public static String tableName;

    public static String cf = null;

    /** Comma-separated indexes of the input fields that make up the row key. */
    public static String strRowkey = null;

    public static String strIndex = null;

    public static String column_name = null;

    /** Comma-separated list of {@code <field index>\t<qualifier name>} pairs. */
    public static String strColumn = null;

    /** Job configuration key carrying the column family to the map tasks. */
    private static final String CONF_CF = "hive2hbase.column.family";

    /** Job configuration key carrying the row-key field indexes to the map tasks. */
    private static final String CONF_ROWKEY = "hive2hbase.rowkey.indexes";

    /** Job configuration key carrying the column mapping to the map tasks. */
    private static final String CONF_COLUMNS = "hive2hbase.columns";

    /** Separates the entries of {@link #strColumn} and of {@link #strRowkey}. */
    private static final String LIST_SEPARATOR = ",";

    /** Separates field index and qualifier name inside one {@link #strColumn} entry. */
    private static final String PAIR_SEPARATOR = "\t";

    /**
     * Loads the metadata from MySQL, runs the job and exits with its status.
     * Any failure results in a non-zero exit code.
     *
     * @param args command-line arguments, forwarded to {@link ToolRunner}
     */
    public static void main(String[] args) {
        int exitCode;
        try {
            getDatas();
            exitCode = ToolRunner.run(new HiveMySQl2HBaseMR(), args);
        } catch (Exception e) {
            e.printStackTrace();
            exitCode = 1;
        }
        System.exit(exitCode);
    }

    /**
     * Reads table name, column family, row-key spec and column mapping from MySQL
     * and stores them in the static fields of this class.
     *
     * <p>The static fields are rebuilt from scratch on every call, so calling this
     * method repeatedly does not append duplicate columns to {@link #strColumn}.
     * Rows without a field index or qualifier name are ignored.
     *
     * @return the raw rows returned by the metadata query (never {@code null})
     */
    @SuppressWarnings("rawtypes")
    public static List<Map> getDatas() {
        // NOTE(review): archive_hqualifier (aq) has no join condition here, so it is
        // cross-joined with the other tables. The query in JDBCUtils.main adds
        // "af.qualifier_id = aq.qualifier_id" - confirm against the schema whether
        // that condition is missing from this statement.
        String sql = "SELECT DISTINCT s.tableName, ar.rowkey,af.column_family,     aq.column_hive_index,   aq.column_name FROM "
                + " archive_htable s,     archive_hrowkey ar,     archive_hfamily af,     archive_hqualifier aq WHERE "
                + "    s.rowkey_id = ar.rowkey_id  AND ar.family_id = af.family_id    AND s.tableName = '2'";

        List<Map> selectDatas = JDBCUtils.selectDatas(sql);

        tableName = null;
        cf = null;
        strRowkey = null;
        strColumn = null;

        StringBuilder columns = new StringBuilder();
        for (Map metaData : selectDatas) {
            // The first row wins for the table-level attributes.
            if (tableName == null) {
                tableName = asString(metaData.get("tableName"));
            }
            if (cf == null) {
                cf = asString(metaData.get("column_family"));
            }
            if (strRowkey == null) {
                strRowkey = asString(metaData.get("rowkey"));
            }

            // JDBC may return numeric columns as Integer/Long, hence no String cast.
            String index = asString(metaData.get("column_hive_index"));
            String name = asString(metaData.get("column_name"));
            if (index == null || name == null) {
                continue;
            }
            if (columns.length() > 0) {
                columns.append(LIST_SEPARATOR);
            }
            // Must use the same separator the mapper splits on.
            columns.append(index).append(PAIR_SEPARATOR).append(name);
        }
        strColumn = columns.length() == 0 ? null : columns.toString();

        return selectDatas;
    }

    /** Converts a JDBC value to its string form, keeping {@code null} as {@code null}. */
    private static String asString(Object value) {
        return value == null ? null : String.valueOf(value);
    }

    /**
     * Configures and runs the HFile-generating job.
     *
     * @param args currently unused; table name and paths are fixed below
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if the metadata is missing or the job cannot be set up or run
     */
    @SuppressWarnings("deprecation")
    public int run(String[] args) throws Exception {
        // Start from the configuration prepared by ToolRunner so that generic
        // options (-D, -conf, ...) are honoured.
        Configuration base = getConf();
        Configuration conf = (base == null) ? new Configuration() : new Configuration(base);
        conf.addResource("hbase-site.xml");

        String table = "2";
        String input = "hdfs://node11:9000/datas/hivedata5";
        String output = "hdfs://node11:9000/datas/out1";

        if (cf == null || strRowkey == null || strColumn == null) {
            getDatas();
        }
        if (cf == null || strRowkey == null || strColumn == null) {
            throw new IllegalStateException(
                    "Metadata not available: column family, rowkey spec and column mapping are required");
        }

        // Must happen before the Job is created, because the Job copies the configuration.
        conf.set(CONF_CF, cf);
        conf.set(CONF_ROWKEY, strRowkey);
        conf.set(CONF_COLUMNS, strColumn);

        // Remove a leftover intermediate output directory before running (best effort).
        try {
            FileSystem fs = FileSystem.get(URI.create(output), conf);
            fs.delete(new Path(output), true);
            // Intentionally not closed: FileSystem.get returns a cached instance that
            // is shared with the rest of the job setup.
        } catch (IOException e) {
            e.printStackTrace();
        }

        HTable htable = new HTable(conf, Bytes.toBytes(table));
        try {
            Job job = new Job(conf);
            job.setJobName("Generate HFile");
            job.setJarByClass(HiveMySQl2HBaseMR.class);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(HiveMySQlMapper.class);
            FileInputFormat.setInputPaths(job, input);

            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(KeyValue.class);

            FileOutputFormat.setOutputPath(job, new Path(output));
            HFileOutputFormat2.configureIncrementalLoad(job, htable);

            return job.waitForCompletion(true) ? 0 : 1;
        } finally {
            htable.close();
        }
    }

    /**
     * Turns one tab-separated input line into one {@link KeyValue} per configured column.
     */
    public static class HiveMySQlMapper extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {

        private static final String COUNTER_GROUP = "HiveMySQl2HBaseMR";

        // Defaults come from the static fields, which are only populated when the
        // mapper runs in the client JVM; setup() overrides them from the job
        // configuration.
        String cf = HiveMySQl2HBaseMR.cf;

        String rowKey = HiveMySQl2HBaseMR.strRowkey;

        String strColumn = HiveMySQl2HBaseMR.strColumn;

        /** Separator between row-key components: the control character \001. */
        String split = "\001";

        private byte[] family;

        private int[] rowKeyIndexes;

        private int[] columnIndexes;

        private byte[][] qualifiers;

        /** Highest field index any row-key component or column refers to. */
        private int maxIndex;

        /**
         * Reads the metadata from the job configuration and pre-parses it once per task.
         *
         * @throws IOException if the metadata is missing or malformed
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);

            Configuration jobConf = context.getConfiguration();
            cf = jobConf.get(CONF_CF, cf);
            rowKey = jobConf.get(CONF_ROWKEY, rowKey);
            strColumn = jobConf.get(CONF_COLUMNS, strColumn);
            if (cf == null || rowKey == null || strColumn == null) {
                throw new IOException(
                        "Missing job metadata: column family, rowkey spec and column mapping are required");
            }

            family = Bytes.toBytes(cf);
            maxIndex = -1;

            String[] rowKeyParts = rowKey.split(LIST_SEPARATOR);
            rowKeyIndexes = new int[rowKeyParts.length];
            for (int i = 0; i < rowKeyParts.length; i++) {
                rowKeyIndexes[i] = parseIndex(rowKeyParts[i], "rowkey");
                maxIndex = Math.max(maxIndex, rowKeyIndexes[i]);
            }

            String[] columnParts = strColumn.split(LIST_SEPARATOR);
            columnIndexes = new int[columnParts.length];
            qualifiers = new byte[columnParts.length][];
            for (int i = 0; i < columnParts.length; i++) {
                String[] pair = columnParts[i].split(PAIR_SEPARATOR);
                if (pair.length != 2) {
                    throw new IOException("Malformed column mapping entry '" + columnParts[i] + "'");
                }
                columnIndexes[i] = parseIndex(pair[0], "column");
                qualifiers[i] = Bytes.toBytes(pair[1]);
                maxIndex = Math.max(maxIndex, columnIndexes[i]);
            }
        }

        /** Parses a non-negative field index, reporting bad input as an IOException. */
        private static int parseIndex(String text, String what) throws IOException {
            try {
                int index = Integer.parseInt(text.trim());
                if (index < 0) {
                    throw new IOException("Negative " + what + " index '" + text + "'");
                }
                return index;
            } catch (NumberFormatException e) {
                throw new IOException("Invalid " + what + " index '" + text + "'", e);
            }
        }

        /**
         * Emits one KeyValue per configured column, keyed by the composed row key.
         * Lines with too few fields or an empty row key are counted and skipped.
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            // limit -1 keeps trailing empty fields, so indexes stay aligned with columns.
            String[] datas = value.toString().split("\t", -1);
            if (datas.length <= maxIndex) {
                context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
                return;
            }

            // Compose the row key from the configured fields, e.g. "a\001b\001c".
            StringBuilder rowKeyBuilder = new StringBuilder();
            for (int index : rowKeyIndexes) {
                if (rowKeyBuilder.length() > 0) {
                    rowKeyBuilder.append(split);
                }
                rowKeyBuilder.append(datas[index]);
            }
            if (rowKeyBuilder.length() == 0) {
                context.getCounter(COUNTER_GROUP, "EMPTY_ROWKEY_LINES").increment(1);
                return;
            }

            byte[] row = Bytes.toBytes(rowKeyBuilder.toString());
            // The output key must be the same row as the KeyValue's row.
            ImmutableBytesWritable rk = new ImmutableBytesWritable(row);

            for (int i = 0; i < columnIndexes.length; i++) {
                // byte[] row, byte[] family, byte[] qualifier, byte[] value
                KeyValue kv = new KeyValue(row, family, qualifiers[i],
                        Bytes.toBytes(datas[columnIndexes[i]]));
                context.write(rk, kv);
            }
        }
    }
}

JDBCUtils类：

package cn.tansun.bd.utils;

import java.io.ByteArrayInputStream;

import java.io.IOException;

import java.io.InputStream;

import java.sql.Connection;

import java.sql.DriverManager;

import java.sql.ResultSet;

import java.sql.ResultSetMetaData;

import java.sql.SQLException;

import java.sql.Statement;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.Iterator;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

import java.util.Properties;

import java.util.Set;

/**
 * Small JDBC helper that runs ad-hoc queries against the database configured in
 * the classpath resource named by {@link #PATH}.
 *
 * <p>The keys {@code jdbc.url}, {@code jdbc.username} and {@code jdbc.password} are
 * read once when the class is loaded. Every query opens its own connection and
 * closes it, together with its statement and result set, before returning.
 *
 * @author zgl
 * @version created 2016-06-23 16:25:03
 */
public class JDBCUtils {

    public JDBCUtils() {
    }

    public static String PATH = "jdbc.properties";

    public static Properties prop;

    public static String url = null;

    public static String username = null;

    public static String password = null;

    // Retained for source compatibility only. Queries use method-local resources
    // so that nothing is leaked or shared between callers.
    public static Connection conn;

    public static Statement stmt;

    public static ResultSet rs;

    public static String fileName = null;

    static {
        prop = new Properties();
        InputStream inputStream = JDBCUtils.class.getClassLoader().getResourceAsStream(PATH);
        if (inputStream == null) {
            // Loading a null stream would abort class initialization with an NPE.
            System.err.println("JDBCUtils: resource '" + PATH
                    + "' not found on the classpath; JDBC settings are left unset");
        } else {
            try {
                prop.load(inputStream);
                url = prop.getProperty("jdbc.url");
                username = prop.getProperty("jdbc.username");
                password = prop.getProperty("jdbc.password");
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    inputStream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a classpath stream fails.
                }
            }
        }
    }

    /**
     * Closes the given connection, reporting but not propagating any failure.
     *
     * @param conn the connection to close; may be {@code null}
     */
    public static void closeConnection(Connection conn) {
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /** Closes a statement, reporting but not propagating any failure. */
    private static void closeQuietly(Statement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /** Closes a result set, reporting but not propagating any failure. */
    private static void closeQuietly(ResultSet resultSet) {
        if (resultSet != null) {
            try {
                resultSet.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Runs a query and returns every row as a map from column name to value.
     *
     * @param sql the SELECT statement to execute
     * @return one map per row, in result order; empty if the query returns no rows
     *         or fails (the failure is printed to standard error)
     */
    @SuppressWarnings("rawtypes")
    public static List<Map> selectDatas(String sql) {
        List<Map> listDatas = new ArrayList<Map>();
        Connection connection = null;
        Statement statement = null;
        ResultSet resultSet = null;
        try {
            connection = DriverManager.getConnection(url, username, password);
            statement = connection.createStatement();
            resultSet = statement.executeQuery(sql);

            ResultSetMetaData metaData = resultSet.getMetaData();
            int count = metaData.getColumnCount();
            while (resultSet.next()) {
                Map<String, Object> row = new HashMap<String, Object>();
                for (int i = 1; i <= count; i++) {
                    row.put(metaData.getColumnName(i), resultSet.getObject(i));
                }
                listDatas.add(row);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(resultSet);
            closeQuietly(statement);
            closeConnection(connection);
        }
        return listDatas;
    }

    /**
     * Runs a query and returns all cell values, row by row and column by column,
     * as one flat list of strings.
     *
     * @param sql the SELECT statement to execute
     * @return the string form of every cell ({@code null} for SQL NULL); empty if
     *         the query returns no rows or fails (the failure is printed to
     *         standard error)
     */
    public static List<String> getStrMap(String sql) {
        List<String> strList = new ArrayList<String>();
        Connection connection = null;
        Statement statement = null;
        ResultSet resultSet = null;
        try {
            connection = DriverManager.getConnection(url, username, password);
            statement = connection.createStatement();
            resultSet = statement.executeQuery(sql);

            int count = resultSet.getMetaData().getColumnCount();
            while (resultSet.next()) {
                for (int i = 1; i <= count; i++) {
                    // getString avoids a ClassCastException on non-string columns.
                    strList.add(resultSet.getString(i));
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(resultSet);
            closeQuietly(statement);
            closeConnection(connection);
        }
        return strList;
    }

    public static String table_name = null;

    public static String rowkey = null;

    public static String column_family = null;

    public static String column_name = null;

    public static String sql = null;

    public static String sql2 = null;

    /**
     * Manual smoke test: runs the metadata query and prints every returned cell.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        sql2 = "SELECT   GROUP_CONCAT( DISTINCT aq.column_hive_index,'  ',   aq.column_name ,' '    ORDER BY   "
                + "    aq.column_hive_index SEPARATOR ','  ) AS column_names FROM  archive_hqualifier  aq "
                + "where aq.table_id = 77 GROUP BY   aq.column_name ORDER BY aq.column_hive_index";

        sql = "SELECT DISTINCT 	s.tableName, 	ar.rowkey,	af.column_family,	"
                + "aq.column_name FROM	archive_htable s,archive_hrowkey ar,archive_hfamily af,"
                + " 	archive_hqualifier aq "
                + "WHERE s .rowkey_id = ar.rowkey_id AND ar.family_id = af.family_id "
                + "AND af.qualifier_id = aq.qualifier_id;";

        for (String cell : getStrMap(sql)) {
            System.out.print(cell);
        }
    }
}

读取hive文件并将数据导入hbase的更多相关文章

sqoop将mysql数据导入hbase、hive的常见异常处理
原创不易,如需转载,请注明出处https://www.cnblogs.com/baixianlong/p/10700700.html,否则将追究法律责任!!! 一.需求: 1.将以下这张表(test_ ...
Sqoop将mysql数据导入hbase的血与泪
Sqoop将mysql数据导入hbase的血与泪(整整搞了大半天) 版权声明:本文为yunshuxueyuan原创文章.如需转载请标明出处: https://my.oschina.net/yunsh ...
MapReduce将HDFS文本数据导入HBase中
HBase本身提供了很多种数据导入的方式,通常有两种常用方式: 使用HBase提供的TableOutputFormat,原理是通过一个Mapreduce作业将数据导入HBase 另一种方式就是使用HB ...
[Python]将Excel文件中的数据导入MySQL
Github Link 需求现有2000+文件夹,每个文件夹下有若干excel文件,现在要将这些excel文件中的数据导入mysql. 每个excel文件的第一行是无效数据. 除了excel文件中已 ...
使用sqoop将MySQL数据库中的数据导入Hbase
使用sqoop将MySQL数据库中的数据导入Hbase 前提:安装好 sqoop.hbase. 下载jbdc驱动:mysql-connector-java-5.1.10.jar 将 mysql-con ...
小技巧之“将Text文件中的数据导入到Excel中，这里空格为分割符为例”
1.使用场景将数据以文本导出后,想录入到Excel中,的简便方案, 起因:对于Excel的导出,Text导出明显会更方便些 2.将Text文件中的数据导入到Excel中,这里空格为分割符为例的步骤 ...
Hive如何加载和导入HBase的数据
当我们用HBase 存储实时数据的时候, 如果要做一些数据分析方面的操作, 就比较困难了, 要写MapReduce Job. Hive 主要是用来做数据分析的数据仓库,支持标准SQL 查询, 做数据分 ...
Hive数据导入HBase引起数据膨胀引发的思考
最近朋友公司在做一些数据的迁移,主要是将一些Hive处理之后的热数据导入到HBase中,但是遇到了一个很奇怪的问题:同样的数据到了HBase中,所占空间竟增长了好几倍!详谈中,笔者建议朋友至少从几点原 ...
Hive数据导入Hbase
方案一:Hive关联HBase表方式适用场景:数据量不大4T以下(走hbase的api导入数据) 一.hbase表不存在的情况创建hive表hive_hbase_table映射hbase表hbas ...

随机推荐

python学习三十三天函数匿名函数lambda用法
python函数匿名函数lambda用法,是在多行语句转换一行语句,有点像三元运算符,只可以表示一些简单运算的,lambda做一些复杂的运算不太可能.分别对比普通函数和匿名函数的区别 1,普通的函数用 ...
NGUI的Lebal需注意问题
1,为什么调节字体大小时,字体大小没变化,我们需要调节两个地方,如下图框柱显示调节font size和size才可以同时控制字体的大小 2,label有时是不支持输入中文,但是支持复制进去,则这时我 ...
C#面试笔试题二
1.using关键字有什么用?什么是IDisposable? using可以声明namespace的引入,还可以实现非托管资源的释放,实现了IDisposiable的类在using中创建,using结 ...
IntelliJ IDEA 中 Ctrl+Alt+Left/Right 失效
开发工具:Idea OS:Window 7 在idea中使用ctrl+b跟踪进入函数之后,每次返回都不知道用什么快捷键,在idea中使用ctrl+alt+方向键首先会出现与win7屏幕方向的快捷键冲突 ...
2018-4-30-win2d-CanvasRenderTarget-vs-CanvasBitmap
title author date CreateTime categories win2d CanvasRenderTarget vs CanvasBitmap lindexi 2018-04-30 ...
93-基于ATOM E3825的3U PXIe 主板控制器
基于ATOM E3825的3U PXIe 主板控制器一.板卡概述: 本主板采用intel ATOM 处理器 E3825 设计主板控制器,是一种低成本.低功耗解决方案.板卡采用Intel Bay Tr ...
iView的Message提示框
全局配置message main.js Vue.prototype.$Message.config({ top: 70, duration:3 }); Vue.prototype.$Message.c ...
substr()、substring()、slice()
substr(start,length) start(必选)开始位置的下标可为负数-1即为倒数第一个字符以此类推 0为第一个字母下标 length长度(可选)如果省略该参数则默认到最后一位 var ...
java中switch的用法以及判断的类型有哪些（String\byte\short\int\char\枚举类型）
switch关键字对于多数java学习者来说并不陌生,由于笔试和面试经常会问到它的用法,这里做了一个简单的总结: 能用于switch判断的类型有:byte.short.int.char(JDK1.6) ...
JS中JSON.stringify()方法，将js对象（json串）转换成字符串，传入服务器
JSON 通常用于与服务端交换数据. 在向服务器发送数据时一般是字符串. 我们可以使用 JSON.stringify() 方法将 JavaScript 对象转换为字符串. 语法 JSON.string ...

读取hive文件并将数据导入hbase

读取hive文件并将数据导入hbase的更多相关文章

随机推荐

热门专题