HDFS 工具类

读取HDFS上文件数据

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.OutputStream;

import java.io.StringWriter;

import java.net.URI;

import java.util.ArrayList;

import java.util.List;

import org.apache.commons.io.IOUtils;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FSDataInputStream;

import org.apache.hadoop.fs.FSDataOutputStream;

import org.apache.hadoop.fs.FileStatus;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.compress.CompressionCodec;

import org.apache.hadoop.io.compress.CompressionCodecFactory;

import org.apache.hadoop.util.Progressable;

/**

 * @author 作者 E-mail:

 * @version 创建时间：2016年3月8日 上午9:37:49 类说明

 * 读取hdfs文件数据

 */

public class ReadHDFSDatas {

    static Configuration conf = new Configuration();

    /**

     *

     *

     * @param location

     * @param conf

     * @return

     * @throws Exception

     */

    public static List<String> readLines( Path location, Configuration conf )

        throws Exception {

        // StringBuffer sb = new StringBuffer();

        FileSystem fileSystem = FileSystem.get( location.toUri(), conf );

        CompressionCodecFactory factory = new CompressionCodecFactory( conf );

        FileStatus[] items = fileSystem.listStatus( location );

        if ( items == null )

            return new ArrayList<String>();

        List<String> results = new ArrayList<String>();

        for ( FileStatus item : items ) {

            // ignoring files like _SUCCESS

            if ( item.getPath().getName().startsWith( "_" ) ) {

                continue;

            }

            CompressionCodec codec = factory.getCodec( item.getPath() );

            InputStream stream = null;

            if ( codec != null ) {

                stream = codec.createInputStream( fileSystem.open( item.getPath() ) );

            }

            else {

                stream = fileSystem.open( item.getPath() );

            }

            StringWriter writer = new StringWriter();

            IOUtils.copy( stream, writer, "UTF-8" );

            String raw = writer.toString();

            // String[] resulting = raw.split( "\n" );

            for ( String str : raw.split( "\t" ) ) {

                results.add( str );

                System.out.println( "start..." + results + "....." );

            }

        }

        return results;

    }

    public String ReadFile( String hdfs )

        throws IOException {

        StringBuffer sb = new StringBuffer();

        FileSystem fs = FileSystem.get( URI.create( hdfs ), conf );

        FSDataInputStream hdfsInStream = fs.open( new Path( hdfs ) );

        try {

            fs = FileSystem.get( conf );

            hdfsInStream = fs.open( new Path( hdfs ) );

            byte[] b = new byte[10240];

            int numBytes = 0;

            // Windows os error

            while ( ( numBytes = hdfsInStream.read( b ) ) > 0 ) {

                numBytes = hdfsInStream.read( b );

            }

        }

        catch ( IOException e ) {

            e.printStackTrace();

        }

        hdfsInStream.close();

        fs.close();

        return sb.toString();

    }

    /**

     *

     * @param filePath

     * @return

     * @throws IOException

     */

    public static String getFile( String filePath ) throws IOException {

        String line = "";

        try {

            Configuration conf = new Configuration();

            FileSystem fs = FileSystem.get( URI.create( filePath ), conf );

            Path pathq = new Path( filePath );

            FSDataInputStream fsr = fs.open( pathq );

            while ( line != null ) {

                line = fsr.readLine();

                if ( line != null ) {

                    System.out.println( line );

                }

            }

        }

        catch ( Exception e ) {

            e.printStackTrace();

        }

        return line;

    }

    /*

     *

     */

    public static List<String> getDatas( String filePath )  {

       List<String> list = new ArrayList<String>();

        try {

            Configuration conf = new Configuration();

            FileSystem fs = FileSystem.get( URI.create( filePath ), conf );

            Path pathq = new Path( filePath );

            FSDataInputStream fsr = fs.open( pathq );

            String line ="";

            while ( line != null ) {

                line = fsr.readLine();

                if ( line != null ) {

                    list.add( line );

                }

            }

        }

        catch ( Exception e ) {

            e.printStackTrace();

        }

        return list;

    }

    public static void main( String[] args ){

        //String hdfs = "hdfs://node4:9000/hive/warehouse/u_data/u.data";

        //String  hdfs = "/datas/t1";

        String  hdfs = "/datas/u.data";

        Path path = new Path( hdfs );

        // String hdfs = "/datas";

        // String hdfs = "/hive/warehouse/u_data/u.data";

      //  getFile(hdfs);

        /**

         * userid INT,

        movieid INT,

        rating INT,

        weekday INT)

         */

        List<String> listDatas = getDatas(hdfs);

        for (int i = 0; i < listDatas.size(); i++){

                String[] split = listDatas.get(i).split("\t");

                String userid = split[0];

                String movieid = split[1];

                String rating = split[2];

                String weekday = split[3];

                String makeRowKey = RegionSeverSplit.makeRowKey(userid);　
　　　　　　　　　// 用put API实现批量入库

                //System.out.println("userid--"+ userid + ".."+ "movieid--"+ movieid + ".." +"rating--"+ rating + ".."+"weekday--"+ weekday + "....");

                HBaseUtils.addRows("t1", makeRowKey, "f1", "weekday-rating", (movieid+"-"+rating+"-"+weekday).getBytes());

        }

        System.out.println("success......");

    }

}

HBase 随机生成rowkey 前置处理

import java.security.MessageDigest;

import java.security.NoSuchAlgorithmException;

import org.apache.commons.codec.binary.Hex;

/**
 * Builds HBase row keys with a hash-derived prefix.
 */
public class RegionSeverSplit {

    /** Fixed charset so the same id hashes identically on every platform. */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    /**
     * Builds a row key of the form {@code <prefix>-<id>}.
     *
     * <p>The prefix is derived from the MD5 digest of {@code id}: the first
     * seven hex digits (the leading 28 bits) are shifted right by one bit and
     * rendered as exactly seven lower-case, zero-padded hex digits.
     *
     * @param id identifier to embed in the key; must not be {@code null}
     * @return the seven-digit hex prefix, a dash, and the original id
     * @throws NullPointerException  if {@code id} is {@code null}
     * @throws IllegalStateException if the runtime provides no MD5 digest
     */
    public static String makeRowKey(String id) {
        if (id == null) {
            throw new NullPointerException("id must not be null");
        }

        byte[] digest;
        try {
            digest = MessageDigest.getInstance("MD5").digest(id.getBytes(UTF_8));
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 digest is not available", e);
        }

        // First seven hex digits = bytes 0..2 plus the high nibble of byte 3.
        int leading28Bits = ((digest[0] & 0xff) << 20)
                | ((digest[1] & 0xff) << 12)
                | ((digest[2] & 0xff) << 4)
                | ((digest[3] & 0xff) >> 4);

        return String.format("%07x", leading28Bits >> 1) + "-" + id;
    }

    /**
     * Prints the row key generated for a sample id.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String rowky = makeRowKey("asdfasdf");
        System.out.println(rowky);
    }

}

HBase Util工具类，用put方式批量或者单条数据入库

import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

import java.util.Random;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.client.HBaseAdmin;

import org.apache.hadoop.hbase.client.HTable;

import org.apache.hadoop.hbase.client.HTableInterface;

import org.apache.hadoop.hbase.client.HTablePool;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.mapreduce.InputSplit;

import cn.tansun.bd.hdfs.ReadHDFSDatas;

/**
 * HBase helper that writes cells through the put API.
 *
 * <p>The configuration is built once from {@code hbase-site.xml} and all
 * table handles come from a single shared pool. Every handle is returned to
 * the pool after use.
 *
 * @author root
 */
public class HBaseUtils {

    private static HBaseAdmin hadmin = null;

    private static Configuration conf;

    private static HTable htable = null;

    /** Fixed charset for row keys, families and qualifiers. */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    /** Shared pool; creating a pool per call would strand its table handles. */
    private static final HTablePool TABLE_POOL;

    static {
        conf = new Configuration();
        String filePath = "hbase-site.xml";
        Path path = new Path(filePath);
        conf.addResource(path);
        conf = HBaseConfiguration.create(conf);
        TABLE_POOL = new HTablePool(conf, 1000);
    }

    /**
     * Inserts one cell.
     *
     * @param tableName    target table; when {@code null} nothing is written
     * @param rowkey       row key
     * @param columnFinaly column family
     * @param columnName   column qualifier
     * @param values       cell value
     * @return {@code true} if the cell was written, {@code false} if
     *         {@code tableName} is {@code null} or the write failed
     */
    public static boolean addRow(String tableName, String rowkey,
            String columnFinaly, String columnName, byte[] values) {
        if (tableName == null) {
            System.out.println("  please select tableName");
            return false;
        }
        try {
            writePut(tableName, buildPut(rowkey, columnFinaly, columnName, values));
            System.out.print("addRow success..." + "tableName....."
                    + tableName);
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Prints the first 100 lines of {@code /datas/u.data}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String hdfs = "/datas/u.data";
        List<String> getDatas = ReadHDFSDatas.getDatas(hdfs);
        int limit = Math.min(getDatas.size(), 100);
        for (int i = 0; i < limit; i++) {
            System.out.println(getDatas.get(i));
        }
    }

    /**
     * Writes one cell and reports the elapsed time on standard output.
     *
     * <p>The signature carries a single row key and value, so exactly one
     * {@link Put} is written per call, and it is flushed before returning.
     *
     * @param tableName    target table
     * @param rowkey       row key
     * @param columnFinaly column family
     * @param columnName   column qualifier
     * @param values       cell value
     * @return the puts that were written; empty when {@code tableName} or
     *         {@code rowkey} is {@code null} or the write failed
     */
    public static List<Put> addRows(String tableName, String rowkey,
            String columnFinaly, String columnName, byte[] values) {
        List<Put> written = new ArrayList<Put>();
        long start = System.currentTimeMillis();

        // Both values are dereferenced below, so both must be present.
        if (tableName != null && rowkey != null) {
            try {
                Put put = buildPut(rowkey, columnFinaly, columnName, values);
                writePut(tableName, put);
                written.add(put);
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else {
            System.out.println("..tableName  not null");
        }

        long end = System.currentTimeMillis();
        long times = end - start;
        System.out.println(times * 1.0 / 1000 +"..... finsh........"  );
        return written;
    }

    /** Builds a single-cell put, encoding all names as UTF-8. */
    private static Put buildPut(String rowkey, String family, String qualifier,
            byte[] value) {
        Put put = new Put(rowkey.getBytes(UTF_8));
        put.addColumn(family.getBytes(UTF_8), qualifier.getBytes(UTF_8), value);
        return put;
    }

    /** Writes and flushes one put, then returns the table handle to the pool. */
    private static void writePut(String tableName, Put put) throws IOException {
        HTableInterface table = TABLE_POOL.getTable(tableName);
        try {
            table.put(put);
            // Pooled handles keep their settings, so flush explicitly in case
            // a previous user left client-side buffering switched on.
            table.flushCommits();
        } finally {
            try {
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Reads data by file name. Not implemented.
     *
     * @param fileName file to read
     * @return always {@code null}
     */
    public List<String> getFileDatas(String fileName){
        return null;
    }

    /**
     * Reads HDFS data by file name. Not implemented.
     *
     * @param fileName HDFS file to read
     * @return always {@code null}
     */
    public static List<String> getHdfsDatas(String fileName){
        return null;
    }

    /**
     * Computes input splits for a key range. Not implemented.
     *
     * @param startKey first row key of the range
     * @param endKey   last row key of the range
     * @return always {@code null}
     */
    public List<InputSplit> getSplits(byte[] startKey, byte[] endKey) {
        return null;
    }

}

HDFS 工具类的更多相关文章

flink---实时项目--day02-----1. 解析参数工具类 2. Flink工具类封装 3. 日志采集架构图 4. 测流输出 5. 将kafka中数据写入HDFS 6 KafkaProducer的使用 7 练习
1. 解析参数工具类(ParameterTool) 该类提供了从不同数据源读取和解析程序参数的简单实用方法,其解析args时,只能支持单只参数. 用来解析main方法传入参数的工具类 public c ...
hadoop的dfs工具类一个【原创】
开始没搞定插件问题,就弄了个dsf操作类,后面搞定了插件问题,这玩意也就聊胜于无了,还是丢这里算了. 首先是一个配置,ztool.hadoop.properties hadoop.home.dir=G ...
Hbase javaAPI（工具类）表的增删改查
建立连接: package Init; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.*; i ...
Java基础Map接口+Collections工具类
1.Map中我们主要讲两个接口 HashMap 与 LinkedHashMap (1)其中LinkedHashMap是有序的怎么存怎么取出来我们讲一下Map的增删改查功能: /* * Ma ...
Android—关于自定义对话框的工具类
开发中有很多地方会用到自定义对话框,为了避免不必要的重复代码,在此总结出一个工具类. 弹出对话框的地方很多,但是都大同小异,不同无非就是提示内容或者图片不同,下面这个类是将提示内容和图片放到了自定义函 ...
[转]Java常用工具类集合
转自:http://blog.csdn.net/justdb/article/details/8653166 数据库连接工具类——仅仅获得连接对象 ConnDB.java package com.ut ...
js常用工具类.
一些js的工具类复制代码 /** * Created by sevennight on 15-1-31. * js常用工具类 */ /** * 方法作用:[格式化时间] * 使用方法 * 示例: * ...
Guava库介绍之实用工具类
作者:Jack47 转载请保留作者和原文出处欢迎关注我的微信公众账号程序员杰克,两边的文章会同步,也可以添加我的RSS订阅源. 本文是我写的Google开源的Java编程库Guava系列之一,主要介 ...
Java程序员的日常—— Arrays工具类的使用
这个类在日常的开发中,还是非常常用的.今天就总结一下Arrays工具类的常用方法.最常用的就是asList,sort,toStream,equals,copyOf了.另外可以深入学习下Arrays的排 ...

随机推荐

利用java反射动态调用方法，生成grid数据
项目中需要java后台查询并组装前台grid的数据,数据行数不定,数据行定义不定,开始用了最原始的方法,写了几百行,就是前台需要什么字段后台拼接什么字段,java代码冗余量非常大,并且不够灵活,一旦前 ...
ArrayList与List<T>的区别
ArrayList alist = new ArrayList(); //ArrayList(object value),所以ArrayList可以存储任何类型,如果存储值类型的话会进行装箱操作,在操 ...
CHEVP算法（Canny/Hough Estimation of Vanishing Points)
这个算法是汪悦在 Lane detection and tracking using B-spline中提出来的.他在这篇论文中主要用的是B-spline模型,这个模型的主要优点是鲁棒性好,可以针对不 ...
jq鼠标移入移除
ele.on({ mouseover : function(){ } , mouseout : function(){ } })
ApacheHttpServer修改httpd.conf配置文件
转自:https://blog.csdn.net/dream1120757048/article/details/77427351 1. 安装完 Apache HTTP Server 之后,还需要修改 ...
vue动态设置Iview的多个Input组件自动获取焦点
1.html,通过ref=replyBox设置焦点元素,以便后续获取 // 动态设定自动获取焦点按钮 <p class="text-right text-blue fts14 ptb1 ...
Runtime-iOS运行时应用篇
一.动态方法交换:Method Swizzling实现动态方法交换(Method Swizzling )是Runtime中最具盛名的应用场景,其原理是:通过Runtime获取到方法实现的地址,进而动态 ...
2019-9-2-win10-uwp-标题栏
title author date CreateTime categories win10 uwp 标题栏 lindexi 2019-09-02 12:57:38 +0800 2018-2-13 17 ...
Linux学习笔记之VIM编辑器
此处根据需要,只罗列一些常用的指令和用法五．VIM程序编辑器 Vi与vim Vi打开文件没有高亮注释,vim有,且vim是vi的高级版本 Vim默认打开文件为命令模式 i ...
记一个日志冲突——管中窥豹[java混乱的日志体系]
D:\Java\jdk1.8.0_211\bin\java.exe "-javaagent:C:\Program Files\JetBrains\IntelliJ IDEA Communit ...

HDFS 工具类

HDFS 工具类的更多相关文章

随机推荐

热门专题