读取HDFS上文件数据

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringWriter;
import java.net.URI;
import java.util.ArrayList;
import java.util.List; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.Progressable;
/**
* @author 作者 E-mail:
* @version 创建时间:2016年3月8日 上午9:37:49 类说明
* 读取hdfs文件数据
*/
public class ReadHDFSDatas { static Configuration conf = new Configuration();
/**
*
*
* @param location
* @param conf
* @return
* @throws Exception
*/
public static List<String> readLines( Path location, Configuration conf )
throws Exception {
// StringBuffer sb = new StringBuffer();
FileSystem fileSystem = FileSystem.get( location.toUri(), conf );
CompressionCodecFactory factory = new CompressionCodecFactory( conf );
FileStatus[] items = fileSystem.listStatus( location );
if ( items == null )
return new ArrayList<String>();
List<String> results = new ArrayList<String>();
for ( FileStatus item : items ) { // ignoring files like _SUCCESS
if ( item.getPath().getName().startsWith( "_" ) ) {
continue;
}
CompressionCodec codec = factory.getCodec( item.getPath() );
InputStream stream = null; if ( codec != null ) {
stream = codec.createInputStream( fileSystem.open( item.getPath() ) );
}
else {
stream = fileSystem.open( item.getPath() );
} StringWriter writer = new StringWriter();
IOUtils.copy( stream, writer, "UTF-8" );
String raw = writer.toString();
// String[] resulting = raw.split( "\n" );
for ( String str : raw.split( "\t" ) ) {
results.add( str );
System.out.println( "start..." + results + "....." );
}
}
return results;
} public String ReadFile( String hdfs )
throws IOException {
StringBuffer sb = new StringBuffer();
FileSystem fs = FileSystem.get( URI.create( hdfs ), conf );
FSDataInputStream hdfsInStream = fs.open( new Path( hdfs ) );
try {
fs = FileSystem.get( conf );
hdfsInStream = fs.open( new Path( hdfs ) );
byte[] b = new byte[10240];
int numBytes = 0;
// Windows os error
while ( ( numBytes = hdfsInStream.read( b ) ) > 0 ) {
numBytes = hdfsInStream.read( b ); } }
catch ( IOException e ) { e.printStackTrace();
}
hdfsInStream.close();
fs.close();
return sb.toString();
} /**
*
* @param filePath
* @return
* @throws IOException
*/
public static String getFile( String filePath ) throws IOException {
String line = "";
try {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get( URI.create( filePath ), conf );
Path pathq = new Path( filePath );
FSDataInputStream fsr = fs.open( pathq ); while ( line != null ) {
line = fsr.readLine();
if ( line != null ) {
System.out.println( line );
}
} }
catch ( Exception e ) {
e.printStackTrace();
}
return line;
} /*
*
*/
public static List<String> getDatas( String filePath ) {
List<String> list = new ArrayList<String>(); try {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get( URI.create( filePath ), conf );
Path pathq = new Path( filePath );
FSDataInputStream fsr = fs.open( pathq );
String line ="";
while ( line != null ) {
line = fsr.readLine();
if ( line != null ) { list.add( line );
}
}
}
catch ( Exception e ) {
e.printStackTrace();
}
return list;
}
public static void main( String[] args ){
//String hdfs = "hdfs://node4:9000/hive/warehouse/u_data/u.data";
//String hdfs = "/datas/t1";
String hdfs = "/datas/u.data";
Path path = new Path( hdfs );
// String hdfs = "/datas";
// String hdfs = "/hive/warehouse/u_data/u.data";
// getFile(hdfs);
/**
* userid INT,
movieid INT,
rating INT,
weekday INT) */
List<String> listDatas = getDatas(hdfs);
for (int i = 0; i < listDatas.size(); i++){
String[] split = listDatas.get(i).split("\t");
String userid = split[0];
String movieid = split[1];
String rating = split[2];
String weekday = split[3];
String makeRowKey = RegionSeverSplit.makeRowKey(userid); 
         // 用put API实现批量入库
//System.out.println("userid--"+ userid + ".."+ "movieid--"+ movieid + ".." +"rating--"+ rating + ".."+"weekday--"+ weekday + "....");
HBaseUtils.addRows("t1", makeRowKey, "f1", "weekday-rating", (movieid+"-"+rating+"-"+weekday).getBytes());
}
System.out.println("success......");
}
}

HBase 随机生成rowkey 前置处理

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException; import org.apache.commons.codec.binary.Hex; public class RegionSeverSplit { public static String makeRowKey(String id){
String md5_content = null;
try {
MessageDigest messageDigest = MessageDigest.getInstance("MD5");
messageDigest.reset();
messageDigest.update(id.getBytes());
byte[] bytes = messageDigest.digest();
md5_content = new String(Hex.encodeHex(bytes));
} catch (NoSuchAlgorithmException e1) {
e1.printStackTrace();
}
//turn right md5
String right_md5_id = Integer.toHexString(Integer.parseInt(md5_content.substring(0,7),16)>>1);
while(right_md5_id.length()<7){
right_md5_id = "0" + right_md5_id;
}
return right_md5_id + "-" + id;
}
public static void main(String[] args){
String rowky = makeRowKey("asdfasdf");
System.out.println(rowky);
}
}

HBase Util工具类,用put方式批量或者单条数据入库

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.mapreduce.InputSplit; import cn.tansun.bd.hdfs.ReadHDFSDatas; /**
*
* @author root
*
*/ public class HBaseUtils {
private static HBaseAdmin hadmin = null;
private static Configuration conf;
private static HTable htable = null; static {
conf = new Configuration();
String filePath = "hbase-site.xml";
Path path = new Path(filePath);
conf.addResource(path);
conf = HBaseConfiguration.create(conf);
} /**
* insert one row
*
* @param tableName
* @param rowkey
* @param columnFinaly
* @param columnName
* @param values
* @return
*/
public static boolean addRow(String tableName, String rowkey,
String columnFinaly, String columnName, byte[] values) {
boolean flag = true;
if (tableName != null) {
HTablePool hTpool = new HTablePool(conf, 1000);
HTableInterface table = hTpool.getTable(tableName);
Put put = new Put(rowkey.getBytes());
put.addColumn(columnFinaly.getBytes(), columnName.getBytes(),
values);
try {
table.put(put);
System.out.print("addRow success..." + "tableName....."
+ tableName);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
} else {
System.out.println(" please select tableName");
} return flag;
} public static void main(String[] args) {
/*String makeRowKey = RegionSeverSplit.makeRowKey("adcdfef");
String tableName = "student";
String columnfianly = "info";
String columnName = "name";
String values = "zhangsan";
addRow(tableName, makeRowKey, columnfianly, columnName,
values.getBytes());*/
ReadHDFSDatas readh = new ReadHDFSDatas();
String hdfs = "/datas/u.data";
List<String> getDatas = readh.getDatas(hdfs);
for (int i = 0; i < getDatas.size(); i++){
if (i < 100){
System.out.println(getDatas.get(i));
}
}
} /**
* put many rows
*
* @param tableName
* @param rowkey
* @param columnFinaly
* @param columnName
* @param values
* @return
*/
public static List<Put> addRows(String tableName, String rowkey,
String columnFinaly, String columnName, byte[] values) {
List<Put> lists = null;
long start = System.currentTimeMillis();
if (tableName != null || rowkey != null) {
HTablePool hTablePool = new HTablePool(conf, 1000);
HTableInterface table = hTablePool.getTable(tableName);
try {
table.setAutoFlush(false);
table.setWriteBufferSize(1024 * 1024 * 1);
lists = new ArrayList<Put>();
Random random = new Random();
byte[] buffers = new byte[256];
int count = 100;
for (int i = 0; i < count; i++){
Put put = new Put(rowkey.getBytes());
random.nextBytes(buffers);
put.add(columnFinaly.getBytes(), columnName.toString().getBytes(), values);
put.getDurability();
//table.setAutoFlush(false);
if ( i % 100 == 0){ lists.add(put);
try {
table.batch(lists);
} catch (InterruptedException e) {
System.out.println("error......");
e.printStackTrace();
}
table.put(lists);
lists.clear();
table.flushCommits();
}
}
} catch (IOException e) { e.printStackTrace();
} } else {
System.out.println("..tableName not null");
}
long end = System.currentTimeMillis();
long times = end - start;
System.out.println(times * 1.0 / 1000 +"..... finsh........" );
return lists;
} /**
* read datas by fileName
* @param fileName
* @return
*/
public List<String> getFileDatas(String fileName){ return null;
} /**
* read hdfs datas by fileName
* @param fileName
* @return
*/
public static List<String> getHdfsDatas(String fileName){ /* List<String> getDatas = ReadHDFSDatas.getDatas(fileName);
for (int i = 0; i < getDatas.size(); i++){
if (i < 100){
System.out.println(getDatas.get(i));
}
}
return getDatas;*/
return null;
}
/**
*
* @param startKey
* @param endKey
* @return
*/
public List<InputSplit> getSplits(byte[] startKey, byte[] endKey) {
return null;
}
}

HDFS 工具类的更多相关文章

  1. flink---实时项目--day02-----1. 解析参数工具类 2. Flink工具类封装 3. 日志采集架构图 4. 测流输出 5. 将kafka中数据写入HDFS 6 KafkaProducer的使用 7 练习

    1. 解析参数工具类(ParameterTool) 该类提供了从不同数据源读取和解析程序参数的简单实用方法,其解析args时,只能支持单只参数. 用来解析main方法传入参数的工具类 public c ...

  2. hadoop的dfs工具类一个【原创】

    开始没搞定插件问题,就弄了个dsf操作类,后面搞定了插件问题,这玩意也就聊胜于无了,还是丢这里算了. 首先是一个配置,ztool.hadoop.properties hadoop.home.dir=G ...

  3. Hbase javaAPI(工具类)表的增删改查

    建立连接: package Init; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.*; i ...

  4. Java基础Map接口+Collections工具类

    1.Map中我们主要讲两个接口 HashMap  与   LinkedHashMap (1)其中LinkedHashMap是有序的  怎么存怎么取出来 我们讲一下Map的增删改查功能: /* * Ma ...

  5. Android—关于自定义对话框的工具类

    开发中有很多地方会用到自定义对话框,为了避免不必要的城府代码,在此总结出一个工具类. 弹出对话框的地方很多,但是都大同小异,不同无非就是提示内容或者图片不同,下面这个类是将提示内容和图片放到了自定义函 ...

  6. [转]Java常用工具类集合

    转自:http://blog.csdn.net/justdb/article/details/8653166 数据库连接工具类——仅仅获得连接对象 ConnDB.java package com.ut ...

  7. js常用工具类.

    一些js的工具类 复制代码 /** * Created by sevennight on 15-1-31. * js常用工具类 */ /** * 方法作用:[格式化时间] * 使用方法 * 示例: * ...

  8. Guava库介绍之实用工具类

    作者:Jack47 转载请保留作者和原文出处 欢迎关注我的微信公众账号程序员杰克,两边的文章会同步,也可以添加我的RSS订阅源. 本文是我写的Google开源的Java编程库Guava系列之一,主要介 ...

  9. Java程序员的日常—— Arrays工具类的使用

    这个类在日常的开发中,还是非常常用的.今天就总结一下Arrays工具类的常用方法.最常用的就是asList,sort,toStream,equals,copyOf了.另外可以深入学习下Arrays的排 ...

随机推荐

  1. 利用java反射动态调用方法,生成grid数据

    项目中需要java后台查询并组装前台grid的数据,数据行数不定,数据行定义不定,开始用了最原始的方法,写了几百行,就是前台需要什么字段后台拼接什么字段,java代码冗余量非常大,并且不够灵活,一旦前 ...

  2. ArrayList与List<T>的区别

    ArrayList alist = new ArrayList(); //ArrayList(object value),所以ArrayList可以存储任何类型,如果存储值类型的话会进行装箱操作,在操 ...

  3. CHEVP算法(Canny/Hough Estimation of Vanishing Points)

    这个算法是汪悦在 Lane detection and tracking using B-spline中提出来的.他在这篇论文中主要用的是B-spline模型,这个模型的主要优点是鲁棒性好,可以针对不 ...

  4. jq鼠标移入移除

    ele.on({ mouseover : function(){ } , mouseout : function(){ } })

  5. ApacheHttpServer修改httpd.conf配置文件

    转自:https://blog.csdn.net/dream1120757048/article/details/77427351 1. 安装完 Apache HTTP Server 之后,还需要修改 ...

  6. vue动态设置Iview的多个Input组件自动获取焦点

    1.html,通过ref=replyBox设置焦点元素,以便后续获取 // 动态设定自动获取焦点按钮 <p class="text-right text-blue fts14 ptb1 ...

  7. Runtime-iOS运行时应用篇

    一.动态方法交换:Method Swizzling实现动态方法交换(Method Swizzling )是Runtime中最具盛名的应用场景,其原理是:通过Runtime获取到方法实现的地址,进而动态 ...

  8. 2019-9-2-win10-uwp-标题栏

    title author date CreateTime categories win10 uwp 标题栏 lindexi 2019-09-02 12:57:38 +0800 2018-2-13 17 ...

  9. Linux学习笔记之VIM编辑器

    此处根据需要,只罗列一些常用的指令和用法 五.VIM程序编辑器 Vi与vim Vi打开文件没有高亮注释,vim有,且vim是vi的高级版本 Vim默认打开文件为命令模式 i               ...

  10. 记一个日志冲突——管中窥豹[java混乱的日志体系]

    D:\Java\jdk1.8.0_211\bin\java.exe "-javaagent:C:\Program Files\JetBrains\IntelliJ IDEA Communit ...