HDFS 工具类
读取HDFS上文件数据
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringWriter;
import java.net.URI;
import java.util.ArrayList;
import java.util.List; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.Progressable;
/**
 * Helpers for reading text data stored on HDFS.
 *
 * <p>All byte-to-text conversion in this class uses UTF-8. Streams opened by
 * these helpers are always closed before the method returns. The
 * {@link FileSystem} instances themselves are not closed, because
 * {@code FileSystem.get} may hand out an instance shared with other callers.
 *
 * <p>Created 2016-03-08.
 */
public class ReadHDFSDatas {

    /** Default Hadoop configuration used by {@link #ReadFile(String)}. */
    static Configuration conf = new Configuration();

    /** Charset name used for every byte/String conversion in this class. */
    private static final String UTF_8 = "UTF-8";

    /**
     * Reads every entry listed under {@code location} and returns the
     * tab-separated tokens of their (decompressed) contents.
     *
     * <p>Entries whose name starts with {@code "_"} (for example
     * {@code _SUCCESS}) are skipped. When a compression codec is registered
     * for an entry's path, the content is decompressed with it first.
     *
     * <p>NOTE(review): despite the method name, the content is split on
     * {@code "\t"} rather than on line breaks, exactly as the original code
     * did; confirm with callers before changing the delimiter.
     *
     * @param location file or directory to read
     * @param conf     Hadoop configuration used to resolve the file system and codecs
     * @return the tokens of all readable entries, in listing order; empty when
     *         the listing is {@code null}
     * @throws Exception if listing or reading fails
     */
    public static List<String> readLines( Path location, Configuration conf )
        throws Exception {
        FileSystem fileSystem = FileSystem.get( location.toUri(), conf );
        CompressionCodecFactory factory = new CompressionCodecFactory( conf );
        List<String> results = new ArrayList<String>();

        FileStatus[] items = fileSystem.listStatus( location );
        if ( items == null ) {
            return results;
        }

        for ( FileStatus item : items ) {
            Path file = item.getPath();
            // Ignore marker files such as _SUCCESS.
            if ( file.getName().startsWith( "_" ) ) {
                continue;
            }
            CompressionCodec codec = factory.getCodec( file );
            InputStream stream = fileSystem.open( file );
            try {
                if ( codec != null ) {
                    // Closing the wrapper below also closes the raw stream.
                    stream = codec.createInputStream( stream );
                }
                String raw = IOUtils.toString( stream, UTF_8 );
                for ( String token : raw.split( "\t" ) ) {
                    results.add( token );
                }
            }
            finally {
                IOUtils.closeQuietly( stream );
            }
        }
        return results;
    }

    /**
     * Reads a whole HDFS file into a string.
     *
     * @param hdfs path or URI of the file
     * @return the file content decoded as UTF-8
     * @throws IOException if the file cannot be opened or read
     */
    public String ReadFile( String hdfs )
        throws IOException {
        FileSystem fs = FileSystem.get( URI.create( hdfs ), conf );
        FSDataInputStream in = fs.open( new Path( hdfs ) );
        try {
            return IOUtils.toString( in, UTF_8 );
        }
        finally {
            IOUtils.closeQuietly( in );
        }
    }

    /**
     * Prints every line of an HDFS file to standard output and returns the
     * content that was printed.
     *
     * @param filePath path or URI of the file
     * @return all lines of the file, each terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    public static String getFile( String filePath ) throws IOException {
        StringBuilder content = new StringBuilder();
        for ( String line : readAllLines( filePath ) ) {
            System.out.println( line );
            content.append( line ).append( '\n' );
        }
        return content.toString();
    }

    /**
     * Reads an HDFS file line by line.
     *
     * <p>This is a best-effort call: callers are not expected to handle
     * exceptions, so any failure is reported on standard error and an empty
     * list is returned.
     *
     * @param filePath path or URI of the file
     * @return the lines of the file, or an empty list when it cannot be read
     */
    public static List<String> getDatas( String filePath ) {
        try {
            return new ArrayList<String>( readAllLines( filePath ) );
        }
        catch ( Exception e ) {
            System.err.println( "Unable to read " + filePath );
            e.printStackTrace();
            return new ArrayList<String>();
        }
    }

    /** Opens {@code filePath} with a fresh default configuration and reads all of its lines as UTF-8. */
    private static List<String> readAllLines( String filePath ) throws IOException {
        FileSystem fs = FileSystem.get( URI.create( filePath ), new Configuration() );
        FSDataInputStream in = fs.open( new Path( filePath ) );
        try {
            return IOUtils.readLines( in, UTF_8 );
        }
        finally {
            IOUtils.closeQuietly( in );
        }
    }

    /**
     * Loads {@code /datas/u.data} from HDFS and writes each record to the
     * HBase table {@code t1} through the Put API.
     *
     * <p>Each record is expected to hold four tab-separated fields:
     * userid, movieid, rating, weekday. Records with fewer fields are
     * reported on standard error and skipped.
     *
     * @param args unused
     */
    public static void main( String[] args ){
        String hdfs = "/datas/u.data";
        java.nio.charset.Charset utf8 = java.nio.charset.Charset.forName( UTF_8 );

        List<String> listDatas = getDatas(hdfs);
        for ( String record : listDatas ) {
            String[] split = record.split("\t");
            if ( split.length < 4 ) {
                System.err.println( "Skipping malformed record: " + record );
                continue;
            }
            String userid = split[0];
            String movieid = split[1];
            String rating = split[2];
            String weekday = split[3];
            String makeRowKey = RegionSeverSplit.makeRowKey(userid);
            // Store the record through the Put API.
            HBaseUtils.addRows("t1", makeRowKey, "f1", "weekday-rating",
                (movieid+"-"+rating+"-"+weekday).getBytes( utf8 ));
        }
        System.out.println("success......");
    }
}
HBase 随机生成rowkey 前置处理
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import org.apache.commons.codec.binary.Hex;

/**
 * Builds HBase row keys that carry a hash-derived prefix.
 *
 * <p>NOTE(review): judging by the class name, the prefix is presumably meant
 * to spread consecutive ids across regions; confirm against the table's
 * split points.
 */
public class RegionSeverSplit {

    /**
     * Derives a row key of the form {@code <prefix>-<id>}.
     *
     * <p>The prefix is the first seven hexadecimal digits of the MD5 digest
     * of {@code id} (encoded as UTF-8), interpreted as a number, shifted
     * right by one bit and rendered again as seven zero-padded lowercase
     * hexadecimal digits. The same id always yields the same key.
     *
     * @param id identifier to turn into a row key; must not be {@code null}
     * @return the prefixed row key
     * @throws NullPointerException  if {@code id} is {@code null}
     * @throws IllegalStateException if the MD5 algorithm is unavailable
     */
    public static String makeRowKey(String id){
        if (id == null) {
            throw new NullPointerException("id must not be null");
        }
        MessageDigest messageDigest;
        try {
            messageDigest = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            // Fail loudly instead of continuing with a null digest.
            throw new IllegalStateException("MD5 digest is not available", e);
        }
        // Fixed charset so the key does not depend on the platform default.
        byte[] digest = messageDigest.digest(
                id.getBytes(java.nio.charset.Charset.forName("UTF-8")));
        String md5Hex = new String(Hex.encodeHex(digest));

        // Seven hex digits are 28 bits, so the value always fits in an int.
        int prefix = Integer.parseInt(md5Hex.substring(0, 7), 16) >> 1;
        return String.format("%07x", prefix) + "-" + id;
    }

    /**
     * Prints the row key generated for a sample id.
     *
     * @param args unused
     */
    public static void main(String[] args){
        String rowKey = makeRowKey("asdfasdf");
        System.out.println(rowKey);
    }
}
HBase Util工具类,用put方式批量或者单条数据入库
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.mapreduce.InputSplit;
import cn.tansun.bd.hdfs.ReadHDFSDatas;

/**
 * HBase helper methods that write cells through the Put API.
 *
 * <p>Tables are obtained from one shared {@link HTablePool} and are closed
 * after every write, which per the HTablePool documentation hands them back
 * to the pool.
 *
 * @author root
 */
public class HBaseUtils {

    /** Charset used to encode row keys, column families and qualifiers. */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    /** Maximum number of table references kept per table name in the pool. */
    private static final int MAX_POOLED_TABLES = 1000;

    /** HBase configuration, including the local {@code hbase-site.xml} resource. */
    private static final Configuration conf = loadConfiguration();

    /** Pool shared by all writes so that a new pool is not built per row. */
    private static final HTablePool TABLE_POOL = new HTablePool(conf, MAX_POOLED_TABLES);

    /** Builds the HBase configuration with {@code hbase-site.xml} added as a resource. */
    private static Configuration loadConfiguration() {
        Configuration base = new Configuration();
        base.addResource(new Path("hbase-site.xml"));
        return HBaseConfiguration.create(base);
    }

    /** Creates a Put holding a single cell for the given row, family and qualifier. */
    private static Put buildPut(String rowkey, String columnFinaly,
            String columnName, byte[] values) {
        Put put = new Put(rowkey.getBytes(UTF_8));
        put.addColumn(columnFinaly.getBytes(UTF_8), columnName.getBytes(UTF_8), values);
        return put;
    }

    /** Writes one Put to the table, flushes it, and always closes the table. */
    private static void write(String tableName, Put put) throws IOException {
        HTableInterface table = TABLE_POOL.getTable(tableName);
        try {
            table.put(put);
            // Flush explicitly so the write is sent even if the table buffers puts.
            table.flushCommits();
        } finally {
            table.close();
        }
    }

    /**
     * Inserts one cell into a table.
     *
     * @param tableName    target table; when {@code null} nothing is written
     * @param rowkey       row key
     * @param columnFinaly column family
     * @param columnName   column qualifier
     * @param values       cell value
     * @return {@code true} when the cell was written, {@code false} when
     *         {@code tableName} is {@code null} or the write failed
     */
    public static boolean addRow(String tableName, String rowkey,
            String columnFinaly, String columnName, byte[] values) {
        if (tableName == null) {
            System.out.println(" please select tableName");
            return false;
        }
        try {
            write(tableName, buildPut(rowkey, columnFinaly, columnName, values));
            System.out.print("addRow success..." + "tableName....."
                    + tableName);
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Prints the first 100 lines of {@code /datas/u.data} read from HDFS.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String hdfs = "/datas/u.data";
        List<String> lines = ReadHDFSDatas.getDatas(hdfs);
        int limit = Math.min(lines.size(), 100);
        for (int i = 0; i < limit; i++) {
            System.out.println(lines.get(i));
        }
    }

    /**
     * Writes one cell to a table and reports the elapsed time on standard output.
     *
     * <p>The signature carries a single row key and value, so exactly one
     * Put is written per call.
     *
     * @param tableName    target table
     * @param rowkey       row key
     * @param columnFinaly column family
     * @param columnName   column qualifier
     * @param values       cell value
     * @return the Puts that were written: one element on success, empty when
     *         {@code tableName} or {@code rowkey} is {@code null} or the
     *         write failed
     */
    public static List<Put> addRows(String tableName, String rowkey,
            String columnFinaly, String columnName, byte[] values) {
        List<Put> written = new ArrayList<Put>();
        long start = System.currentTimeMillis();
        // Both values are dereferenced below, so both must be present.
        if (tableName != null && rowkey != null) {
            Put put = buildPut(rowkey, columnFinaly, columnName, values);
            try {
                write(tableName, put);
                written.add(put);
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else {
            System.out.println("..tableName not null");
        }
        long end = System.currentTimeMillis();
        long times = end - start;
        System.out.println(times * 1.0 / 1000 +"..... finsh........" );
        return written;
    }

    /**
     * Reads data by file name. Not implemented yet.
     *
     * @param fileName name of the file to read
     * @return always {@code null}
     */
    public List<String> getFileDatas(String fileName){
        return null;
    }

    /**
     * Reads HDFS data by file name. Not implemented yet.
     *
     * @param fileName name of the HDFS file to read
     * @return always {@code null}
     */
    public static List<String> getHdfsDatas(String fileName){
        return null;
    }

    /**
     * Computes input splits for a key range. Not implemented yet.
     *
     * @param startKey first key of the range
     * @param endKey   last key of the range
     * @return always {@code null}
     */
    public List<InputSplit> getSplits(byte[] startKey, byte[] endKey) {
        return null;
    }
}
HDFS 工具类的更多相关文章
- flink---实时项目--day02-----1. 解析参数工具类 2. Flink工具类封装 3. 日志采集架构图 4. 测流输出 5. 将kafka中数据写入HDFS 6 KafkaProducer的使用 7 练习
1. 解析参数工具类(ParameterTool) 该类提供了从不同数据源读取和解析程序参数的简单实用方法,其解析args时,只能支持单只参数. 用来解析main方法传入参数的工具类 public c ...
- hadoop的dfs工具类一个【原创】
开始没搞定插件问题,就弄了个dfs操作类,后面搞定了插件问题,这玩意也就聊胜于无了,还是丢这里算了. 首先是一个配置,ztool.hadoop.properties hadoop.home.dir=G ...
- Hbase javaAPI(工具类)表的增删改查
建立连接: package Init; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.*; i ...
- Java基础Map接口+Collections工具类
1.Map中我们主要讲两个接口 HashMap 与 LinkedHashMap (1)其中LinkedHashMap是有序的 怎么存怎么取出来 我们讲一下Map的增删改查功能: /* * Ma ...
- Android—关于自定义对话框的工具类
开发中有很多地方会用到自定义对话框,为了避免不必要的重复代码,在此总结出一个工具类. 弹出对话框的地方很多,但是都大同小异,不同无非就是提示内容或者图片不同,下面这个类是将提示内容和图片放到了自定义函 ...
- [转]Java常用工具类集合
转自:http://blog.csdn.net/justdb/article/details/8653166 数据库连接工具类——仅仅获得连接对象 ConnDB.java package com.ut ...
- js常用工具类.
一些js的工具类 复制代码 /** * Created by sevennight on 15-1-31. * js常用工具类 */ /** * 方法作用:[格式化时间] * 使用方法 * 示例: * ...
- Guava库介绍之实用工具类
作者:Jack47 转载请保留作者和原文出处 欢迎关注我的微信公众账号程序员杰克,两边的文章会同步,也可以添加我的RSS订阅源. 本文是我写的Google开源的Java编程库Guava系列之一,主要介 ...
- Java程序员的日常—— Arrays工具类的使用
这个类在日常的开发中,还是非常常用的.今天就总结一下Arrays工具类的常用方法.最常用的就是asList,sort,toStream,equals,copyOf了.另外可以深入学习下Arrays的排 ...
随机推荐
- hdu4734 F(x)(数位dp)
题目传送门 F(x) Time Limit: 1000/500 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)Total S ...
- Swift编程语言学习1.6——可选值
可选值 使用可选(optionals)来处理值可能缺失的情况.可选表示: 有值,等于 x 或者没有值 注意: C 和 Objective-C 中并没有可选这个概念.最接近的是 Objective- ...
- Yii2 错误 'Headers already sent.'
错误日志如下: __source__: __topic__: web category: yii\web\HeadersAlreadySentException ip: level: message: ...
- 使用onfocus与onblur实现搜索框附加信息
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8" ...
- Quartz实现数据库动态配置定时任务
项目实战 或许实现的方式跟之前的代码有点不一样的 1.定时任务的配置信息 @Configuration public class ScheduleConfigration { @Autowired p ...
- 从0构建webpack开发环境(二) 添加css,img的模块化支持
在一个简单的webpack.config.js中,构建了一个基础的webpack.config.js文件,但是只支持js模块的打包. 本篇中添加对css和img的模块化支持 首先需要安装三个个load ...
- 源码分析--HashMap(JDK1.8)
在JDK1.8中对HashMap的底层实现做了修改.本篇对HashMap源码从核心成员变量到常用方法进行分析. HashMap数据结构如下: 先看成员变量: 1.底层存放数据的是Node<K,V ...
- Linux防火墙--iptables--白名单配置
1.服务器22端口和1521端口开通给指定IP [root@node2 sysconfig]# iptables -t filter -nL INPUT Chain INPUT (policy ACC ...
- 【Leetcode周赛】从contest-81开始。(一般是10个contest写一篇文章)
Contest 81 (2018年11月8日,周四,凌晨) 链接:https://leetcode.com/contest/weekly-contest-81 比赛情况记录:结果:3/4, ranki ...
- 【LeetCode】随机化算法 random(共6题)
[384]Shuffle an Array(2019年3月12日) Shuffle a set of numbers without duplicates. 实现一个类,里面有两个 api,struc ...