Operating HDFS with Java Code
package com.hy.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class HDFSCommand {

    public static final Logger log = LoggerFactory.getLogger(HDFSCommand.class);

    public static void main(String[] args) throws Exception {
        String hdfsURI = "hdfs://10.1.23.240:9000";
        String srcPath = "D:" + File.separator + "readme.txt";
        String descPath = "/xhy";
        String data = "haohaohaohaohao\r\n善字\r\n善生\r\n善行\r\n守善\r\n愿善";
        Configuration conf = new Configuration();

        copyFromLocalFile(hdfsURI, srcPath, descPath, conf);
        uploadFile(hdfsURI, data, descPath, conf);

        RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = listFile(hdfsURI, descPath, conf, true);
        while (locatedFileStatusRemoteIterator.hasNext()) {
            LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
            System.out.println("listFile:" + next.toString());
        }

        FileStatus[] fileStatuses = listFileAndFolder(hdfsURI, descPath, conf);
        for (FileStatus f : fileStatuses) {
            System.out.println("listFileAndFolder:" + f.toString());
        }
    }
    /**
     * Upload a file from a local path to HDFS.
     *
     * @param hdfsURI  HDFS URI, e.g. hdfs://host:9000
     * @param srcPath  local source path
     * @param descPath destination path on HDFS
     * @param conf     Hadoop configuration
     */
    public static void copyFromLocalFile(String hdfsURI, String srcPath, String descPath, Configuration conf) throws URISyntaxException, IOException {
        log.info(">> copyFromLocalFile, srcPath is {}, descPath is {}", srcPath, descPath);
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        fs.copyFromLocalFile(new Path(srcPath), new Path(descPath));
        log.info("<< copyFromLocalFile success");
        fs.close();
        /*
         * Under the hood this boils down to
         * fs.open(new Path(srcPath), 4096);
         * fs.create(new Path(descPath));
         * IOUtils.copyBytes(in, out, conf, true);
         */
    }
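    /*
     * A minimal sketch of that manual route (an illustration under the
     * assumption sketched in the comment above, not a method from the original
     * post): open the source on the local FileSystem, create the destination
     * on HDFS, and let IOUtils stream the bytes across.
     */
    public static void copyFromLocalFileManually(String hdfsURI, String srcPath, String descPath, Configuration conf) throws Exception {
        FileSystem localFs = FileSystem.getLocal(conf);          // local file system
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);  // remote HDFS
        FSDataInputStream in = localFs.open(new Path(srcPath), 4096);
        FSDataOutputStream out = fs.create(new Path(descPath));
        IOUtils.copyBytes(in, out, conf, true); // true: close both streams when the copy finishes
        fs.close();
    }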
    /**
     * Write string data to a file on HDFS.
     *
     * @param hdfsURI  HDFS URI
     * @param data     string content to write
     * @param descPath destination path on HDFS
     * @param conf     Hadoop configuration
     */
    public static void uploadFile(String hdfsURI, String data, String descPath, Configuration conf) throws Exception {
        log.info(">> uploadFile, descPath is {}, data is {}", descPath, data);
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        /*FSDataOutputStream fsOutputStream = fs.create(new Path(descPath), new Progressable() {
            @Override
            public void progress() {
                log.info("<< written to hdfs, file path: {}", descPath);
            }
        });*/
        FSDataOutputStream fsOutputStream = fs.create(new Path(descPath),
                () -> log.info("<< written to hdfs, file path: {}", descPath));
        // Encode explicitly as UTF-8 so the platform default charset cannot garble the Chinese text
        byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
        fsOutputStream.write(bytes, 0, bytes.length);
        /*
         * The following alternatives garble Chinese characters:
         * fsOutputStream.writeBytes(data);  // drops the high byte of each char
         * fsOutputStream.writeUTF(data);    // prepends a 2-byte length and uses modified UTF-8
         * fsOutputStream.writeChars(data);  // writes raw UTF-16 code units
         */
        fsOutputStream.close();
        fs.close();
    }
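    /*
     * Hedged companion sketch (an addition, not in the original post): read the
     * file back and decode it explicitly as UTF-8, mirroring the
     * getBytes(StandardCharsets.UTF_8) call above, so the text round-trips intact.
     */
    public static String readFileAsString(String hdfsURI, String path, Configuration conf) throws Exception {
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        FSDataInputStream in = fs.open(new Path(path));
        try {
            java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
            IOUtils.copyBytes(in, buffer, 4096, false); // false: leave closing to the finally block
            return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
        } finally {
            IOUtils.closeStream(in);
            fs.close();
        }
    }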
    /**
     * List files under the given HDFS path.
     *
     * @param hdfsURI   HDFS URI
     * @param path      path to list
     * @param conf      Hadoop configuration
     * @param recursive whether to recurse into subdirectories
     * @throws Exception
     */
    public static RemoteIterator<LocatedFileStatus> listFile(String hdfsURI, String path, Configuration conf, boolean recursive) throws Exception {
        log.info(">> listFile, path is {}, recursive is {}", path, recursive);
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        RemoteIterator<LocatedFileStatus> result = fs.listFiles(new Path(path), recursive);
        log.info("<< listFile, result is {}", result);
        return result;
    }
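    /*
     * Hedged sketch (an addition, not from the original post): each
     * LocatedFileStatus also carries block locations, which is the main reason
     * to prefer listFiles over listStatus when locality information matters.
     */
    public static void printBlockLocations(String hdfsURI, String path, Configuration conf) throws Exception {
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(path), true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            for (BlockLocation block : status.getBlockLocations()) {
                log.info("{} block at offset {} on hosts {}",
                        status.getPath(), block.getOffset(), Arrays.toString(block.getHosts()));
            }
        }
        fs.close();
    }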
    /**
     * List both files and folders under the given HDFS path.
     *
     * @param hdfsURI HDFS URI
     * @param path    path to list
     * @param conf    Hadoop configuration
     */
    public static FileStatus[] listFileAndFolder(String hdfsURI, String path, Configuration conf) throws Exception {
        log.info(">> listFileAndFolder, path is {}", path);
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        FileStatus[] result = fs.listStatus(new Path(path));
        // Arrays.toString prints the entries; result.toString() would only print the array's identity hash
        log.info("<< listFileAndFolder, result is {}", Arrays.toString(result));
        return result;
    }
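    /*
     * The original's dangling "方法二" (method two) stub hinted at an alternative;
     * a hedged sketch of one: walk the same listStatus results and use
     * isDirectory() to tell folders from files.
     */
    public static void listFileAndFolderTyped(String hdfsURI, String path, Configuration conf) throws Exception {
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        for (FileStatus status : fs.listStatus(new Path(path))) {
            log.info("{} -> {}", status.getPath(), status.isDirectory() ? "folder" : "file");
        }
        fs.close();
    }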
    /**
     * Create a folder on HDFS.
     *
     * @param hdfsURI HDFS URI
     * @param path    folder path to create
     * @param conf    Hadoop configuration
     * @throws Exception
     */
    public static void mkDir(String hdfsURI, String path, Configuration conf) throws Exception {
        log.info(">> mkDir, path is {}", path);
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        boolean result = fs.mkdirs(new Path(path));
        if (result) {
            log.info("<< mkDir {} success", path);
        } else {
            log.error("<< mkDir {} error", path);
        }
    }
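    /*
     * Hedged variant (an assumption, not in the original post): mkdirs also has
     * an overload taking an FsPermission, useful when the default umask is too
     * restrictive for the directory being created.
     */
    public static void mkDirWithPermission(String hdfsURI, String path, Configuration conf) throws Exception {
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        boolean result = fs.mkdirs(new Path(path),
                new org.apache.hadoop.fs.permission.FsPermission((short) 0755));
        log.info("<< mkDirWithPermission {} result: {}", path, result);
        fs.close();
    }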
    /**
     * Delete the given path (recursively).
     *
     * @param hdfsURI HDFS URI
     * @param path    path to delete
     * @param conf    Hadoop configuration
     * @throws IOException
     */
    public static void delete(String hdfsURI, String path, Configuration conf) throws IOException {
        log.info(">> delete, path is {}", path);
        conf.set("fs.defaultFS", hdfsURI);
        FileSystem fs = FileSystem.get(conf);
        if (!fs.exists(new Path(path))) {
            log.info("<< delete {} error, path does not exist", path);
            return;
        }
        boolean result = fs.delete(new Path(path), true);
        if (result) {
            log.info("<< delete {} success", path);
        } else {
            log.error("<< delete {} error", path);
        }
    }
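    /*
     * Hedged sketch (an addition, not from the original post): production code
     * often moves a path to the HDFS trash instead of deleting it outright, so
     * an accidental delete can still be undone.
     */
    public static void moveToTrash(String hdfsURI, String path, Configuration conf) throws IOException {
        conf.set("fs.defaultFS", hdfsURI);
        FileSystem fs = FileSystem.get(conf);
        boolean moved = Trash.moveToAppropriateTrash(fs, new Path(path), conf);
        log.info("<< moveToTrash {} result: {}", path, moved);
    }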
    /**
     * Download a file from HDFS to the local file system.
     *
     * @param hdfsURI  HDFS URI
     * @param srcPath  source path on HDFS
     * @param descPath local destination path
     * @param conf     Hadoop configuration
     * @throws Exception
     */
    public static void downloadFile(String hdfsURI, String srcPath, String descPath, Configuration conf) throws Exception {
        log.info(">> downloadFile, srcPath is {}, descPath is {}", srcPath, descPath);
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        FSDataInputStream in = fs.open(new Path(srcPath));
        OutputStream out = new FileOutputStream(new File(descPath));
        // This copyBytes overload closes both streams when the copy completes
        IOUtils.copyBytes(in, out, conf);
        fs.close();
    }
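    /*
     * Hedged alternative (an assumption, not the post's method): FileSystem also
     * offers copyToLocalFile, the mirror image of the copyFromLocalFile call
     * used above, which avoids managing the streams by hand.
     */
    public static void downloadViaCopyToLocal(String hdfsURI, String srcPath, String descPath, Configuration conf) throws Exception {
        FileSystem fs = FileSystem.get(new URI(hdfsURI), conf);
        fs.copyToLocalFile(new Path(srcPath), new Path(descPath));
        fs.close();
    }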
log.info(">> catFile, path is {}", path);
FileSystem fs = FileSystem.get(URI.create(hdfsURI), conf);
FSDataInputStream in = fs.open(new Path(path));
try {
IOUtils.copyBytes(in, System.out, 4096, false);
} finally {
IOUtils.closeStream(in);
fs.close();
}
} }
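One design caveat worth flagging: FileSystem.get returns a JVM-wide cached instance per URI and user, so the fs.close() calls sprinkled through the helpers above can invalidate the handle for any other code sharing it. A hedged sketch, assuming you want a handle that is safe to close independently, is FileSystem.newInstance:

    Configuration conf = new Configuration();
    // newInstance bypasses the FileSystem cache, so closing this handle
    // cannot break other callers that obtained theirs via FileSystem.get
    FileSystem fs = FileSystem.newInstance(new URI("hdfs://10.1.23.240:9000"), conf);
    try {
        // ... work with fs ...
    } finally {
        fs.close();
    }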