Demystifying Hadoop's File System Support: S3
1. Introduction
Hadoop ships with support for a number of file systems, but I had never looked closely at how that support is implemented or what the underlying mechanism is. Today someone happened to ask me exactly that question: how does Hadoop's support for S3 actually work? This post summarizes the answer. The file systems Hadoop supports are listed below (a short usage sketch follows the table):
File system       URI scheme   Implementation class (under org.apache.hadoop)
Local             file         fs.LocalFileSystem
HDFS              hdfs         hdfs.DistributedFileSystem
HFTP              hftp         hdfs.HftpFileSystem
HSFTP             hsftp        hdfs.HsftpFileSystem
HAR               har          fs.HarFileSystem
KFS               kfs          fs.kfs.KosmosFileSystem
FTP               ftp          fs.ftp.FTPFileSystem
S3 (native)       s3n          fs.s3native.NativeS3FileSystem
S3 (block-based)  s3           fs.s3.S3FileSystem
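As a rough illustration of how a URI scheme in the table selects an implementation class, here is a minimal client-side sketch against the block-based s3:// file system. It assumes the Hadoop 1.x-era FileSystem API discussed in this post; the class name, bucket name, and credential values are placeholders.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class S3SchemeDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Credentials read by the block-based s3:// implementation (placeholder values).
    conf.set("fs.s3.awsAccessKeyId", "YOUR_ACCESS_KEY");
    conf.set("fs.s3.awsSecretAccessKey", "YOUR_SECRET_KEY");

    // FileSystem.get() resolves the "s3" scheme to org.apache.hadoop.fs.s3.S3FileSystem
    // (via the fs.s3.impl configuration entry) and returns that implementation.
    FileSystem fs = FileSystem.get(URI.create("s3://my-bucket/"), conf);
    for (FileStatus status : fs.listStatus(new Path("/"))) {
      System.out.println(status.getPath());
    }
    fs.close();
  }
}

The point is that user code only ever sees the FileSystem abstraction; which backend handles the I/O is decided entirely by the URI scheme.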
2. The Competing Views
1. Does Hadoop support S3 by implementing an S3 file system of its own?
2. Or does Hadoop support S3 by integrating with the existing S3 service through its interface?
3. Source Code Analysis
The class below is org.apache.hadoop.fs.s3.Jets3tFileSystemStore, the storage layer behind the block-based s3:// file system. Note that it does not talk to S3 directly: every operation is delegated to the jets3t client library (S3Service / RestS3Service).
package org.apache.hadoop.fs.s3;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3.INode.FileType;
import org.jets3t.service.S3Service;
import org.jets3t.service.S3ServiceException;
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
import org.jets3t.service.model.S3Bucket;
import org.jets3t.service.model.S3Object;
import org.jets3t.service.security.AWSCredentials;

class Jets3tFileSystemStore implements FileSystemStore {

private static final String FILE_SYSTEM_NAME = "fs";
private static final String FILE_SYSTEM_VALUE = "Hadoop";

private static final String FILE_SYSTEM_TYPE_NAME = "fs-type";
private static final String FILE_SYSTEM_TYPE_VALUE = "block";

private static final String FILE_SYSTEM_VERSION_NAME = "fs-version";
private static final String FILE_SYSTEM_VERSION_VALUE = "1";

private static final Map<String, String> METADATA =
new HashMap<String, String>();

static {
METADATA.put(FILE_SYSTEM_NAME, FILE_SYSTEM_VALUE);
METADATA.put(FILE_SYSTEM_TYPE_NAME, FILE_SYSTEM_TYPE_VALUE);
METADATA.put(FILE_SYSTEM_VERSION_NAME, FILE_SYSTEM_VERSION_VALUE);
}

private static final String PATH_DELIMITER = Path.SEPARATOR;
private static final String BLOCK_PREFIX = "block_";

private Configuration conf;
private S3Service s3Service;
private S3Bucket bucket;
private int bufferSize;

public void initialize(URI uri, Configuration conf) throws IOException {
this.conf = conf;
S3Credentials s3Credentials = new S3Credentials();
s3Credentials.initialize(uri, conf);
try {
AWSCredentials awsCredentials =
new AWSCredentials(s3Credentials.getAccessKey(),
s3Credentials.getSecretAccessKey());
this.s3Service = new RestS3Service(awsCredentials);
} catch (S3ServiceException e) {
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new S3Exception(e);
}
bucket = new S3Bucket(uri.getHost());
this.bufferSize = conf.getInt("io.file.buffer.size", 4096);
}

public String getVersion() throws IOException {
return FILE_SYSTEM_VERSION_VALUE;
}

private void delete(String key) throws IOException {
try {
s3Service.deleteObject(bucket, key);
} catch (S3ServiceException e) {
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new S3Exception(e);
}
}

public void deleteINode(Path path) throws IOException {
delete(pathToKey(path));
}

public void deleteBlock(Block block) throws IOException {
delete(blockToKey(block));
}

public boolean inodeExists(Path path) throws IOException {
InputStream in = get(pathToKey(path), true);
if (in == null) {
return false;
}
in.close();
return true;
}

public boolean blockExists(long blockId) throws IOException {
InputStream in = get(blockToKey(blockId), false);
if (in == null) {
return false;
}
in.close();
return true;
}

private InputStream get(String key, boolean checkMetadata)
throws IOException {
try {
S3Object object = s3Service.getObject(bucket, key);
if (checkMetadata) {
checkMetadata(object);
}
return object.getDataInputStream();
} catch (S3ServiceException e) {
if ("NoSuchKey".equals(e.getS3ErrorCode())) {
return null;
}
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new S3Exception(e);
}
}

private InputStream get(String key, long byteRangeStart) throws IOException {
try {
S3Object object = s3Service.getObject(bucket, key, null, null, null,
null, byteRangeStart, null);
return object.getDataInputStream();
} catch (S3ServiceException e) {
if ("NoSuchKey".equals(e.getS3ErrorCode())) {
return null;
}
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new S3Exception(e);
}
}

private void checkMetadata(S3Object object) throws S3FileSystemException,
S3ServiceException {
String name = (String) object.getMetadata(FILE_SYSTEM_NAME);
if (!FILE_SYSTEM_VALUE.equals(name)) {
throw new S3FileSystemException("Not a Hadoop S3 file.");
}
String type = (String) object.getMetadata(FILE_SYSTEM_TYPE_NAME);
if (!FILE_SYSTEM_TYPE_VALUE.equals(type)) {
throw new S3FileSystemException("Not a block file.");
}
String dataVersion = (String) object.getMetadata(FILE_SYSTEM_VERSION_NAME);
if (!FILE_SYSTEM_VERSION_VALUE.equals(dataVersion)) {
throw new VersionMismatchException(FILE_SYSTEM_VERSION_VALUE,
dataVersion);
}
}

public INode retrieveINode(Path path) throws IOException {
return INode.deserialize(get(pathToKey(path), true));
}

public File retrieveBlock(Block block, long byteRangeStart)
throws IOException {
File fileBlock = null;
InputStream in = null;
OutputStream out = null;
try {
fileBlock = newBackupFile();
in = get(blockToKey(block), byteRangeStart);
out = new BufferedOutputStream(new FileOutputStream(fileBlock));
byte[] buf = new byte[bufferSize];
int numRead;
while ((numRead = in.read(buf)) >= 0) {
out.write(buf, 0, numRead);
}
return fileBlock;
} catch (IOException e) {
// close output stream to file then delete file
closeQuietly(out);
out = null; // to prevent a second close
if (fileBlock != null) {
fileBlock.delete();
}
throw e;
} finally {
closeQuietly(out);
closeQuietly(in);
}
}

private File newBackupFile() throws IOException {
File dir = new File(conf.get("fs.s3.buffer.dir"));
if (!dir.exists() && !dir.mkdirs()) {
throw new IOException("Cannot create S3 buffer directory: " + dir);
}
File result = File.createTempFile("input-", ".tmp", dir);
result.deleteOnExit();
return result;
}

public Set<Path> listSubPaths(Path path) throws IOException {
try {
String prefix = pathToKey(path);
if (!prefix.endsWith(PATH_DELIMITER)) {
prefix += PATH_DELIMITER;
}
S3Object[] objects = s3Service.listObjects(bucket, prefix, PATH_DELIMITER);
Set<Path> prefixes = new TreeSet<Path>();
for (int i = 0; i < objects.length; i++) {
prefixes.add(keyToPath(objects[i].getKey()));
}
prefixes.remove(path);
return prefixes;
} catch (S3ServiceException e) {
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new S3Exception(e);
}
}

public Set<Path> listDeepSubPaths(Path path) throws IOException {
try {
String prefix = pathToKey(path);
if (!prefix.endsWith(PATH_DELIMITER)) {
prefix += PATH_DELIMITER;
}
S3Object[] objects = s3Service.listObjects(bucket, prefix, null);
Set<Path> prefixes = new TreeSet<Path>();
for (int i = 0; i < objects.length; i++) {
prefixes.add(keyToPath(objects[i].getKey()));
}
prefixes.remove(path);
return prefixes;
} catch (S3ServiceException e) {
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new S3Exception(e);
}
}

private void put(String key, InputStream in, long length, boolean storeMetadata)
throws IOException {
try {
S3Object object = new S3Object(key);
object.setDataInputStream(in);
object.setContentType("binary/octet-stream");
object.setContentLength(length);
if (storeMetadata) {
object.addAllMetadata(METADATA);
}
s3Service.putObject(bucket, object);
} catch (S3ServiceException e) {
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new S3Exception(e);
}
}

public void storeINode(Path path, INode inode) throws IOException {
put(pathToKey(path), inode.serialize(), inode.getSerializedLength(), true);
}

public void storeBlock(Block block, File file) throws IOException {
BufferedInputStream in = null;
try {
in = new BufferedInputStream(new FileInputStream(file));
put(blockToKey(block), in, block.getLength(), false);
} finally {
closeQuietly(in);
}
}

private void closeQuietly(Closeable closeable) {
if (closeable != null) {
try {
closeable.close();
} catch (IOException e) {
// ignore
}
}
}

private String pathToKey(Path path) {
if (!path.isAbsolute()) {
throw new IllegalArgumentException("Path must be absolute: " + path);
}
return path.toUri().getPath();
}

private Path keyToPath(String key) {
return new Path(key);
}

private String blockToKey(long blockId) {
return BLOCK_PREFIX + blockId;
}

private String blockToKey(Block block) {
return blockToKey(block.getId());
}

public void purge() throws IOException {
try {
S3Object[] objects = s3Service.listObjects(bucket);
for (int i = 0; i < objects.length; i++) {
s3Service.deleteObject(bucket, objects[i].getKey());
}
} catch (S3ServiceException e) {
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new S3Exception(e);
}
}

public void dump() throws IOException {
StringBuilder sb = new StringBuilder("S3 Filesystem, ");
sb.append(bucket.getName()).append("\n");
try {
S3Object[] objects = s3Service.listObjects(bucket, PATH_DELIMITER, null);
for (int i = 0; i < objects.length; i++) {
Path path = keyToPath(objects[i].getKey());
sb.append(path).append("\n");
INode m = retrieveINode(path);
sb.append("\t").append(m.getFileType()).append("\n");
if (m.getFileType() == FileType.DIRECTORY) {
continue;
}
for (int j = 0; j < m.getBlocks().length; j++) {
sb.append("\t").append(m.getBlocks()[j]).append("\n");
}
}
} catch (S3ServiceException e) {
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new S3Exception(e);
}
System.out.println(sb);
}
}
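What the listing shows is that Jets3tFileSystemStore never speaks the S3 wire protocol itself: initialize() builds a jets3t RestS3Service from the AWS credentials, and every get, put, list, and delete goes through that S3Service. As a rough illustration, the sketch below calls the same jets3t APIs directly, using only calls that already appear in the listing above; the class name, bucket, prefix, and credential values are placeholders.

import org.jets3t.service.S3Service;
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
import org.jets3t.service.model.S3Bucket;
import org.jets3t.service.model.S3Object;
import org.jets3t.service.security.AWSCredentials;

public class Jets3tDirectDemo {
  public static void main(String[] args) throws Exception {
    // The same client classes that Jets3tFileSystemStore.initialize() constructs.
    AWSCredentials credentials = new AWSCredentials("YOUR_ACCESS_KEY", "YOUR_SECRET_KEY");
    S3Service s3Service = new RestS3Service(credentials);
    S3Bucket bucket = new S3Bucket("my-bucket");

    // List keys under a prefix with "/" as the delimiter, just as listSubPaths() does.
    S3Object[] objects = s3Service.listObjects(bucket, "some/dir/", "/");
    for (S3Object object : objects) {
      System.out.println(object.getKey());
    }
  }
}

In other words, the s3:// file system is a thin mapping from Hadoop's FileSystemStore abstraction onto jets3t calls, which already answers the question posed in section 2.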
4. Seeing Is Believing
5. Conclusion
Hadoop's support for S3 is an integration with the existing S3 service through its interface, implemented on top of the jets3t client library, rather than a re-implementation of S3 by Hadoop itself. Interested readers can verify this in the source code.