HDFS 对文件的增删改查

源代码：

pom.xml:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the HDFS API demo: pulls in the Hadoop 3.1.4 client libraries
  and JUnit 4, compiles for Java 8 and builds a shaded jar during "package".
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.idcast</groupId>
    <artifactId>hdfs_api_demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Hadoop libraries; all four are kept on the same version. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>3.1.4</version>
        </dependency>
        <!--
          JUnit 4 (the demo class uses org.junit.Test). Pinned to a concrete
          version instead of the floating RELEASE meta-version so that builds
          are reproducible.
          NOTE(review): no <scope>test</scope> is set because the location of
          the demo class (src/main vs. src/test) is not visible here; confirm
          and narrow the scope if it lives under src/test.
        -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.2</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Java compiler plugin -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <!-- Builds a shaded (fat) jar in the package phase -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <!--
                              NOTE(review): minimizeJar drops classes that are not
                              statically referenced; Hadoop loads some classes
                              reflectively, so verify the shaded jar still runs.
                            -->
                            <minimizeJar>true</minimizeJar>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

Java 代码（HdfsApiDemo.java）：

package cn.idcast.hdfs_api;

import com.jcraft.jsch.IO;

import org.apache.commons.io.IOUtils;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.*;

import org.apache.kerby.util.IOUtil;

import org.apache.log4j.BasicConfigurator;

import org.junit.Test;

import java.io.FileOutputStream;

import java.io.IOException;

import java.net.URI;

import java.net.URISyntaxException;

/**
 * JUnit 4 demos of basic HDFS operations through the Hadoop {@link FileSystem} API:
 * obtaining a {@code FileSystem}, listing files, creating directories and files,
 * downloading, uploading and merging small local files into one HDFS file.
 *
 * <p>Every demo talks to the cluster at {@code hdfs://node1:8020}. All file systems
 * and streams are opened in try-with-resources so they are released even when an
 * operation fails.
 */
public class HdfsApiDemo {

    /** HDFS URI every demo connects to. */
    private static final String HDFS_URI = "hdfs://node1:8020";

    /** User name passed to {@code FileSystem.get} by the demos that read or write data. */
    private static final String HDFS_USER = "root";

    /**
     * Opens the demo HDFS as {@link #HDFS_USER}. The caller is responsible for closing
     * the returned file system.
     */
    private static FileSystem openHdfs()
            throws IOException, URISyntaxException, InterruptedException {
        return FileSystem.get(new URI(HDFS_URI), new Configuration(), HDFS_USER);
    }

    /** Way 1 of obtaining a FileSystem: {@code FileSystem.get} with {@code fs.defaultFS} set. */
    @Test
    public void getFileSystem1() throws IOException {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_URI);
        try (FileSystem fileSystem = FileSystem.get(configuration)) {
            System.out.println(fileSystem.toString());
        }
    }

    /** Way 2 of obtaining a FileSystem: {@code FileSystem.get} with an explicit URI. */
    @Test
    public void getFileSystem2() throws IOException, URISyntaxException {
        try (FileSystem fileSystem = FileSystem.get(new URI(HDFS_URI), new Configuration())) {
            System.out.println(fileSystem);
        }
    }

    /** Way 3 of obtaining a FileSystem: {@code FileSystem.newInstance} with {@code fs.defaultFS} set. */
    @Test
    public void getFileSystem3() throws IOException {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_URI);
        try (FileSystem fileSystem = FileSystem.newInstance(configuration)) {
            System.out.println(fileSystem.toString());
        }
    }

    /** Way 4 of obtaining a FileSystem: {@code FileSystem.newInstance} with an explicit URI. */
    @Test
    public void getFileSystem4() throws IOException, URISyntaxException {
        try (FileSystem fileSystem = FileSystem.newInstance(new URI(HDFS_URI), new Configuration())) {
            System.out.println(fileSystem.toString());
        }
    }

    /** Prints the path of every file under {@code /}, descending into sub-directories. */
    @Test
    public void listMyFiles() throws IOException, URISyntaxException, InterruptedException {
        // 1: get the FileSystem instance
        try (FileSystem fileSystem = openHdfs()) {
            // 2: listFiles with recursive=true yields every file below "/"
            RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/"), true);
            // 3: walk the iterator
            while (files.hasNext()) {
                LocatedFileStatus next = files.next();
                System.out.println(next.getPath().toString());
            }
        }
    }

    /** Creates the directory {@code /hello/mydir/test} and prints the result of {@code mkdirs}. */
    @Test
    public void mkdirs() throws IOException, URISyntaxException, InterruptedException {
        try (FileSystem fileSystem = openHdfs()) {
            boolean mkdirs = fileSystem.mkdirs(new Path("/hello/mydir/test"));
            System.out.println(mkdirs);
        }
    }

    /** Creates the empty file {@code /hello/mydir/test/a.txt}. */
    @Test
    public void mkdirsTest() throws IOException, URISyntaxException, InterruptedException {
        try (FileSystem fileSystem = openHdfs();
             FSDataOutputStream ignored = fileSystem.create(new Path("/hello/mydir/test/a.txt"))) {
            // Nothing is written: the stream returned by create() only has to be
            // closed so that it is not leaked.
        }
    }

    /** Downloads {@code /hello/mydir/test/a.txt} to {@code D://a.txt} by copying streams. */
    @Test
    public void downloadFile() throws URISyntaxException, IOException, InterruptedException {
        try (FileSystem fileSystem = openHdfs();
             FSDataInputStream inputStream = fileSystem.open(new Path("/hello/mydir/test/a.txt"));
             FileOutputStream outputStream = new FileOutputStream("D://a.txt")) {
            IOUtils.copy(inputStream, outputStream);
        }
    }

    /** Downloads the same file with the one-call {@code copyToLocalFile} shortcut. */
    @Test
    public void downloadFile2() throws URISyntaxException, IOException, InterruptedException {
        try (FileSystem fileSystem = openHdfs()) {
            fileSystem.copyToLocalFile(new Path("/hello/mydir/test/a.txt"), new Path("D://a.txt"));
        }
    }

    /** Uploads the local file {@code D://hdfs-site.txt} to the HDFS root directory. */
    @Test
    public void uploadFile() throws URISyntaxException, IOException, InterruptedException {
        try (FileSystem fileSystem = openHdfs()) {
            fileSystem.copyFromLocalFile(new Path("D://hdfs-site.txt"), new Path("/"));
        }
    }

    /**
     * Merges the files found directly in the local directory {@code D://input} into the
     * single HDFS file {@code /big_txt.txt}, in the order {@code listStatus} returns them.
     */
    @Test
    public void mergeFile() throws URISyntaxException, IOException, InterruptedException {
        // 1: HDFS, 2: output stream of the merged HDFS file, 3: local file system.
        // Resources are closed in reverse order: stream first, then the file systems.
        try (FileSystem fileSystem = openHdfs();
             LocalFileSystem localFileSystem = FileSystem.getLocal(new Configuration());
             FSDataOutputStream outputStream = fileSystem.create(new Path("/big_txt.txt"))) {
            // 4: details of every entry in the local input directory
            FileStatus[] fileStatuses = localFileSystem.listStatus(new Path("D://input"));
            // 5: append each small file to the merged file
            for (FileStatus fileStatus : fileStatuses) {
                if (!fileStatus.isFile()) {
                    // Sub-directories cannot be opened as a stream; skip them.
                    continue;
                }
                try (FSDataInputStream inputStream = localFileSystem.open(fileStatus.getPath())) {
                    // 6: copy the small file's bytes into the merged file
                    IOUtils.copy(inputStream, outputStream);
                }
            }
        }
    }

}

hdfs对文件的增删改查的更多相关文章

Hadoop基础-HDFS的API实现增删改查
Hadoop基础-HDFS的API实现增删改查作者:尹正杰版权声明:原创作品,谢绝转载!否则将追究法律责任. 本篇博客开发IDE使用的是Idea,如果没有安装Idea软件的可以去下载安装,如何安装 ...
MyBatis学习（二）、SQL语句映射文件(2)增删改查、参数、缓存
二.SQL语句映射文件(2)增删改查.参数.缓存 2.2 select 一个select 元素非常简单.例如:  <select id=" ...
java对xml文件做增删改查------摘录
java对xml文件做增删改查 package com.wss; import java.io.File;import java.util.ArrayList;import java.util.Lis ...
MyBatis学习之二、SQL语句映射文件(2)增删改查、参数、缓存
目录(?)[-] 二SQL语句映射文件2增删改查参数缓存 select insert updatedelete sql parameters 基本类型参数 Java实体类型参数 Map参数多参数的实 ...
【练习】Python第四次：实现对文件的增删改查
一,实现对文件的增删改查 (一),三级菜单的处理结构及退出技巧:使用TAG标记 tag=True while tag: print('leve1') choice=input("level1 ...
基于SpringMVC的文件（增删改查）上传、下载、更新、删除
一.项目背景摘要:最近一直在忙着项目的事,3个项目过去了,发现有一个共同的业务,那就是附件的处理,附件包括各种文档,当然还有图片等特殊文件,由于时间的关系,每次都是匆匆忙忙的搞定上线,称这项目的空档 ...
Python文件操作-文件的增删改查
需求:对文件进行增删改查由于时间原因,本次代码没有增加任何注释,如有疑问,请联系编辑者:闫龙其实我也是醉了,看着这些个代码,我脑袋也特么大了,没办法,大神说了,不让用新知识,只可以使用学过的,所以 ...
使用dom4j对xml文件进行增删改查
1.使用dom4j技术对dom_demo.xml进行增删改查首选要下载dom4j的jar包在官网上找不到,网上搜索了一下在这个链接:http://sourceforge.net/projects/ ...
Python 模拟SQL对文件进行增删改查
#!/usr/bin/env python # _*_ coding:UTF-8 _*_ # __auth__: Dalhhin # Python 3.5.2,Pycharm 2016.3.2 # 2 ...

随机推荐

LeetCode-078-子集
子集题目描述:给你一个整数数组 nums ,数组中的元素互不相同 .返回该数组所有可能的子集(幂集). 解集不能包含重复的子集.你可以按任意顺序返回解集. 示例说明请见LeetCode官网 ...
LeetCode-039-组合总和
组合总和题目描述:给定一个无重复元素的数组 candidates 和一个目标数 target ,找出 candidates 中所有可以使数字和为 target 的组合. candidates 中的数 ...
2. Java入门
2.Java入门 2.1.安装开发环境卸载JDK 删除Java的安装目录删除JAVA_HOME 删除path下关于Java的目录 java -version 安装JDK 百度搜索JDK8,找到下载 ...
thinkphp 框架自带搜索+分页+搜索标红
..........控制器方法 public function index() { //接受搜索关键字 $word=input('word'); $where=[]; if (!empty($word ...
mybatis 日志实现学习总结03
日志 1.为什么要使用日志使用日志能对项目: 调试:日志便于记录程序在之前的运行结果错误定位数据分析:日志中蕴含了大量的用户数据,包括点击行为,兴趣偏好等,对公司下一步的战略方向有一定指引作用. ...
【Linux基础】ps命令详解
PS命令介绍 Linux中的ps命令是Process Status的缩写.ps命令用来列出系统中当前运行的那些进程.ps命令列出的是当前那些进程的快照,就是执行ps命令的那个时刻的那些进程,如果想要动 ...
ActiveMQ代码-01
p2p模式生产者 package com.activemq.activemqdemo.p2p; import org.apache.activemq.ActiveMQConnectionFactor ...
JSON.parse()和JSON.stringfy()区别
JSON.parse() 用于从一个json格式字符串解析出json类型的数据,如: 注意事项:json格式字符串必须是写在一排的,且括号外面用单引号,里面的每一个字符串用双引号 JSON.strin ...
sharding-jdbc教程看这一篇就够了
Sharding-JDBC是ShardingSphere的第一个产品,也是ShardingSphere的前身. 它定位为轻量级Java框架,在Java的JDBC层提供的额外服务.它使用客户端直连数 ...
浅谈systemd原理和应用
多不说,直接上代码(可谓配置): [Unit] Description=demo app After=network-is-online.target [Service] Type=Simple Ex ...

hdfs对文件的增删改查

hdfs对文件的增删改查的更多相关文章

随机推荐

热门专题