用java api读取HDFS文件

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.text.SimpleDateFormat;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import bean.TableStatistic;

/**
 * Controller that reads a JSON document named {@code jsonObj} from a fixed HDFS
 * directory and returns it deserialized as a {@link TableStatistic}.
 *
 * <p>One {@link UserGroupInformation} and one {@link FileSystem} are cached per
 * user name; {@link #destroy()} closes the file systems and empties both caches.
 */
@Controller
@RequestMapping("/dfview")
public class DataFrameViewController extends BaseController {

    private static final Logger LOG = Logger.getLogger(DataFrameViewController.class.getName());

    /** Name (compared case-insensitively) of the JSON file looked up in {@link #STATISTIC_DIR}. */
    private static final String HDFS_JSON_NAME = "jsonObj";

    /** HDFS directory that is listed when searching for the JSON file. */
    private static final String STATISTIC_DIR = "/user/cbt/datax/temp_transfer/zzzdes";

    /** User name the HDFS directory is accessed as. */
    private static final String HDFS_USER = "bi";

    /** Charset used to decode the JSON file; JSON interchange text is UTF-8. */
    private static final String JSON_CHARSET = "UTF-8";

    /** Chunk size, in bytes, used when copying a file into memory. */
    private static final int READ_BUFFER_SIZE = 4096;

    /** Classpath resources added on top of the default Hadoop configuration. */
    private static final String[] HADOOP_CONF_FILES = { "core-site.xml", "hdfs-site.xml" };

    /** Per-thread formatter for the pattern {@code yyyy-MM-dd HH:mm:ss}. */
    public static final ThreadLocal<SimpleDateFormat> appDateFormat = new ThreadLocal<SimpleDateFormat>() {
        @Override
        public SimpleDateFormat initialValue() {
            return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        }
    };

    /** User name -> remote-user identity. */
    private final ConcurrentMap<String, UserGroupInformation> cache =
            new ConcurrentHashMap<String, UserGroupInformation>();

    /** User name -> file system obtained under that user's identity. */
    private final ConcurrentMap<String, FileSystem> fileSystemCache =
            new ConcurrentHashMap<String, FileSystem>();

    /** Base configuration; built on first use by {@link #loadHadoopConf()}. */
    private Configuration hadoopConf;

    /**
     * Reads the {@code jsonObj} file from the statistics directory and parses it.
     *
     * @param tableName currently unused; the directory and file name are fixed
     * @return the parsed statistics; when the directory holds no matching file an
     *         empty string is handed to the JSON parser and its result is returned
     * @throws Exception if the file system cannot be created, listed or read, or
     *         if the content cannot be parsed
     */
    @RequestMapping(value = "/getDFviewOfColumn", method = { RequestMethod.GET })
    @ResponseBody
    public TableStatistic getDFviewOfTable(String tableName) throws Exception {
        FileSystem fs = this.createFileSystem(HDFS_USER);

        String json = "";
        for (FileStatus status : fs.listStatus(new Path(STATISTIC_DIR))) {
            if (status.isFile() && HDFS_JSON_NAME.equalsIgnoreCase(status.getPath().getName())) {
                json = readFileAsString(fs, status.getPath());
                break; // only the first matching file is used
            }
        }
        return JSON.parseObject(json, TableStatistic.class);
    }

    /**
     * Manual smoke test: lists the statistics directory, dumps and parses the
     * {@code jsonObj} file, and prints the paths of directories and symlinks.
     *
     * @param args ignored
     * @throws Exception on any file-system or parsing failure
     */
    public static void main(String[] args) throws Exception {
        DataFrameViewController controller = new DataFrameViewController();
        FileSystem fs = controller.createFileSystem(HDFS_USER);
        Path homePath = new Path(STATISTIC_DIR);

        System.out.println("***********************************");

        for (FileStatus status : fs.listStatus(homePath)) {
            if (status.isFile()) {
                // Only the JSON file is parsed; parsing the (empty) content of
                // every other file would dereference a null parse result.
                if (!HDFS_JSON_NAME.equalsIgnoreCase(status.getPath().getName())) {
                    continue;
                }
                String json = readFileAsString(fs, status.getPath());
                System.out.print(json);
                System.out.println("************************************************");

                JSONObject jb = JSON.parseObject(json);
                System.out.println("********!!!!! : " + (jb == null ? null : jb.get("colUnique")));

                TableStatistic ts = JSON.parseObject(json, TableStatistic.class);
                System.out.println("********!!!!! : " + (ts == null ? null : ts.getColUnique()));
            } else if (status.isDirectory()) {
                System.out.println(status.getPath().toString());
            } else if (status.isSymlink()) {
                System.out.println("&&&&&&&&" + status.getPath().toString());
            }
        }
    }

    /**
     * Returns the file system for {@code user}, creating and caching it on first use.
     *
     * @param user user name to access the file system as; must not be {@code null}
     * @return the cached or newly created file system
     * @throws Exception if the file system cannot be obtained
     */
    public FileSystem createFileSystem(String user) throws Exception {
        FileSystem cached = fileSystemCache.get(user);
        if (cached != null) {
            return cached;
        }

        // Work on a private copy so the per-user key never leaks into the
        // configuration shared by all users of this controller.
        final Configuration conf = new Configuration(loadHadoopConf());
        conf.set("hadoop.job.ugi", user);

        UserGroupInformation ugi = getProxyUser(user);
        FileSystem created = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
            @Override
            public FileSystem run() throws Exception {
                return FileSystem.get(conf);
            }
        });

        // Another thread may have published an instance in the meantime; keep
        // whichever one reached the cache first.
        FileSystem raced = fileSystemCache.putIfAbsent(user, created);
        return raced != null ? raced : created;
    }

    /**
     * Closes every file system opened for the cached users and empties both
     * caches. A failure for one user is logged and does not stop the others.
     */
    public void destroy() {
        for (UserGroupInformation ugi : cache.values()) {
            try {
                FileSystem.closeAllForUGI(ugi);
            } catch (IOException ioe) {
                LOG.log(Level.WARNING,
                        "Exception occurred while closing filesystems for " + ugi.getUserName(), ioe);
            }
        }
        // Drop the FileSystem handles too, otherwise later calls would be
        // handed instances that were just closed.
        fileSystemCache.clear();
        cache.clear();
    }

    /**
     * Reads the whole file at {@code path} into memory and decodes it as UTF-8.
     * The bytes are decoded in one step so that multi-byte characters are not
     * split across reads.
     *
     * @param fs   file system to read from
     * @param path file to read
     * @return the decoded file content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readFileAsString(FileSystem fs, Path path) throws IOException {
        InputStream in = fs.open(path);
        try {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[READ_BUFFER_SIZE];
            int read;
            while ((read = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toString(JSON_CHARSET);
        } finally {
            in.close();
        }
    }

    /**
     * Returns the base Hadoop configuration, building it on first use from the
     * defaults plus every {@link #HADOOP_CONF_FILES} entry found on this class's
     * class loader. Resources that are not present are skipped.
     *
     * @return the shared base configuration; callers must copy it before modifying
     */
    private synchronized Configuration loadHadoopConf() {
        if (hadoopConf == null) {
            Configuration conf = new Configuration();
            ClassLoader loader = DataFrameViewController.class.getClassLoader();
            for (String fileName : HADOOP_CONF_FILES) {
                URL resource = loader.getResource(fileName);
                if (resource != null) {
                    conf.addResource(resource);
                }
            }
            hadoopConf = conf;
        }
        return hadoopConf;
    }

    /**
     * Returns the cached remote-user identity for {@code user}, creating it on
     * first use.
     *
     * @param user user name; must not be {@code null}
     * @return the identity stored in the cache for that user
     * @throws IOException declared for callers; not thrown by the current body
     */
    private UserGroupInformation getProxyUser(String user) throws IOException {
        UserGroupInformation existing = cache.get(user);
        if (existing != null) {
            return existing;
        }
        UserGroupInformation created = UserGroupInformation.createRemoteUser(user);
        UserGroupInformation raced = cache.putIfAbsent(user, created);
        return raced != null ? raced : created;
    }

}

用java api读取HDFS文件的更多相关文章

java Api 读取HDFS文件内容
package dao; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import java ...
使用JAVA API读取HDFS的文件数据出现乱码的解决方案
使用JAVA api读取HDFS文件乱码踩坑想写一个读取HDFS上的部分文件数据做预览的接口,根据网上的博客实现后,发现有时读取信息会出现乱码,例如读取一个csv时,字符串之间被逗号分割英文字符串 ...
JAVA API 实现hdfs文件操作
java api 实现hdfs 文件操作会出现错误提示: Permission denied: user=hp, access=WRITE, inode="/":hdfs:supe ...
Java API 读取HDFS的单文件
HDFS上的单文件: -bash-3.2$ hadoop fs -ls /user/pms/ouyangyewei/data/input/combineorder/repeat_rec_categor ...
Spark：java api读取hdfs目录下多个文件
需求: 由于一个大文件,在spark中加载性能比较差.于是把一个大文件拆分为多个小文件后上传到hdfs,然而在spark2.2下如何加载某个目录下多个文件呢? public class SparkJo ...
使用java api操作HDFS文件
实现的代码如下: import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import ...
记录一次读取hdfs文件时出现的问题java.net.ConnectException: Connection refused
公司的hadoop集群是之前的同事搭建的,我(小白一个)在spark shell中读取hdfs上的文件时,执行以下指令 >>> word=sc.textFile("hdfs ...
Spark读取HDFS文件，文件格式为GB2312，转换为UTF-8
package iie.udps.example.operator.spark; import scala.Tuple2; import org.apache.hadoop.conf.Configur ...
使用Java API操作HDFS文件系统
使用JUnit封装HDFS import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org ...

随机推荐

asp.net 获取当前项目路径
方法一://获取当前项目的路径System.AppDomain.CurrentDomain.BaseDirectory.ToString(); // 得到的是当前项目的根目录取的值:F://Pro ...
CentOS不能挂载NTFS
root@s 下载]# mount /dev/sdb1 /mnt mount: unknown filesystem type 'ntfs' wget http://www.tuxera.com/co ...
【python】坑，坑，折腾一个下午python 3.5中 ImportError: No module named BeautifulSoup
将语句 from bs4 import BeautifulSoup4 改成 from bs4 import BeautifulSoup 通过尼玛------------------------! 总 ...
Proteus仿真 51点亮点阵
电路图程序 /*============================== 点亮点阵心形 ================================*/ #include <REGX5 ...
delphi xe5 android iny绿色版+最新SDK/NDK安装方法
转自: http://bbs.2ccc.com/topic.asp?topicid=438595 首先感谢iny的绿色版,因为我的精简Win7 32位安装原版镜像4.63G过程正常,但是编译出错,后来 ...
js 字符串扩展
<script type="text/javascript" language="javascript"> String.prototype.tri ...
[转].NET程序在windows操作系统上独立运行的技术要点
发现一个不错的网站,转载一篇文章方便查看转自 http://www.linuxdot.net/bbsfile-3354 ===================================== ...
java客户端连接MongoDB数据库的简单使用
1.下载mongoDB的jar包,并引入到工程的CLASSPATH中下载:mongodb2.5驱动包下载如果使用maven项目,最新的依赖如下: <dependency> <gro ...
目标识别：Bag-of-words表示图像
BOW (bag of words) 模型简介 Bag of words模型最初被用在文本分类中,将文档表示成特征矢量.它的基本思想是假定对于一个文本,忽略其词序和语法.句法,仅仅将其看做是一些词汇的 ...
AD设计中，三种大面积覆铜的区别
在AD设计中,主要有三种大面积覆铜方式,分别是Fill(铜皮) Polygon Pour(灌铜)和Plane(平面层),这三种方式刚开始的时候没有细细区分,现在分别应用了一下, 总结如下,欢迎指正 F ...

用java api读取HDFS文件

用java api读取HDFS文件的更多相关文章

随机推荐

热门专题