用 Java API 读取 HDFS 文件

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.text.SimpleDateFormat;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import bean.TableStatistic;

/**
 * Spring MVC controller that reads a JSON statistics file from a fixed HDFS
 * directory and returns it deserialized as a {@link TableStatistic}.
 *
 * <p>One {@link UserGroupInformation} and one {@link FileSystem} are kept per
 * user name in concurrent maps; {@link #destroy()} closes and forgets them.
 */
@Controller
@RequestMapping("/dfview")
public class DataFrameViewController extends BaseController {

    private static final Logger LOG = Logger.getLogger(DataFrameViewController.class.getName());

    /** File name (compared case-insensitively) of the JSON statistics file. */
    private static final String HDFS_JSON_NAME = "jsonObj";

    /** HDFS user the statistics directory is accessed as. */
    private static final String HDFS_USER = "bi";

    /** HDFS directory that is scanned for {@link #HDFS_JSON_NAME}. */
    private static final String STATISTIC_DIR = "/user/cbt/datax/temp_transfer/zzzdes";

    /** Classpath resources layered on top of the default Hadoop configuration. */
    private static final String[] HADOOP_CONF_FILES = { "core-site.xml",
            "hdfs-site.xml" };

    /**
     * Charset used to decode the file content.
     * NOTE(review): assumes the JSON file is written as UTF-8 - confirm with the producer.
     */
    private static final String JSON_CHARSET = "UTF-8";

    private static final int READ_BUFFER_SIZE = 4096;

    /** Per-thread {@code yyyy-MM-dd HH:mm:ss} formatter; not referenced inside this class. */
    public static final ThreadLocal<SimpleDateFormat> appDateFormat = new ThreadLocal<SimpleDateFormat>() {
        @Override
        public SimpleDateFormat initialValue() {
            SimpleDateFormat dateformat = new java.text.SimpleDateFormat(
                    "yyyy-MM-dd HH:mm:ss");
            return dateformat;
        }
    };

    /** User name -> remote-user identity created for it. */
    private final ConcurrentMap<String, UserGroupInformation> cache = new ConcurrentHashMap<String, UserGroupInformation>();

    /** User name -> file system obtained while running as that user. */
    private final ConcurrentMap<String, FileSystem> fileSystemCache = new ConcurrentHashMap<String, FileSystem>();

    /** Lazily built by {@link #loadHadoopConf()}; guarded by {@code this}. */
    private Configuration hadoopConf;

    /**
     * Looks for the file named {@code jsonObj} directly under the statistics
     * directory and deserializes its content.
     *
     * @param tableName currently unused; the directory and file name are fixed
     * @return the deserialized statistics; when no matching file exists an empty
     *         string is handed to fastjson, exactly as before, and its result is returned
     * @throws Exception if the file system cannot be created, listed or read
     */
    @RequestMapping(value = "/getDFviewOfColumn", method = { RequestMethod.GET })
    @ResponseBody
    public TableStatistic getDFviewOfTable(String tableName)
            throws Exception {
        FileSystem fs = this.createFileSystem(HDFS_USER);
        String json = "";
        for (FileStatus status : fs.listStatus(new Path(STATISTIC_DIR))) {
            if (status.isFile()
                    && HDFS_JSON_NAME.equalsIgnoreCase(status.getPath().getName())) {
                json = readFileAsString(fs, status.getPath());
                break; // only the first match is used
            }
        }
        return JSON.parseObject(json, TableStatistic.class);
    }

    /**
     * Manual smoke test: lists the statistics directory, dumps the content of
     * {@code jsonObj} and prints its {@code colUnique} value via both the generic
     * and the typed fastjson API.
     *
     * @param args ignored
     * @throws Exception if HDFS access or JSON parsing fails
     */
    public static void main(String[] args) throws Exception {
        DataFrameViewController controller = new DataFrameViewController();
        FileSystem fs = controller.createFileSystem(HDFS_USER);
        Path homePath = new Path(STATISTIC_DIR);
        System.out.println("***********************************");
        for (FileStatus status : fs.listStatus(homePath)) {
            if (status.isFile()) {
                // Only the statistics file is parsed; parsing the (empty) content of
                // unrelated files previously ended in a null dereference.
                if (!HDFS_JSON_NAME.equalsIgnoreCase(status.getPath().getName())) {
                    continue;
                }
                String json = readFileAsString(fs, status.getPath());
                System.out.print(json);
                System.out
                        .println("************************************************");
                JSONObject jb = JSON.parseObject(json);
                System.out.println("********!!!!! : "  + jb.get("colUnique"));
                TableStatistic ts = JSON.parseObject(json, TableStatistic.class);
                System.out.println("********!!!!! : "  + ts.getColUnique().toString());
            } else if (status.isDirectory()) {
                System.out.println(status.getPath().toString());
            } else if (status.isSymlink()) {
                System.out.println("&&&&&&&&" + status.getPath().toString());
            }
        }
    }

    /**
     * Returns the file system for {@code user}, creating it on first use.
     *
     * @param user name of the user the file system is accessed as
     * @return the file system stored for that user
     * @throws IllegalArgumentException if {@code user} is {@code null}
     * @throws Exception if the file system cannot be obtained
     */
    public FileSystem createFileSystem(String user) throws Exception {
        if (user == null) {
            throw new IllegalArgumentException("user must not be null");
        }
        FileSystem cached = fileSystemCache.get(user);
        if (cached != null) {
            return cached;
        }

        // Work on a private copy so the per-user setting never leaks into the
        // configuration shared by all users.
        final Configuration conf = new Configuration(loadHadoopConf());
        conf.set("hadoop.job.ugi", user);

        UserGroupInformation ugi = getProxyUser(user);
        FileSystem fs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
            public FileSystem run() throws Exception {
                return FileSystem.get(conf);
            }
        });

        // If another thread stored an instance first, hand that one out. The local
        // instance is deliberately not closed here.
        // NOTE(review): FileSystem.get may return a shared cached object - confirm before closing.
        FileSystem existing = fileSystemCache.putIfAbsent(user, fs);
        return existing != null ? existing : fs;
    }

    /**
     * Closes every file system belonging to the remembered users and empties both
     * caches, so that a later {@link #createFileSystem(String)} builds fresh instances.
     * Failures are logged and do not stop the remaining users from being processed.
     */
    public void destroy() {
        for (UserGroupInformation ugi : cache.values()) {
            try {
                FileSystem.closeAllForUGI(ugi);
            } catch (IOException ioe) {
                LOG.log(Level.WARNING,
                        "Exception occurred while closing filesystems for "
                                + ugi.getUserName(), ioe);
            }
        }
        // Drop the now-closed instances as well; otherwise they would be handed out again.
        fileSystemCache.clear();
        cache.clear();
    }

    /**
     * Builds the shared Hadoop configuration on first call: a default
     * {@link Configuration} plus every {@link #HADOOP_CONF_FILES} entry that this
     * class's class loader can find. Later calls return the same instance.
     */
    private synchronized Configuration loadHadoopConf() {
        if (hadoopConf == null) {
            Configuration conf = new Configuration();
            ClassLoader loader = DataFrameViewController.class.getClassLoader();
            for (String fileName : HADOOP_CONF_FILES) {
                URL resource = loader.getResource(fileName);
                if (resource == null) {
                    LOG.warning("Hadoop configuration file not found on classpath: "
                            + fileName);
                    continue;
                }
                conf.addResource(resource);
            }
            hadoopConf = conf;
        }
        return hadoopConf;
    }

    /**
     * Returns the identity remembered for {@code user}, creating a remote-user
     * identity only when none is stored yet.
     */
    private UserGroupInformation getProxyUser(String user) throws IOException {
        UserGroupInformation ugi = cache.get(user);
        if (ugi == null) {
            UserGroupInformation created = UserGroupInformation.createRemoteUser(user);
            ugi = cache.putIfAbsent(user, created);
            if (ugi == null) {
                ugi = created;
            }
        }
        return ugi;
    }

    /**
     * Reads the whole file at {@code path} and decodes it as {@link #JSON_CHARSET}.
     * All bytes are collected before decoding so that multi-byte characters are
     * never split, and the stream is closed even when reading fails.
     */
    private static String readFileAsString(FileSystem fs, Path path) throws IOException {
        InputStream in = fs.open(path);
        try {
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            byte[] buffer = new byte[READ_BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                bytes.write(buffer, 0, read);
            }
            return bytes.toString(JSON_CHARSET);
        } finally {
            in.close();
        }
    }

}

用java api读取HDFS文件的更多相关文章

java Api 读取HDFS文件内容
package dao; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import java ...
使用JAVA API读取HDFS的文件数据出现乱码的解决方案
使用 Java API 读取 HDFS 文件乱码踩坑：想写一个读取 HDFS 上的部分文件数据做预览的接口,根据网上的博客实现后,发现有时读取信息会出现乱码,例如读取一个 csv 时,字符串之间被逗号分隔英文字符串 ...
JAVA API 实现hdfs文件操作
java api 实现hdfs 文件操作会出现错误提示: Permission denied: user=hp, access=WRITE, inode="/":hdfs:supe ...
Java API 读取HDFS的单文件
HDFS上的单文件: -bash-3.2$ hadoop fs -ls /user/pms/ouyangyewei/data/input/combineorder/repeat_rec_categor ...
Spark：java api读取hdfs目录下多个文件
需求: 由于一个大文件,在spark中加载性能比较差.于是把一个大文件拆分为多个小文件后上传到hdfs,然而在spark2.2下如何加载某个目录下多个文件呢? public class SparkJo ...
使用java api操作HDFS文件
实现的代码如下: import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import ...
记录一次读取hdfs文件时出现的问题java.net.ConnectException: Connection refused
公司的hadoop集群是之前的同事搭建的,我(小白一个)在spark shell中读取hdfs上的文件时,执行以下指令 >>> word=sc.textFile("hdfs ...
Spark读取HDFS文件，文件格式为GB2312，转换为UTF-8
package iie.udps.example.operator.spark; import scala.Tuple2; import org.apache.hadoop.conf.Configur ...
使用Java API操作HDFS文件系统
使用 JUnit 封装 HDFS import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org ...

随机推荐

51nod1240莫比乌斯函数
莫比乌斯函数,由德国数学家和天文学家莫比乌斯提出.梅滕斯(Mertens)首先使用μ(n)(miu(n))作为莫比乌斯函数的记号.(据说,高斯(Gauss)比莫比乌斯早三十年就曾考虑过这个函数). ...
工作踩坑记录：JavaScript跳转被缓存
起因:业务想要一个固定二维码来每周扫码跳转到不同的页面上去,我用JS写了个跳转,却发现被缓存了,虽然被具体被缓存多久不清楚,但是被缓存了很不爽,不符合业务实时更改这个二维码跳转页面的需求. 经过:既然 ...
const用法
一.const作用二.const用法 1.修饰一般常量修饰符const可以用在类型说明符前,也可以用在类型说明符后. 例如: ; ; 2.修饰常数组修饰符const可以用在类型说明符前,也 ...
树莓派2 安装mono3.0运行mvc4
sudo apt-get updatesudo apt-get upgradesudo apt-get mono-completewget -c http://www.linuxdot.net/dow ...
用android-x86模拟器不能运行程序错误Tag:libc的问题
如果用的是x86的android模拟器,运行软件时一闪就关闭logcat中:类似:Fatal signal 11 (SIGSEGV) at 0x00000078 (code=1), thread 16 ...
poj 3261 Milk Patterns（后缀数组）(k次的最长重复子串)
Milk Patterns Time Limit: 5000MS Memory Limit: 65536K Total Submissions: 7938 Accepted: 3598 Cas ...
DataGridView自动行号
最近又用了一下DataGridView控件,需要显示行号,我们知道在.net中DataGridView控件默认是不显示行号(数据的记录行数)的,后来通过查资料发现可以在DataGridView控件的R ...
Centos安装gnome主菜单编辑器无
首选项---主菜单-- 即是alacarte.. centos ===安装 alacarte.noarch 0:0.12.4-1.el6 即可.
[转载]C# Random 生成不重复随机数
Random 类命名空间:System 表示伪随机数生成器,一种能够产生满足某些随机性统计要求的数字序列的设备. 伪随机数是以相同的概率从一组有限的数字中选取的.所选数字并不具有完全的随机性,因为它 ...
Seay工具分享
百度网盘:http://pan.baidu.com/share/home?uk=4045637737&view=share#category/type=0

用java api读取HDFS文件

用java api读取HDFS文件的更多相关文章

随机推荐

热门专题