通过HA方式操作HDFS

之前操作HDFS的时候，都是在代码里固定某个NameNode的地址，然后去操作。这样就必须先判断该NameNode的状态是active还是standby，比较繁琐。如果集群启用了HA，客户端只需要配置nameservice以及其下各个NameNode的地址，就会自动连接到处于active状态的NameNode，使用起来就很方便了。

直接上代码，看注释：

package com.ideal.template.openbigdata.util;

import java.io.IOException;

import java.net.URI;

import java.sql.ResultSet;

import java.sql.ResultSetMetaData;

import java.sql.SQLException;

import java.sql.Timestamp;

import java.text.SimpleDateFormat;

import java.util.LinkedList;

import java.util.List;

//import org.anarres.lzo.LzoAlgorithm;

//import org.anarres.lzo.LzoDecompressor;

//import org.anarres.lzo.LzoInputStream;

//import org.anarres.lzo.LzoLibrary;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FSDataOutputStream;

import org.apache.hadoop.fs.FileStatus;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.security.UserGroupInformation;

import org.apache.log4j.Logger;

/**
 * Helper methods for reading and writing HDFS.
 *
 * <p>Two families of methods live here:
 * <ul>
 *   <li>{@link #getSize}, {@link #createHdfsFile(String, String, String, String)} and
 *       {@link #deleteHdfsFile} connect to the file system named by an explicit URI on behalf
 *       of an explicit user, using a default {@link Configuration}.</li>
 *   <li>{@link #createHdfsFile(String, ResultSet, String, FlagUtil)} and {@link #listDir}
 *       connect through the HA nameservice built by {@link #getConf()} after a Kerberos keytab
 *       login ({@link #kerberosLogin}).</li>
 * </ul>
 *
 * <p>Every method obtains its own {@link FileSystem} via {@code FileSystem.newInstance} and closes
 * it when done, so the process-wide cached instance returned by {@code FileSystem.get} is never
 * closed underneath other users of it.
 */
public class HadoopUse
{
    private static final Log log = LogFactory.getLog(HadoopUse.class);

    /** Logical name of the HA nameservice; the client resolves it to the active NameNode. */
    private static final String NAMESERVICE = "dragoncluster";

    /** Kerberos principal used for the keytab login. */
    private static final String KERBEROS_PRINCIPAL = "openbigdata@DRAGON.COM";

    /** Keytab file holding the credentials of {@link #KERBEROS_PRINCIPAL}. */
    private static final String KEYTAB_PATH = "/etc/security/keytabs/openbigdata.keytab";

    /** Charset used for everything this class writes to HDFS. */
    private static final String UTF8 = "utf-8";

    /** Number of rows exported between two checks of the termination flag. */
    private static final int TERMINATION_CHECK_INTERVAL = 1000;

    /**
     * Builds the client configuration for the HA cluster.
     *
     * @return a configuration whose default file system is the HA nameservice
     */
    private static Configuration getConf()
    {
        Configuration conf = new Configuration();

        // These settings mirror what would normally come from hdfs-site.xml and core-site.xml.
        conf.set("fs.defaultFS", "hdfs://" + NAMESERVICE);
        conf.set("dfs.nameservices", NAMESERVICE);
        conf.set("dfs.ha.namenodes." + NAMESERVICE, "nn1,nn2");
        conf.set("dfs.namenode.rpc-address." + NAMESERVICE + ".nn1", "n01.dragon.com:8020");
        conf.set("dfs.namenode.rpc-address." + NAMESERVICE + ".nn2", "n02.dragon.com:8020");
        conf.set("dfs.client.failover.proxy.provider." + NAMESERVICE,
                "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

        // Name the implementation classes explicitly, because the scheme-to-class mapping can
        // otherwise end up overridden (per the original author's note).
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
        return conf;
    }

    /**
     * Switches the given configuration to Kerberos authentication and logs in from the keytab.
     *
     * @param conf configuration to update and install as the security configuration
     * @throws Exception if the keytab login fails
     */
    private static void kerberosLogin(Configuration conf) throws Exception
    {
        conf.set("hadoop.security.authentication", "kerberos");
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab(KERBEROS_PRINCIPAL, KEYTAB_PATH);
    }

    /**
     * Closes a resource during cleanup, logging (not propagating) a failure so that it cannot
     * mask the exception or return value of the operation being cleaned up after.
     *
     * @param resource resource to close; {@code null} is ignored
     */
    private static void closeQuietly(java.io.Closeable resource)
    {
        if (resource == null)
        {
            return;
        }
        try
        {
            resource.close();
        }
        catch (IOException ex)
        {
            log.warn("HadoopUse.closeQuietly" + ex.getMessage(), ex);
        }
    }

    /**
     * Returns the total content length below the given location.
     *
     * @param uri  full URI of the file or directory to measure
     * @param user user on whose behalf the file system is accessed
     * @return the size in whole megabytes (integer division, so anything below 1 MB yields 0);
     *         also 0 when the lookup fails for any reason, in which case the error is logged
     */
    public static long getSize(String uri, String user)
    {
        FileSystem fs = null;
        try
        {
            URI target = URI.create(uri);
            fs = FileSystem.newInstance(target, new Configuration(), user);
            return fs.getContentSummary(new Path(target)).getLength() / 1024 / 1024; // bytes -> MB
        }
        catch (Exception ex)
        {
            log.error("HadoopUse.getSize" + ex.getMessage(), ex);
            return 0;
        }
        finally
        {
            closeQuietly(fs);
        }
    }

    /**
     * Creates a file on HDFS and writes the given text into it as UTF-8, replacing whatever
     * already exists at that path.
     *
     * @param uri      URI of the file system to connect to
     * @param user     user on whose behalf the file system is accessed
     * @param fullName path of the file to create
     * @param content  text to write
     * @return {@code true} if the file was written; {@code false} if {@code fullName} or
     *         {@code content} is null/empty or if any step failed (the error is logged)
     */
    public static boolean createHdfsFile(String uri, String user, String fullName, String content)
    {
        if (fullName == null || fullName.length() == 0)
        {// no target path given
            return false;
        }
        if (content == null || content.length() == 0)
        {// nothing to write
            return false;
        }

        FileSystem fs = null;
        FSDataOutputStream os = null;
        try
        {
            fs = FileSystem.newInstance(URI.create(uri), new Configuration(), user);
            Path target = new Path(fullName);
            if (fs.exists(target))
            {// remove the old file (or directory tree) first
                fs.delete(target, true);
            }
            os = fs.create(target);
            os.write(content.getBytes(UTF8));
            // Close explicitly on the success path: a failing close means the data may not have
            // been persisted, so it has to be reported rather than swallowed by the cleanup.
            os.close();
            os = null;
            return true;
        }
        catch (Exception ex)
        {
            log.error("HadoopUse.createHdfsFile" + ex.getMessage(), ex);
            return false;
        }
        finally
        {
            closeQuietly(os);
            closeQuietly(fs);
        }
    }

    /**
     * Deletes a file or directory tree on HDFS.
     *
     * @param uri      URI of the file system to connect to
     * @param user     user on whose behalf the file system is accessed
     * @param fullName path to delete (directories are deleted recursively)
     * @return {@code true} if the path does not exist or was deleted; {@code false} if
     *         {@code fullName} is null/empty, the delete was refused, or an error occurred
     *         (errors are logged)
     */
    public static boolean deleteHdfsFile(String uri, String user, String fullName)
    {
        if (fullName == null || fullName.length() == 0)
        {// no target path given
            log.error("HadoopUse.deleteHdfsFile文件名不合法");
            return false;
        }

        FileSystem fs = null;
        try
        {
            fs = FileSystem.newInstance(URI.create(uri), new Configuration(), user);
            Path target = new Path(fullName);
            if (!fs.exists(target))
            {// nothing to delete counts as success
                return true;
            }
            return fs.delete(target, true);
        }
        catch (Exception ex)
        {
            log.error("HadoopUse.deleteHdfsFile" + ex.getMessage(), ex);
            return false;
        }
        finally
        {
            closeQuietly(fs);
        }
    }

    /**
     * Exports a result set to a delimited UTF-8 text file on the HA cluster, replacing whatever
     * already exists at that path.
     *
     * <p>Each row becomes one line terminated by {@code "\r\n"}; columns are joined with
     * {@code terminated}. SQL NULL is written as an empty field, columns whose type name is
     * {@code DATE} are formatted as {@code yyyy-MM-dd HH:mm:ss}, and every other value is
     * written via {@link String#valueOf(Object)}.
     *
     * <p>The termination flag is polled once every {@value #TERMINATION_CHECK_INTERVAL} rows.
     * When it is set the export stops; the row that triggered the check is not written and the
     * rows written so far are kept in the file.
     *
     * @param fullName   path of the file to create
     * @param resultSet  rows to export; when {@code null} the method returns without touching HDFS
     * @param terminated column separator; must not be {@code null}
     * @param flag       termination flag to poll; {@code null} means the export cannot be aborted
     * @throws IllegalArgumentException if {@code terminated} is {@code null}
     * @throws IOException              if writing to HDFS fails
     * @throws SQLException             if reading the result set fails
     * @throws Exception                if the Kerberos login fails
     */
    public void createHdfsFile(String fullName, ResultSet resultSet, String terminated, FlagUtil flag)
        throws IOException, InterruptedException, SQLException, Exception
    {
        if (resultSet == null)
        {// no cursor, nothing to export
            return;
        }
        if (terminated == null)
        {// fail before the existing file is deleted instead of half-way through the first row
            throw new IllegalArgumentException("terminated (column separator) must not be null");
        }

        byte[] separator = terminated.getBytes(UTF8);
        byte[] lineEnd = "\r\n".getBytes(UTF8);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        Configuration conf = getConf();
        kerberosLogin(conf);

        FileSystem fs = null;
        FSDataOutputStream out = null;
        try
        {
            fs = FileSystem.newInstance(conf);
            Path target = new Path(fullName);
            if (fs.exists(target))
            {// remove the old file (or directory tree) first
                fs.delete(target, true);
            }
            out = fs.create(target);

            // Column types do not change between rows, so classify them once (index 0 unused).
            ResultSetMetaData rsmd = resultSet.getMetaData();
            int columnCnt = rsmd.getColumnCount();
            boolean[] isDate = new boolean[columnCnt + 1];
            for (int i = 1; i <= columnCnt; i++)
            {
                isDate[i] = "DATE".equalsIgnoreCase(rsmd.getColumnTypeName(i));
            }

            boolean aborted = false;
            int count = 0;
            while (resultSet.next())
            {
                count++;
                if (count >= TERMINATION_CHECK_INTERVAL)
                {// periodically check whether the task has been asked to stop
                    if (flag != null && flag.getTeminalStatus())
                    {
                        aborted = true;
                        break;
                    }
                    count = 0;
                }

                for (int i = 1; i <= columnCnt; i++)
                {
                    out.write(readCell(resultSet, i, isDate[i], sdf).getBytes(UTF8));
                    if (i < columnCnt)
                    {// separator between columns, none after the last one
                        out.write(separator);
                    }
                }
                out.write(lineEnd);
            }

            // Close explicitly on the success path so that a failing flush/close is propagated.
            out.flush();
            out.close();
            out = null;

            if (aborted)
            {
                log.info("fullName:" + fullName + "写入被终止");
            }
            else
            {
                log.info("fullName:" + fullName + "写入成功");
            }
        }
        finally
        {
            closeQuietly(out);
            closeQuietly(fs);
        }
    }

    /**
     * Reads one column of the current row as the text to export.
     *
     * @param resultSet result set positioned on the row being exported
     * @param column    1-based column index
     * @param isDate    whether the column is to be read as a timestamp and formatted with {@code sdf}
     * @param sdf       formatter for date columns
     * @return the text for the cell; the empty string for SQL NULL
     * @throws SQLException if the column cannot be read
     */
    private static String readCell(ResultSet resultSet, int column, boolean isDate, SimpleDateFormat sdf)
        throws SQLException
    {
        if (isDate)
        {
            Timestamp timestamp = resultSet.getTimestamp(column);
            return timestamp == null ? "" : sdf.format(timestamp);
        }
        Object value = resultSet.getObject(column);
        return value == null ? "" : String.valueOf(value);
    }

    /**
     * Lists the direct children of a path on the HA cluster.
     *
     * @param path directory (or file) to list
     * @return the fully qualified paths of the entries, or {@code null} when there are none
     *         (callers must check for {@code null} before iterating)
     * @throws Exception if the Kerberos login or the listing fails
     */
    public static List<String> listDir(String path) throws Exception
    {
        Configuration conf = getConf();
        kerberosLogin(conf);

        FileSystem fs = FileSystem.newInstance(conf);
        try
        {
            FileStatus[] files = fs.listStatus(new Path(path));
            if (files == null || files.length == 0)
            {
                return null;
            }

            List<String> pathList = new LinkedList<String>();
            for (FileStatus file : files)
            {
                pathList.add(file.getPath().toString());
            }
            return pathList;
        }
        finally
        {
            closeQuietly(fs);
        }
    }

    /**
     * Command-line entry point: prints every entry of the directory given as the first argument.
     *
     * @param args {@code args[0]} is the HDFS path to list
     * @throws Exception if the Kerberos login or the listing fails
     */
    public static void main(String[] args) throws Exception
    {
        if (args == null || args.length == 0)
        {
            System.err.println("Usage: HadoopUse <hdfs-path>");
            System.exit(1);
        }

        List<String> pathList = listDir(args[0]);
        if (pathList == null)
        {// listDir reports an empty directory as null
            return;
        }
        for (String path : pathList)
        {
            System.out.println(path);
        }
    }
}

注意：以上代码同时用到了HA和Kerberos认证，其中的nameservice名称、NameNode地址、principal以及keytab路径都需要按实际集群环境修改。

通过HA方式操作HDFS的更多相关文章

使用命令行的方式操作hdfs
必须要用打全路径,没有相对路径的概念,或者cd的概念打印报告: 所有的命令显示出来: 以下的操作分别是创建创建文件夹,删除文件夹,显示文件夹,可见删除文件夹只能够使用-rmr . 从本地拷贝文件到h ...
Java API操作HA方式下的Hadoop
通过java api连接Hadoop集群时,如果集群支持HA方式,那么可以通过如下方式设置来自动切换到活动的master节点上.其中,ClusterName 是可以任意指定的,跟集群配置无关,dfs. ...
用流的方式来操作hdfs上的文件
import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import ...
使用javaAPI操作hdfs
欢迎到https://github.com/huabingood/everyDayLanguagePractise查看源码. 一.构建环境在hadoop的安装包中的share目录中有hadoop所有 ...
使用Java方式连接HDFS
IDEA中新建Maven工程,添加POM依赖, 在IDE的提示中, 点击 Import Changes 等待自动下载完成相关的依赖包. <?xml version="1.0" ...
Hadoop Java API操作HDFS文件系统（Mac）
1.下载Hadoop的压缩包 tar.gz https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/stable/ 2.关联jar包在 ...
使用Java API方式连接HDFS Client测试
IDEA中新建Maven工程,添加POM依赖, 在IDE的提示中, 点击 Import Changes 等待自动下载完成相关的依赖包. <?xml version="1.0" ...
Linux -- 之HDFS实现自动切换HA（全新HDFS）
Linux -- 之HDFS实现自动切换HA(全新HDFS) JDK规划 1.7及以上 https://blog.csdn.net/meiLin_Ya/article/details/8065094 ...
（第3篇）HDFS是什么？HDFS适合做什么？我们应该怎样操作HDFS系统？
摘要: 这篇文章会详细介绍HDFS是什么,HDFS的作用,适合和不适合的场景,我们该如何操作HDFS? HDFS文件系统 Hadoop 附带了一个名为 HDFS(Hadoop分布式文件系统)的分布 ...

随机推荐

Android中的ProgressBar的android:indeterminate
不明确(false)就是滚动条的当前值自动在最小到最大值之间来回移动,形成这样一个动画效果,这个只是告诉别人“我正在工作”,但不能提示工作进度到哪个阶段.主要是在进行一些无法确定操作时间的任务时作为提 ...
什么是Intent
Intent负责在应用程序的主要部件——活动,服务,广播接收器(处理Android消息)之间传递消息的信使对象 Intent是对要执行的操作的一种抽象的描述,它除了指定一个动作之外,Intent对象还 ...
wukong引擎源码分析之索引——part 3 文档评分无非就是将docid对应的fields信息存储起来，为搜索结果rank评分用
之前的文章分析过,接受索引请求处理的代码在segmenter_worker.go里: func (engine *Engine) segmenterWorker() { for { request : ...
codeforces 414A A. Mashmokh and Numbers(素数筛)
题目链接: A. Mashmokh and Numbers time limit per test 1 second memory limit per test 256 megabytes input ...
NSArray是强引用容器
经常比较疑惑NSArray.NSDictionary.NSSet这几个对象容器管理对象所采用的方式是“强引用”还是“弱引用”. 通过简单的命令行程序得到的结论是“NSArray.NSDictionar ...
BZOJ_3073_[Pa2011]Journeys_线段树优化建图+BFS
BZOJ_3073_[Pa2011]Journeys_线段树优化建图+BFS Description Seter建造了一个很大的星球,他准备建造N个国家和无数双向道路.N个国家很快建造好了,用1..N ...
Watir: 应用Watir，调用AutoIT清空IE浏览器的Cookies
require 'win32ole'ai = WIN32OLE.new("AutoItX3.Control")ai.RunWait("RunDll32.exe InetC ...
kubernetes1.13.1部署ingress-nginx-十一
一.Ingress 简介 (1) 在Kubernetes中,服务和Pod的IP地址仅可以在集群网络内部使用,对于集群外的应用是不可见的. 为了使外部的应用能够访问集群内的服务, 在Kubernetes ...
[原]Windows下openssl的下载安装和使用
安装openssl有两种方式,第一种直接下载安装包,装上就可运行:第二种可以自己下载源码,自己编译.下面对两种方式均进行详细描述. 一.下载和安装openss 方法一:直接使用openssl安装包 W ...
bzoj 2440: [中山市选2011]完全平方数【莫比乌斯函数+二分】
二分答案,然后用莫比乌斯函数作为容斥系数,计算当前枚举的mid内有几个满足要求的数 #include<iostream> #include<cstdio> #include&l ...

通过HA方式操作HDFS

通过HA方式操作HDFS的更多相关文章

随机推荐

热门专题