Use case: download files from the table directories that different users have created on HDFS. The program runs the hadoop cat command to pull each file to the local disk, transfers it to the target server over FTP, and records the modification time of the HDFS directory in MySQL. Before each run, the time stored in MySQL is compared with the current modification time of each HDFS path in the batch; a file is downloaded only if its modification time has changed.
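The actual transfer is delegated to a local download.sh script, which the entry class below invokes with the HDFS path, a target file name, and the owning user name. The script itself is not shown in this post; a minimal sketch of what it might look like, assuming the argument order used by FileDownload and a placeholder FTP host and password:

#!/bin/bash
# Hypothetical sketch of download.sh -- not the original script.
hdfs_path="$1"    # HDFS table directory
table_file="$2"   # target file name, e.g. mytable_20161122.txt
user_name="$3"    # owning user; used as the FTP login here (an assumption)

local_dir="$(cd "$(dirname "$0")" && pwd)"

# The "hadoop cat" step from the description: concatenate the table's
# files into a single local file.
hadoop fs -cat "${hdfs_path}/*" > "${local_dir}/${table_file}"

# Push the file to the target server over FTP; host and password are placeholders.
ftp -n target.server.example <<EOF
user ${user_name} PASSWORD_PLACEHOLDER
put ${local_dir}/${table_file}
bye
EOF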

Entry point:

package edm.spark.download.edm.spark.download;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Date;
import java.util.List;

import org.apache.hadoop.fs.Path;

import edm.spark.download.edm.spark.util.HdfsFileProcessor;
import edm.spark.download.edm.spark.util.JdbcDirectUtils;

public class FileDownload {

    public static void main(String[] args) throws Exception {
        String local_path = args[0]; // e.g. "/home/hdfs/ysy/"
        String hdfs_path = args[1];  // e.g. "hdfs://hdp/user/"

        HdfsFileProcessor fileProcessor = new HdfsFileProcessor();
        List<String> userLists = fileProcessor.getUserUnderFolder(hdfs_path);
        List<Path> listPath = fileProcessor.getFileUnderFolder(userLists);
        if (null != listPath && listPath.size() > 0) {
            for (Path path : listPath) {
                String pathName = path.toString();
                String[] nameList = pathName.split("/");
                String time = JdbcDirectUtils.DateTimeFormat(new Date());
                String tableName = nameList[nameList.length - 1] + "_" + time + ".txt";
                String userName = nameList[nameList.length - 3];
                Process ps = null;
                try {
                    // Submit the local download command for this path.
                    ps = Runtime.getRuntime().exec(local_path + "download.sh "
                            + pathName + " " + tableName + " " + userName);
                    System.out.println(local_path + "download.sh " + pathName
                            + " " + tableName);
                    // Update the modification time recorded in MySQL.
                    JdbcDirectUtils jdbcForTime = new JdbcDirectUtils();
                    long dateTime = jdbcForTime.queryDate(
                            "select modify_time,path from download_time where path='"
                                    + pathName + "'");
                    long insertTime = fileProcessor.getModificationTime(path);
                    if (dateTime != 0) {
                        jdbcForTime.updateDateTime(insertTime, pathName);
                    } else {
                        // First run for this path: insert the current directory time.
                        jdbcForTime.insertDate(insertTime, pathName);
                    }
                    jdbcForTime.destroy();
                    // Collect the script's output. This read happens inside the
                    // try block; the original read outside it, where ps could
                    // still be null if exec had thrown.
                    BufferedReader br = new BufferedReader(
                            new InputStreamReader(ps.getInputStream()));
                    String line;
                    StringBuilder sb = new StringBuilder();
                    while ((line = br.readLine()) != null) {
                        sb.append(line).append("\n");
                    }
                    br.close();
                    System.out.println(sb.toString());
                } catch (Exception e) {
                    e.printStackTrace();
                } finally {
                    if (ps != null) {
                        ps.destroy();
                    }
                }
            }
        } else {
            System.out.println("no file to download");
        }
    }
}
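The download_time table itself is never shown in the post; the queries above imply something like the following minimal definition (column names come from the SQL above, the types are assumptions):

-- Assumed DDL for download_time, inferred from the queries; not from the post.
CREATE TABLE download_time (
    path        VARCHAR(512) NOT NULL,  -- full HDFS path of the table directory
    modify_time BIGINT       NOT NULL,  -- modification time in epoch milliseconds
    PRIMARY KEY (path)
);

FileDownload is then launched with two arguments, the local script directory and the HDFS user root, matching the examples in the comments above.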

HdfsFileProcessor:

package edm.spark.download.edm.spark.util;

import java.io.IOException;
import java.sql.SQLException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.AccessControlException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

public class HdfsFileProcessor {

    static final Logger logger = LoggerFactory.getLogger(HdfsFileProcessor.class);

    protected FileSystem fileSystem;
    private Configuration conf;

    public HdfsFileProcessor() {
        init();
    }

    public void init() {
        conf = new Configuration();
        conf.addResource("resources/hdfs-site.xml");
        conf.addResource("resources/core-site.xml");
        try {
            fileSystem = FileSystem.get(conf);
        } catch (IOException e) {
            logger.error("init error.......", e);
            e.printStackTrace();
        }
    }

    public final boolean checkFile(String filePath) {
        boolean exists = false;
        try {
            Path path = new Path(filePath);
            exists = fileSystem.exists(path);
        } catch (IOException e) {
            logger.error("", e);
        } catch (Exception e) {
            logger.error("", e);
        }
        return exists;
    }

    public List<Path> getFileUnderFolder(List<String> names) throws IOException, SQLException {
        JdbcDirectUtils jdbcForTime = new JdbcDirectUtils();
        List<Path> paths = Lists.newArrayList();
        for (String name : names) {
            Path folderPath = new Path("hdfs://hdp/user/" + name + "/");
            if (fileSystem.exists(folderPath)) {
                try {
                    FileStatus[] fileStatus = fileSystem.listStatus(folderPath);
                    for (int i = 0; i < fileStatus.length; i++) {
                        Path path = fileStatus[i].getPath();
                        if (path.toString().contains("tosas")) {
                            FileStatus[] tableStatus = fileSystem.listStatus(path);
                            for (int j = 0; j < tableStatus.length; j++) {
                                // The original indexed tableStatus[i] here, which
                                // reads the wrong entry (or throws) whenever i != j.
                                Path tablePath = tableStatus[j].getPath();
                                long modificationTime = fileSystem
                                        .getFileStatus(tablePath).getModificationTime();
                                long dataTime = jdbcForTime.queryDate(
                                        "select modify_time,path from download_time where path='"
                                                + tablePath.toString() + "'");
                                // Only collect paths modified since the recorded run.
                                if (modificationTime > dataTime) {
                                    paths.add(tablePath);
                                }
                            }
                        }
                    }
                } catch (RemoteException e) {
                    logger.error("", e);
                } catch (AccessControlException e) {
                    logger.error("", e);
                }
            }
        }
        return paths;
    }

    /**
     * Return the modification time of the given HDFS path.
     */
    public long getModificationTime(Path path) throws IOException {
        return fileSystem.getFileStatus(path).getModificationTime();
    }

    /**
     * List the user names under the given folder, i.e. which user each
     * table directory belongs to.
     */
    public List<String> getUserUnderFolder(String hdfsPath) throws Exception {
        List<String> userList = Lists.newArrayList();
        Path userPath = new Path(hdfsPath);
        if (fileSystem.exists(userPath)) {
            FileStatus[] fileStatus = fileSystem.listStatus(userPath);
            for (int i = 0; i < fileStatus.length; i++) {
                String path = fileStatus[i].getPath().toString();
                String[] parts = path.split("/");
                if (parts.length > 4) {
                    userList.add(parts[4]);
                }
            }
        }
        return userList;
    }

    public void destroy() throws IOException {
        if (fileSystem != null) {
            fileSystem.close();
        }
        fileSystem = null;
    }
}

JdbcDirectUtils:

package edm.spark.download.edm.spark.util;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;

import com.google.common.collect.Maps;

public class JdbcDirectUtils {

    private static Connection conn;
    private Statement stmt;
    private String file_dir = "/template/download_mysql.txt";
    private Map<String, String> jdbcConfMap = Maps.newHashMap();
    private LoadHdfsConf mysqlConf;

    public JdbcDirectUtils() {
        initDriver();
    }

    public void initDriver() {
        try {
            if (conn == null) {
                mysqlConf = new LoadHdfsConf();
                jdbcConfMap = mysqlConf.readHdfsFile(file_dir);
                Class.forName("com.mysql.jdbc.Driver");
                // The original joined user and password with "@password=",
                // which MySQL ignores; URL parameters must be joined with "&".
                String url = "jdbc:mysql://" + jdbcConfMap.get("url") + ":"
                        + jdbcConfMap.get("port") + "/"
                        + jdbcConfMap.get("schema") + "?user="
                        + jdbcConfMap.get("user") + "&password="
                        + jdbcConfMap.get("password")
                        + "&useUnicode=true&characterEncoding="
                        + jdbcConfMap.get("characterEncoding");
                conn = DriverManager.getConnection(url);
            }
            // The original never created stmt, so every query threw a
            // NullPointerException; create one per instance here.
            stmt = conn.createStatement();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Update the recorded modification time for a path.
     */
    public void updateDateTime(long date, String path) throws SQLException {
        // The original SQL was missing the space before "where".
        stmt.executeUpdate("update download_time set modify_time=" + date
                + " where path='" + path + "'");
    }

    public long queryDate(String sql) throws SQLException {
        ResultSet rs = stmt.executeQuery(sql);
        long dateTime = 0;
        while (rs.next()) {
            dateTime = rs.getLong("modify_time");
        }
        rs.close();
        return dateTime;
    }

    public void insertDate(Long date, String path) throws SQLException {
        stmt.executeUpdate("insert into download_time(path,modify_time) values ('"
                + path + "'," + date + ")");
    }

    /**
     * Convert a yyyyMMdd string to epoch milliseconds.
     */
    public long convert2Long(String date) {
        long time = 0;
        SimpleDateFormat sf = new SimpleDateFormat("yyyyMMdd");
        try {
            time = sf.parse(date).getTime();
        } catch (java.text.ParseException e) {
            e.printStackTrace();
        }
        return time;
    }

    public static String DateTimeFormat(Date date) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
        return sdf.format(date);
    }

    public void destroy() throws SQLException {
        if (stmt != null) {
            stmt.close();
        }
        if (conn != null) {
            conn.close();
        }
        conn = null;
    }
}
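Note that queryDate, updateDateTime, and insertDate all splice values straight into the SQL string, which breaks on paths containing quotes and is open to injection. A sketch of parameterized equivalents, not the original code; it assumes the same conn field and an additional java.sql.PreparedStatement import:

// Sketch: parameterized versions of the same queries, against the
// download_time table described earlier.
public long queryDateSafe(String path) throws SQLException {
    try (PreparedStatement ps = conn.prepareStatement(
            "select modify_time from download_time where path = ?")) {
        ps.setString(1, path);
        try (ResultSet rs = ps.executeQuery()) {
            return rs.next() ? rs.getLong("modify_time") : 0L;
        }
    }
}

public void updateDateTimeSafe(long date, String path) throws SQLException {
    try (PreparedStatement ps = conn.prepareStatement(
            "update download_time set modify_time = ? where path = ?")) {
        ps.setLong(1, date);
        ps.setString(2, path);
        ps.executeUpdate();
    }
}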

LoadHdfsConf:

package edm.spark.download.edm.spark.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Maps;

public class LoadHdfsConf {

    static final Logger logger = LoggerFactory.getLogger(LoadHdfsConf.class);

    protected FileSystem fileSystem;

    public final boolean checkFile(String filePath) {
        boolean exists = false;
        try {
            Path path = new Path(filePath);
            // The original called fileSystem.equals(path), which always
            // returns false; exists() is what was intended.
            exists = fileSystem.exists(path);
        } catch (Exception e) {
            logger.error("", e);
        }
        return exists;
    }

    /**
     * Read a key=value configuration file from HDFS into a map.
     */
    public Map<String, String> readHdfsFile(String hdfsPath) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource("resources/hdfs-site.xml");
        conf.addResource("resources/core-site.xml");
        fileSystem = FileSystem.get(conf);
        Path path = new Path(hdfsPath);
        InputStream in = fileSystem.open(path);
        List<String> lines = IOUtils.readLines(in);
        in.close();
        if (null == lines || lines.isEmpty()) {
            return null;
        }
        Map<String, String> map = Maps.newHashMap();
        int rowNum = 0;
        for (String line : lines) {
            rowNum++;
            // Validate before splitting; the original indexed content[1]
            // first and would throw on any line without an "=".
            if (StringUtils.isEmpty(line) || !line.contains("=")) {
                logger.error("skip malformed config line {}: {}", rowNum, line);
                continue;
            }
            String[] content = line.split("=", 2);
            String code = content[0];
            String value = content[1];
            if (StringUtils.isEmpty(code) || StringUtils.isEmpty(value)) {
                logger.error("skip malformed config line {}: {}", rowNum, line);
                continue;
            }
            map.put(code, value);
        }
        return map;
    }
}
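readHdfsFile is what JdbcDirectUtils uses to load /template/download_mysql.txt from HDFS. Given the keys that initDriver looks up, that file would contain key=value lines along these lines (every value here is a placeholder):

url=mysql.example.com
port=3306
schema=edm
user=edm_user
password=CHANGE_ME
characterEncoding=utf8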
