使用MapReduce将Mysql数据导入HDFS代码链接

将HDFS数据导入Mysql,代码示例

package com.zhen.mysqlToHDFS;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner; /**
* @author FengZhen
* 将hdfs数据导入mysql
* 使用DBOutputFormat将HDFS路径下的结构化数据写入mysql中,结构化数据如下,第一列为key,后边三列为数据
* 0 1 Enzo 180.66
* 1 2 Din 170.666
*
*/
public class DBOutputFormatApp extends Configured implements Tool{ /**
* JavaBean
* 需要实现Hadoop序列化接口Writable以及与数据库交互时的序列化接口DBWritable
* 官方API中解释如下:
* public class DBInputFormat<T extends DBWritable>
* extends InputFormat<LongWritable, T> implements Configurable
* 即Mapper的Key是LongWritable类型,不可改变;Value是继承自DBWritable接口的自定义JavaBean
*/
public static class BeanWritable implements Writable, DBWritable { private int id;
private String name;
private double height; public void readFields(ResultSet resultSet) throws SQLException {
this.id = resultSet.getInt();
this.name = resultSet.getString();
this.height = resultSet.getDouble();
} public void write(PreparedStatement preparedStatement) throws SQLException {
preparedStatement.setInt(, id);
preparedStatement.setString(, name);
preparedStatement.setDouble(, height);
} public void readFields(DataInput dataInput) throws IOException {
this.id = dataInput.readInt();
this.name = dataInput.readUTF();
this.height = dataInput.readDouble();
} public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeInt(id);
dataOutput.writeUTF(name);
dataOutput.writeDouble(height);
} public void set(int id,String name,double height){
this.id = id;
this.name = name;
this.height = height;
} @Override
public String toString() {
return id + "\t" + name + "\t" + height;
} } public static class DBOutputMapper extends Mapper<LongWritable, Text, NullWritable, BeanWritable>{
private NullWritable outputKey;
private BeanWritable outputValue; @Override
protected void setup(Mapper<LongWritable, Text, NullWritable, BeanWritable>.Context context)
throws IOException, InterruptedException {
this.outputKey = NullWritable.get();
this.outputValue = new BeanWritable();
}
@Override
protected void map(LongWritable key, Text value,
Mapper<LongWritable, Text, NullWritable, BeanWritable>.Context context)
throws IOException, InterruptedException {
//插入数据库成功的计数器
final Counter successCounter = context.getCounter("exec", "successfully");
//插入数据库失败的计数器
final Counter faildCounter = context.getCounter("exec", "faild");
//解析结构化数据
String[] fields = value.toString().split("\t");
//DBOutputFormatApp这个MapReduce应用导出的数据包含long类型的key,所以忽略key从1开始
if (fields.length > ) {
int id = Integer.parseInt(fields[]);
String name = fields[];
double height = Double.parseDouble(fields[]);
this.outputValue.set(id, name, height);
context.write(outputKey, outputValue);
//如果插入数据库成功则递增1,表示成功计数
successCounter.increment(1L);
}else{
//如果插入数据库失败则递增1,表示失败计数
faildCounter.increment(1L);
} }
} /**
* 输出的key必须是继承自DBWritable的类型,DBOutputFormat要求输出的key必须是DBWritable类型
* */
public static class DBOutputReducer extends Reducer<NullWritable, BeanWritable, BeanWritable, NullWritable>{
@Override
protected void reduce(NullWritable key, Iterable<BeanWritable> values,
Reducer<NullWritable, BeanWritable, BeanWritable, NullWritable>.Context context)
throws IOException, InterruptedException {
for (BeanWritable beanWritable : values) {
context.write(beanWritable, key);
}
}
} public int run(String[] arg0) throws Exception {
Configuration configuration = getConf();
//在创建Configuration的时候紧接着配置数据库连接信息
DBConfiguration.configureDB(configuration, "com.mysql.jdbc.Driver", "jdbc:mysql://localhost:3306/hadoop", "root", "123qwe");
Job job = Job.getInstance(configuration, DBOutputFormatApp.class.getSimpleName());
job.setJarByClass(DBOutputFormatApp.class);
job.setMapperClass(DBOutputMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(BeanWritable.class); job.setReducerClass(DBOutputReducer.class);
job.setOutputFormatClass(DBOutputFormat.class);
job.setOutputKeyClass(BeanWritable.class);
job.setOutputValueClass(NullWritable.class); job.setInputFormatClass(TextInputFormat.class);
FileInputFormat.setInputPaths(job, arg0[]);
//配置当前作业输出到数据库表、字段信息
DBOutputFormat.setOutput(job, "people", new String[]{"id","name","height"}); return job.waitForCompletion(true)?:;
} public static int createJob(String[] args){
Configuration conf = new Configuration();
conf.set("dfs.datanode.socket.write.timeout", "");
conf.set("mapreduce.input.fileinputformat.split.minsize", "");
conf.set("mapreduce.input.fileinputformat.split.maxsize", "");
int status = ;
try {
status = ToolRunner.run(conf,new DBOutputFormatApp(), args);
} catch (Exception e) {
e.printStackTrace();
}
return status;
} public static void main(String[] args) {
args = new String[]{"/user/hadoop/mapreduce/mysqlToHdfs/people"};
int status = createJob(args);
System.exit(status);
} }

打成jar包,放在服务器上,执行hadoop jar命令

hadoop jar /Users/FengZhen/Desktop/Hadoop/other/mapreduce_jar/HDFSToMysql.jar com.zhen.mysqlToHDFS.DBOutputFormatApp

任务结束后,在MySQL的people表中即可查询到导入的数据。

使用MapReduce将HDFS数据导入Mysql的更多相关文章

  1. 通过sqoop将hdfs数据导入MySQL

    简介:Sqoop是一款开源的工具,主要用于在Hadoop(Hive)与传统的数据库(mysql.postgresql...)间进行数据的传递,可以将一个关系型数据库(例如 : MySQL ,Oracl ...

  2. 使用MapReduce将HDFS数据导入到HBase(二)

    package com.bank.service; import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf. ...

  3. 使用MapReduce将HDFS数据导入到HBase(一)

    package com.bank.service; import java.io.IOException; import org.apache.hadoop.conf.Configuration;im ...

  4. 使用MapReduce将HDFS数据导入到HBase(三)

    使用MapReduce生成HFile文件,通过BulkLoader方式(跳过WAL验证)批量加载到HBase表中 package com.mengyao.bigdata.hbase; import j ...

  5. 将Excel数据导入mysql数据库的几种方法

    将Excel数据导入mysql数据库的几种方法 “我的面试感悟”有奖征文大赛结果揭晓! 前几天需要将Excel表格中的数据导入到mysql数据库中,在网上查了半天,研究了半天,总结出以下几种方法,下面 ...

  6. 使用MySQL Migration Toolkit快速将Oracle数据导入MySQL[转]

    使用MySQL Migration Toolkit快速将Oracle数据导入MySQL上来先说点废话本人最近在学习一些数据库方面的知识,之前接触过Oracle和MySQL,最近又很流行MongoDB非 ...

  7. SQLServer2005数据导入Mysql到详细教程

    如果转载请注明转载地址,谢谢. SQL SERVER数据导入MYSQL目录 1.Navicat for MySQL 版本10.0.9 2.创建目标数据库 3.创建正确的SQL SERVER数据库ODB ...

  8. Excel连接到MySQL,将Excel数据导入MySql,MySQL for Excel,,

    Excel连接到MySQL 即使当今时代我们拥有了类似微软水晶报表之类的强大报表工具和其他一些灵活的客户管 理应用工具,众多企业在分析诸如销售统计和收入信息的时候,微软的Excel依然是最常用的工具. ...

  9. 使用MySQL Migration Toolkit快速将Oracle数据导入MySQL

    MySQL GUI Tools中的MySQL Migration Toolkit可以非常方便快捷的将Oracle数据导到MySQL中,该软件可以在http://dev.mysql.com/downlo ...

随机推荐

  1. Android使用ImageView显示网络图片

    本案例使用ImageView 简单的实现了网络图片的调用.当中注意事项.由于用到了网络,这里採用了HttpClient方法訪问网络联接,关于怎样使用,可參照文章 Android中使用HttpClien ...

  2. [译]GLUT教程 - 每秒帧数

    Lighthouse3d.com >> GLUT Tutorial >> Extras >> Frames per Second 你的程序实际上跑得多快? 有时我们 ...

  3. linux下常用网页查看下载工具--wget

    http://www.linuxidc.com/Linux/2015-06/118256.htm 5 个基于Linux命令行的文件下载和网站浏览工具 rTorrent.wget.cURL.w3m.El ...

  4. JQ多种刷新方式

    下面介绍全页面刷新方法:有时候可能会用到 window.location.reload()刷新当前页面. parent.location.reload()刷新父亲对象(用于框架) opener.loc ...

  5. Http协议 详解(转载)

    http://blog.csdn.net/gueter/archive/2007/03/08/1524447.aspx 引言 HTTP是一个属于应用层的面向对象的协议,由于其简捷.快速的方式,适用于分 ...

  6. saltstack内置state模块file之append

    添加文件文本在文件尾部 salt.states.file.append(name, text=None, makedirs=False, source=None, source_hash=None, ...

  7. windows上mysql安装

    1. 下载MySQL Community Server 5.7.14 Index of /MySQL/Downloads/MySQL-Cluster-7.1 2. 解压MySQL压缩包 安装路径:E: ...

  8. BEGINNING SHAREPOINT® 2013 DEVELOPMENT 第11章节--为Office和SP解决方式开发集成Apps Office新的App模型

    BEGINNING SHAREPOINT® 2013 DEVELOPMENT 第11章节--为Office和SP解决方式开发集成Apps  Office新的App模型         Office 2 ...

  9. phpStorm pycharm编辑器主题修改,自定义颜色

    新的启程 注: 本人小菜鸟一枚,内容也是从其他博客中借鉴的,谨以此作为写博客开端. phpstorm修改主题: 1. phpstorm主题下载 http://www.phpstorm-themes.c ...

  10. leetCode 57.Insert Interval (插入区间) 解题思路和方法

    Insert Interval  Given a set of non-overlapping intervals, insert a new interval into the intervals ...