使用MapReduce将mysql数据导入HDFS

package com.zhen.mysqlToHDFS;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

import java.sql.PreparedStatement;

import java.sql.ResultSet;

import java.sql.SQLException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.io.Writable;

import org.apache.hadoop.mapred.lib.db.DBWritable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;

import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

/**

 * @author FengZhen

 * 将mysql数据导入hdfs

 */

public class DBInputFormatApp extends Configured implements Tool {

    /**

     * JavaBean

     * 需要实现Hadoop序列化接口Writable以及与数据库交互时的序列化接口DBWritable

     * 官方API中解释如下：

     * public class DBInputFormat<T extends DBWritable>

     *   extends InputFormat<LongWritable, T> implements Configurable

     * 即Mapper的Key是LongWritable类型，不可改变；Value是继承自DBWritable接口的自定义JavaBean

     */

    public static class BeanWritable implements Writable, DBWritable {

        private int id;

        private String name;

        private double height;

        public void readFields(ResultSet resultSet) throws SQLException {

            this.id = resultSet.getInt();

            this.name = resultSet.getString();

            this.height = resultSet.getDouble();

        }

        public void write(PreparedStatement preparedStatement) throws SQLException {

            preparedStatement.setInt(, id);

            preparedStatement.setString(, name);

            preparedStatement.setDouble(, height);

        }

        public void readFields(DataInput dataInput) throws IOException {

            this.id = dataInput.readInt();

            this.name = dataInput.readUTF();

            this.height = dataInput.readDouble();

        }

        public void write(DataOutput dataOutput) throws IOException {

            dataOutput.writeInt(id);

            dataOutput.writeUTF(name);

            dataOutput.writeDouble(height);

        }

        @Override

        public String toString() {

            return id + "\t" + name + "\t" + height;

        }

    }

    /**

     * Map

     * 当Map的输出key为LongWritable，value为Text时，reduce可以省略不写，默认reduce也是输出LongWritable:Text

     * */

    public static class DBInputMapper extends Mapper<LongWritable, BeanWritable, LongWritable, Text> {

        private LongWritable outputKey;

        private Text outputValue;

        @Override

        protected void setup(Mapper<LongWritable, BeanWritable, LongWritable, Text>.Context context)

                throws IOException, InterruptedException {

            this.outputKey = new LongWritable();

            this.outputValue = new Text();

        }

        @Override

        protected void map(LongWritable key, BeanWritable value,

                Mapper<LongWritable, BeanWritable, LongWritable, Text>.Context context)

                throws IOException, InterruptedException {

            outputKey.set(key.get());;

            outputValue.set(value.toString());

            context.write(outputKey, outputValue);

        }

    }

    public int run(String[] arg0) throws Exception {

        Configuration configuration = getConf();

        //配置当前作业需要使用的JDBC配置

        DBConfiguration.configureDB(configuration, "com.mysql.jdbc.Driver", "jdbc:mysql://localhost:3306/hadoop",

                "root", "123qwe");

        Job job = Job.getInstance(configuration, DBInputFormatApp.class.getSimpleName());

        job.setJarByClass(DBInputFormatApp.class);

        job.setMapperClass(DBInputMapper.class);

        job.setMapOutputKeyClass(LongWritable.class);

        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(LongWritable.class);

        job.setOutputValueClass(Text.class);

        //配置作业的输入数据格式

        job.setInputFormatClass(DBInputFormat.class);

        //配置当前作业需要查询的sql语句及接收sql语句的bean

        DBInputFormat.setInput(

                job,

                BeanWritable.class,

                "select * from people",

                "select count(1) from people");

        FileOutputFormat.setOutputPath(job, new Path(arg0[]));

        return job.waitForCompletion(true) ?  : ;

    }

    public static int createJob(String[] args) {

        Configuration conf = new Configuration();

        conf.set("dfs.datanode.socket.write.timeout", "");

        conf.set("mapreduce.input.fileinputformat.split.minsize", "");

        conf.set("mapreduce.input.fileinputformat.split.maxsize", "");

        int status = ;

        try {

            status = ToolRunner.run(conf,new DBInputFormatApp(), args);

        } catch (Exception e) {

            e.printStackTrace();

        }

        return status;

    }

    public static void main(String[] args) {

        args = new String[] { "/user/hadoop/mapreduce/mysqlToHdfs/people" };

        int status = createJob(args);

        System.exit(status);

    }

}

在mysql新建一张表 people

CREATE TABLE `people` (

  `id` int(11) NOT NULL,

  `name` varchar(255) DEFAULT NULL,

  `height` double DEFAULT NULL,

  PRIMARY KEY (`id`)

) ENGINE=InnoDB DEFAULT CHARSET=utf8

写入几条测试数据。

将mapreduce作业打成jar包，上传到Hadoop集群服务器，执行。

hadoop jar /Users/FengZhen/Desktop/Hadoop/other/mapreduce_jar/MysqlToHDFS.jar com.zhen.mysqlToHDFS.DBInputFormatApp

因为代码中已经指定了写入HDFS的路径，所以此处不需要传参，只需指定job所在类即可。

在运行中如果提示mysql驱动找不到，如下

Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver

    at java.net.URLClassLoader$.run(URLClassLoader.java:)

    at java.net.URLClassLoader$.run(URLClassLoader.java:)

    at java.security.AccessController.doPrivileged(Native Method)

    at java.net.URLClassLoader.findClass(URLClassLoader.java:)

    at java.lang.ClassLoader.loadClass(ClassLoader.java:)

    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:)

    at java.lang.ClassLoader.loadClass(ClassLoader.java:)

    at java.lang.Class.forName0(Native Method)

    at java.lang.Class.forName(Class.java:)

    at org.apache.hadoop.mapreduce.lib.db.DBConfiguration.getConnection(DBConfiguration.java:)

    at org.apache.hadoop.mapreduce.lib.db.DBInputFormat.createConnection(DBInputFormat.java:)

    ...  more

解决办法：

将mysql jdbc驱动放入 .../hadoop/share/hadoop/mapreduce/lib下,然后重启集群再次执行即可。

使用MapReduce将HDFS数据导入MySql

使用MapReduce将mysql数据导入HDFS的更多相关文章

使用 sqoop 将mysql数据导入到hdfs（import）
Sqoop 将mysql 数据导入到hdfs(import) 1.创建mysql表 CREATE TABLE `sqoop_test` ( `id` ) DEFAULT NULL, `name` va ...
Hadoop 中利用 mapreduce 读写 mysql 数据
Hadoop 中利用 mapreduce 读写 mysql 数据有时候我们在项目中会遇到输入结果集很大,但是输出结果很小,比如一些 pv.uv 数据,然后为了实时查询的需求,或者一些 OLAP ...
Sqoop将mysql数据导入hbase的血与泪
Sqoop将mysql数据导入hbase的血与泪(整整搞了大半天) 版权声明:本文为yunshuxueyuan原创文章.如需转载请标明出处: https://my.oschina.net/yunsh ...
使用sqoop把mysql数据导入hive
使用sqoop把mysql数据导入hive export HADOOP_COMMON_HOME=/hadoop export HADOOP_MAPRED_HOME=/hadoop cp /hive ...
使用 sqoop 将mysql数据导入到hive表（import）
Sqoop将mysql数据导入到hive表中先在mysql创建表 CREATE TABLE `sqoop_test` ( `id` ) DEFAULT NULL, `name` varchar() ...
sqoop将oracle数据导入hdfs集群
使用sqoop将oracle数据导入hdfs集群集群环境: hadoop1.0.0 hbase0.92.1 zookeeper3.4.3 hive0.8.1 sqoop-1.4.1-incubati ...
使用sqoop将mysql数据导入到hive中
首先准备工具环境:hadoop2.7+mysql5.7+sqoop1.4+hive3.1 准备一张数据库表: 接下来就可以操作了... 一.将MySQL数据导入到hdfs 首先我测试将zhaopin表 ...
MySQL数据导入导出方法与工具mysqlimport
MySQL数据导入导出方法与工具mysqlimport<?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office ...
Logstash：把MySQL数据导入到Elasticsearch中
Logstash:把MySQL数据导入到Elasticsearch中前提条件需要安装好Elasticsearch及Kibana. MySQL安装根据不同的操作系统我们分别对MySQL进行安装.我 ...

随机推荐

自己动手开发IOC容器
前两天写简历.写了一句:精通Spring IoC容器.怎么个精通法?还是自己动手写个IOC容器吧. 什么是IoC(Inversion of Control)?什么是DI(Dependency Inje ...
js项目第一课：获取节点的方法有三个
第一种方法: demo.html代码如下:<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" &qu ...
NFS详细分析
1. NFS服务介绍 1.1什么是NFS服务 NFS(Network File System)即网络文件系统,它允许网络中的计算机之间通过TCP/IP网络共享资源.在NFS的应用中,本地NFS的客户端 ...
Java学习第一步——JDK安装及Java环境变量配置
Java作为当下很主流的编程语言,学习Java的朋友也越来越多了,作为一门面向对象的编程语言,Java也有着安全.高效等诸多有点.从TIOBE(TIOBE排行榜是根据互联网上有经验的程序员.课程和第 ...
HTML5 2D平台游戏开发#6地图绘制
此前已经完成了一部分角色的动作,现在还缺少可以交互的地图让游戏看起来能玩.不过在开始之前应当考虑清楚使用什么类型的地图,就2D平台游戏来说,一般有两种类型的地图,Tile-based和Art-base ...
FPGA开发流程
需求说明:Verilog设计内容 :FPGA开发基本流程及注意事项来自 :时间的诗原文来自:http://www.dzsc.com/data/2015-3-16/1080 ...
hive中遇到的问题
) from t_1 where country ='China' group by (name = 'qq'); 结果图 select * from t_sz_part; 按照理解来说,应该只有一个 ...
selenium实现在新窗口打开链接
问题:页面代码中不存在target="_blank",怎么实现点击一个按钮,在新窗口中打开? WebElement link = element.findElement(By.ta ...
Matrix4x4矩阵 api
Matrix4x4 矩阵api介绍 Namespace: UnityEngine Description 描述 A standard 4×4 transformation matrix. 一个标准的4 ...
ios 手势返回<1>
极其简单取巧的方法 iOS7之后是有侧滑返回手势功能的.注意,也就是说系统已经定义了一种手势,并且给这个手势已经添加了一个触发方法(重点).但是,系统的这个手势的触发条件是必须从屏幕左边缘开始滑动.我 ...

使用MapReduce将mysql数据导入HDFS

使用MapReduce将mysql数据导入HDFS的更多相关文章

随机推荐

热门专题