转眼间已经接触hadoop两周了,从之前的极力排斥到如今的有点喜欢。刚开始被搭建hadoop开发环境搞得几乎要放弃,如今学会了编写小程序,每天都在成长一点,挺好的。好好努力,为自己的装备库再添一件武器,学习在于坚持不懈,加油!!!

马上就要过年了,在最后一天的上班时间内完成了hadoop连接mysql数据库的功能,自己感到很满足。下面就把自己编写的源码贡献给大家,希望能够帮到你们,如存在可以优化的地方还请大牛们指出,也希望有hadoop的大牛能够给点学习建议,这是一个来自hadoop初学者的心声。第一次发布竟然被退回,也不知道为什么,瞬间心情都不好了,但我还是坚持写自己的博客...

StudentRecord类:

package com.simope.mr.db;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException; import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBWritable;

/**
 * A four-field record (id, name, age, departmentID) that can be moved both
 * through Hadoop's binary serialization ({@link Writable}) and through JDBC
 * ({@link DBWritable}).
 *
 * <p>The binary layout is: {@code int id}, {@code Text}-encoded {@code name},
 * {@code int age}, {@code int departmentID}. The JDBC layout uses the same
 * order at column/parameter positions 1 to 4.
 */
public class StudentRecord implements Writable, DBWritable {

    int id;
    String name;
    int age;
    int departmentID;

    /**
     * Restores all four fields from the binary stream.
     *
     * @param in stream positioned at a record produced by {@link #write(DataOutput)}
     * @throws IOException if the stream cannot be read
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.id = in.readInt();
        this.name = Text.readString(in);
        this.age = in.readInt();
        this.departmentID = in.readInt();
    }

    /**
     * Serializes all four fields to the binary stream.
     *
     * @param out destination stream
     * @throws IOException if the stream cannot be written
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // writeInt (4 bytes) is required here: DataOutput.write(int) emits only
        // the low-order byte, which would not match the readInt() calls in
        // readFields(DataInput) and would corrupt every following field.
        out.writeInt(this.id);
        Text.writeString(out, this.name);
        out.writeInt(this.age);
        out.writeInt(this.departmentID);
    }

    /**
     * Populates the record from the current row of a result set, reading
     * columns 1 to 4 as id, name, age and departmentID.
     *
     * @param rs result set positioned on the row to read
     * @throws SQLException if a column cannot be read
     */
    @Override
    public void readFields(ResultSet rs) throws SQLException {
        this.id = rs.getInt(1);
        this.name = rs.getString(2);
        this.age = rs.getInt(3);
        this.departmentID = rs.getInt(4);
    }

    /**
     * Binds the record to parameters 1 to 4 of a prepared statement as
     * id, name, age and departmentID.
     *
     * @param ps statement whose parameters are set
     * @throws SQLException if a parameter cannot be set
     */
    @Override
    public void write(PreparedStatement ps) throws SQLException {
        ps.setInt(1, this.id);
        ps.setString(2, this.name);
        ps.setInt(3, this.age);
        ps.setInt(4, this.departmentID);
    }

    /**
     * Returns {@code name}, {@code age} and {@code departmentID} separated by
     * tab characters. The id is not part of the text form.
     */
    @Override
    public String toString() {
        return this.name + "\t" + this.age + "\t" + this.departmentID;
    }
}

TeacherRecord类:

package com.simope.mr.db;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException; import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBWritable;

/**
 * A four-field record (id, name, age, departmentID) that can be moved both
 * through Hadoop's binary serialization ({@link Writable}) and through JDBC
 * ({@link DBWritable}).
 *
 * <p>The binary layout is: {@code int id}, {@code Text}-encoded {@code name},
 * {@code int age}, {@code int departmentID}. The JDBC layout uses the same
 * order at column/parameter positions 1 to 4.
 */
public class TeacherRecord implements Writable, DBWritable {

    int id;
    String name;
    int age;
    int departmentID;

    /**
     * Restores all four fields from the binary stream.
     *
     * @param in stream positioned at a record produced by {@link #write(DataOutput)}
     * @throws IOException if the stream cannot be read
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.id = in.readInt();
        this.name = Text.readString(in);
        this.age = in.readInt();
        this.departmentID = in.readInt();
    }

    /**
     * Serializes all four fields to the binary stream.
     *
     * @param out destination stream
     * @throws IOException if the stream cannot be written
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // writeInt (4 bytes) is required here: DataOutput.write(int) emits only
        // the low-order byte, which would not match the readInt() calls in
        // readFields(DataInput) and would corrupt every following field.
        out.writeInt(this.id);
        Text.writeString(out, this.name);
        out.writeInt(this.age);
        out.writeInt(this.departmentID);
    }

    /**
     * Populates the record from the current row of a result set, reading
     * columns 1 to 4 as id, name, age and departmentID.
     *
     * @param rs result set positioned on the row to read
     * @throws SQLException if a column cannot be read
     */
    @Override
    public void readFields(ResultSet rs) throws SQLException {
        this.id = rs.getInt(1);
        this.name = rs.getString(2);
        this.age = rs.getInt(3);
        this.departmentID = rs.getInt(4);
    }

    /**
     * Binds the record to parameters 1 to 4 of a prepared statement as
     * id, name, age and departmentID.
     *
     * @param ps statement whose parameters are set
     * @throws SQLException if a parameter cannot be set
     */
    @Override
    public void write(PreparedStatement ps) throws SQLException {
        ps.setInt(1, this.id);
        ps.setString(2, this.name);
        ps.setInt(3, this.age);
        ps.setInt(4, this.departmentID);
    }

    /**
     * Returns {@code name}, {@code age} and {@code departmentID} separated by
     * tab characters. The id is not part of the text form.
     */
    @Override
    public String toString() {
        return this.name + "\t" + this.age + "\t" + this.departmentID;
    }
}

DBMapper类:

package com.simope.mr.db;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * Map step that re-keys every incoming {@link TeacherRecord} by its own id and
 * forwards the record's text form as the value.
 */
public class DBMapper extends MapReduceBase
        implements Mapper<LongWritable, TeacherRecord, LongWritable, Text> {

    /**
     * Emits one pair per input record: the record's {@code id} as the key and
     * the result of {@code value.toString()} as the value.
     *
     * @param key       incoming key; not used
     * @param value     record to forward
     * @param collector receives the emitted pair
     * @param reporter  not used
     * @throws IOException if the collector fails
     */
    public void map(LongWritable key, TeacherRecord value,
            OutputCollector<LongWritable, Text> collector, Reporter reporter)
            throws IOException {
        final LongWritable recordId = new LongWritable(value.id);
        final Text recordText = new Text(value.toString());
        collector.collect(recordId, recordText);
    }
}

DBReducer类:

package com.simope.mr.db;

import java.io.IOException;
import java.util.Iterator; import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

/**
 * Reduce step that turns tab-separated text values of the form
 * {@code name<TAB>age<TAB>departmentID} into {@link StudentRecord} keys.
 *
 * <p>For every value one pair is emitted: the parsed record as the key and the
 * record's name as the value.
 */
public class DBReducer extends MapReduceBase
        implements Reducer<LongWritable, Text, StudentRecord, Text> {

    /** Number of tab-separated fields read from each value: name, age, departmentID. */
    private static final int FIELD_COUNT = 3;

    /**
     * Converts every value grouped under {@code key} into a record and emits it.
     *
     * @param key      grouping key; not used
     * @param values   tab-separated lines to convert
     * @param output   receives one (record, name) pair per value
     * @param reporter not used
     * @throws IOException if a value is malformed or the collector fails
     */
    @Override
    public void reduce(LongWritable key, Iterator<Text> values,
            OutputCollector<StudentRecord, Text> output, Reporter reporter)
            throws IOException {
        // Consume every value, not just the first: several values can share one
        // key, and reading only values.next() once would silently drop the rest.
        while (values.hasNext()) {
            StudentRecord s = parse(values.next().toString());
            output.collect(s, new Text(s.name));
        }
    }

    /** Parses one {@code name<TAB>age<TAB>departmentID} line into a record. */
    private static StudentRecord parse(String line) throws IOException {
        String[] fields = line.split("\t");
        if (fields.length < FIELD_COUNT) {
            throw new IOException("Expected " + FIELD_COUNT
                    + " tab-separated fields (name, age, departmentID) but got "
                    + fields.length + ": " + line);
        }
        StudentRecord s = new StudentRecord();
        // The id is left at its default value; the original author notes that
        // the id column is auto-incremented.
        s.name = fields[0];
        try {
            s.age = Integer.parseInt(fields[1]);
            s.departmentID = Integer.parseInt(fields[2]);
        } catch (NumberFormatException e) {
            throw new IOException("Non-numeric age or departmentID in: " + line, e);
        }
        return s;
    }
}

DBJob类:(读取数据库表内容,并将数据写入hdfs文件中)=>数据库表-hdfs文件

package com.simope.mr.db;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;

/**
 * Job driver that reads rows from a database table and writes them to a file
 * output path.
 *
 * <p>Input: columns {@code id, name, age, departmentID} of table
 * {@code teacher}, ordered by {@code id}, read through {@link DBInputFormat}
 * as {@link TeacherRecord}s. Output: written under {@code /usr/output/db}
 * using the job's default output format.
 *
 * <p>Original header date: 20160202.
 *
 * @author JimLy
 */
public class DBJob {

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args not used
     * @throws IOException if the job fails
     */
    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf(DBJob.class);

        jobConf.setOutputKeyClass(LongWritable.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setInputFormat(DBInputFormat.class);

        FileOutputFormat.setOutputPath(jobConf, new Path("/usr/output/db"));

        // NOTE(review): connection URL and credentials are hard-coded; consider
        // supplying them through configuration instead of source code.
        DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://10.10.1.1:3306/my_hd", "root", "root");

        String[] fields = {"id", "name", "age", "departmentID"};
        // Read from the teacher table of the my_hd database, ordered by id.
        DBInputFormat.setInput(jobConf, TeacherRecord.class, "teacher", null, "id", fields);

        jobConf.setMapperClass(DBMapper.class);
        jobConf.setReducerClass(IdentityReducer.class);

        JobClient.runJob(jobConf);
    }
}

DB2Job类:(通过自定义SQL查询读取数据库表内容,并将数据写入hdfs文件中)=>数据库表-hdfs文件

package com.simope.mr.db;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;

/**
 * Job driver that reads rows from a database with a custom SQL query and
 * writes them to a file output path.
 *
 * <p>Input: rows of table {@code teacher} whose {@code id} is not 4, read
 * through {@link DBInputFormat} as {@link TeacherRecord}s. Output: written
 * under {@code /usr/output/db} using the job's default output format.
 *
 * <p>Original header date: 20160202.
 *
 * @author JimLy
 */
public class DB2Job {

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args not used
     * @throws IOException if the job fails
     */
    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf(DB2Job.class);

        jobConf.setOutputKeyClass(LongWritable.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setInputFormat(DBInputFormat.class);

        FileOutputFormat.setOutputPath(jobConf, new Path("/usr/output/db"));

        // NOTE(review): connection URL and credentials are hard-coded; consider
        // supplying them through configuration instead of source code.
        DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://10.10.1.1:3306/my_hd", "root", "root");

        // Columns are listed explicitly because TeacherRecord reads them by
        // position (1=id, 2=name, 3=age, 4=departmentID). ORDER BY gives the
        // query a stable row order; DBInputFormat runs the query once per
        // split with its own paging clause, so an unordered query could return
        // duplicated or missing rows across splits.
        String inputQuery =
                "SELECT id, name, age, departmentID FROM teacher WHERE id != 4 ORDER BY id";
        String inputCountQuery = "SELECT COUNT(1) FROM teacher where id != 4";
        // Read from the teacher table of the my_hd database.
        DBInputFormat.setInput(jobConf, TeacherRecord.class, inputQuery, inputCountQuery);

        jobConf.setMapperClass(DBMapper.class);
        jobConf.setReducerClass(IdentityReducer.class);

        JobClient.runJob(jobConf);
    }
}

DB3Job类:(读取hdfs文件中的内容,并将数据写入指定的数据库表中)=>hdfs文件-数据库表

package com.simope.mr.db;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBOutputFormat;

/**
 * Job driver that reads text files and inserts their rows into a database
 * table.
 *
 * <p>Input: files under {@code /usr/input/db}, read with
 * {@link TextInputFormat} and passed through {@link IdentityMapper}.
 * {@link DBReducer} converts each line into a record, and
 * {@link DBOutputFormat} writes the records to table {@code teacher} with
 * columns {@code id, name, age, departmentID}.
 *
 * <p>Original header date: 20160202.
 *
 * @author JimLy
 */
public class DB3Job {

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args not used
     * @throws IOException if the job fails
     */
    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf(DB3Job.class);

        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setOutputFormat(DBOutputFormat.class);

        FileInputFormat.addInputPath(jobConf, new Path("/usr/input/db"));

        // NOTE(review): connection URL and credentials are hard-coded; consider
        // supplying them through configuration instead of source code.
        DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://10.10.1.1:3306/my_hd", "root", "root");

        String[] fields = {"id", "name", "age", "departmentID"};
        DBOutputFormat.setOutput(jobConf, "teacher", fields);

        jobConf.setMapperClass(IdentityMapper.class);
        jobConf.setReducerClass(DBReducer.class);

        JobClient.runJob(jobConf);
    }
}

DB4Job类:(读取指定的数据库表信息,并将数据写入其他指定表中)=>数据库表-表

package com.simope.mr.db;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBOutputFormat;

/**
 * Job driver that copies rows from one database table into another.
 *
 * <p>Input: rows of table {@code teacher}, read through
 * {@link DBInputFormat} as {@link TeacherRecord}s and keyed by id in
 * {@link DBMapper}. {@link DBReducer} converts them into records that
 * {@link DBOutputFormat} writes to table {@code student} with columns
 * {@code id, name, age, departmentID}.
 *
 * <p>Original header date: 20160202.
 *
 * @author JimLy
 */
public class DB4Job {

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args not used
     * @throws IOException if the job fails
     */
    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf(DB4Job.class);

        jobConf.setOutputKeyClass(LongWritable.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setInputFormat(DBInputFormat.class);
        jobConf.setOutputFormat(DBOutputFormat.class);

        // NOTE(review): connection URL and credentials are hard-coded; consider
        // supplying them through configuration instead of source code.
        DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://10.10.1.1:3306/my_hd", "root", "root");

        // Columns are listed explicitly because TeacherRecord reads them by
        // position (1=id, 2=name, 3=age, 4=departmentID). ORDER BY gives the
        // query a stable row order; DBInputFormat runs the query once per
        // split with its own paging clause, so an unordered query could return
        // duplicated or missing rows across splits.
        String inputQuery = "SELECT id, name, age, departmentID FROM teacher ORDER BY id";
        String inputCountQuery = "SELECT COUNT(1) FROM teacher";
        // Read from the teacher table of the my_hd database.
        DBInputFormat.setInput(jobConf, TeacherRecord.class, inputQuery, inputCountQuery);

        String[] fields = {"id", "name", "age", "departmentID"};
        DBOutputFormat.setOutput(jobConf, "student", fields);

        jobConf.setMapperClass(DBMapper.class);
        jobConf.setReducerClass(DBReducer.class);

        JobClient.runJob(jobConf);
    }
}

如果你觉得写的不错的,请点个推荐,你的推荐是我继续坚持写博客的动力。。。

如需转载的请注明出处http://www.cnblogs.com/JimLy-BUG/

hadoop与mysql数据库的那点事的更多相关文章

  1. 本地通过Eclipse链接Hadoop操作Mysql数据库问题小结

    前一段时间,在上一篇博文中描述了自己抽时间在构建的完全分布式Hadoop环境过程中遇到的一些问题以及构建成功后,通过Eclipse操作HDFS的时候遇到的一些问题,最近又想进一步学习学习Hadoop操 ...

  2. 一步一步跟我学习hadoop(7)----hadoop连接mysql数据库运行数据读写数据库操作

        为了方便 MapReduce 直接訪问关系型数据库(Mysql,Oracle).Hadoop提供了DBInputFormat和DBOutputFormat两个类.通过DBInputFormat ...

  3. MapReduce 程序mysql JDBC驱动类找不到原因及学习hadoop写入数据到Mysql数据库的方法

    报错 :ClassNotFoundException: com.mysql.jdbc.Driver 需求描述: hadoop需要动态加载个三方jar包(比如mysql JDBC 驱动包),是在MR结束 ...

  4. 关于MySQL数据库中null的那些事

    在mysql数据库中,null是一个经常出现的情况,关于mysql中的null,有哪些注意事项呢?下面简单总结归纳下,后续会不断补充. 1. is null 首先判断数据库中某一列的值是否为null, ...

  5. 吴裕雄--天生自然 HADOOP大数据分布式处理:安装配置MYSQL数据库

    安装之前先安装基本环境:yum install -y perl perl-Module-Build net-tools autoconf libaio numactl-libs # 下载mysql源安 ...

  6. (转)运维角度浅谈MySQL数据库优化

    转自:http://lizhenliang.blog.51cto.com/7876557/1657465 一个成熟的数据库架构并不是一开始设计就具备高可用.高伸缩等特性的,它是随着用户量的增加,基础架 ...

  7. 运维角度浅谈MySQL数据库优化(转)

    一个成熟的数据库架构并不是一开始设计就具备高可用.高伸缩等特性的,它是随着用户量的增加,基础架构才逐渐完善.这篇博文主要谈MySQL数据库发展周期中所面临的问题及优化方案,暂且抛开前端应用不说,大致分 ...

  8. 从运维角度浅谈 MySQL 数据库优化

    一个成熟的数据库架构并不是一开始设计就具备高可用.高伸缩等特性的,它是随着用户量的增加,基础架构才逐渐完善.这篇博文主要谈MySQL数据库发展周期中所面临的问题及优化方案,暂且抛开前端应用不说,大致分 ...

  9. [转载] 运维角度浅谈:MySQL数据库优化

    一个成熟的数据库架构并不是一开始设计就具备高可用.高伸缩等特性的,它是随着用户量的增加,基础架构才逐渐完善. 作者:zhenliang8,本文转自51CTO博客,http://lizhenliang. ...

随机推荐

  1. ACM-ICPC(10/21)

    写一发后缀数组套路题,看起来简单,写起来要人命哦~~~ 总共13题. 分两天debug吧,有点累了~~~ suffix(后缀数组的应用) sa[i] :排名第 i 的后缀在哪(i 从 1 开始) ra ...

  2. POJ 3694 无向图的桥

    Network Time Limit: 5000MS   Memory Limit: 65536K Total Submissions: 10404   Accepted: 3873 Descript ...

  3. [USACO12FEB]牛券Cow Coupons

    嘟嘟嘟 这其实是一道贪心题,而不是dp. 首先我们贪心的取有优惠券中价值最小的,并把这些东西都放在优先队列里,然后看[k + 1, n]中,有些东西使用了优惠券减的价钱是否比[1, k]中用了优惠券的 ...

  4. webstorm识别php代码

    在 setting --editor--filetype---找到html 在下面的框里点右边加好,添加*.php

  5. 获取input的值

    一.jQuery获取单选框的值1.$('input:radio:checked').val():2.$("input[type='radio']:checked").val();3 ...

  6. AQS(一) 对CLH队列的增强

    基本概念 AQS(AbstractQueuedSynchronizer),顾名思义,是一个抽象的队列同步器. 它的队列是先进先出(FIFO)的等待队列 基于这个队列,AQS提供了一个实现阻塞锁的机制 ...

  7. vue项目模拟后台数据

    这次我们来模拟一些后台数据,然后去请求它并且将其渲染到界面上.关于项目的搭建鄙人斗胆向大家推荐我的一篇随笔<Vue开发环境搭建及热更新> 一.数据建立 我这里为了演示这个过程所以自己编写了 ...

  8. Spring的声明式事务----Annotation注解方式(1)

    这里列一个小的demo工程,直接利用Spring的jdbcTemplate访问Mysql数据库. 工程结构: 数据库中的tbl_student表结构如下: 数据实体类Student.java代码如下: ...

  9. 【TOJ 3660】家庭关系(hash+并查集)

    描述 给定若干家庭成员之间的关系,判断2个人是否属于同一家庭,即2个人之间均可以通过这些关系直接或者间接联系. 输入 输入数据有多组,每组数据的第一行为一个正整数n(1<=n<=100), ...

  10. JDBC配置文件db.properties(Mysql) 及dbutils的编写

    #数据库驱动driver=com.mysql.jdbc.Driver#数据库连接url=jdb:mysql://localhost:3306/newdb3?useUnicode=true&ch ...