DataX的安装及使用
DataX的安装及使用
DataX的安装
DataX不需要依赖其他服务,直接上传、解压、安装、配置环境变量即可
也可以直接在windows上解压
DataX的使用
stream2stream
编写配置文件stream2stream.json
# stream2stream.json
{
"job": {
"content": [
{
"reader": {
"name": "streamreader",
"parameter": {
"sliceRecordCount": 10,
"column": [
{
"type": "long",
"value": "10"
},
{
"type": "string",
"value": "hello,你好,世界-DataX"
}
]
}
},
"writer": {
"name": "streamwriter",
"parameter": {
"encoding": "UTF-8",
"print": true
}
}
}
],
"setting": {
"speed": {
"channel": 5
}
}
}
}
执行同步任务
datax.py stream2stream.json
执行结果

mysql2mysql
需要新建student2数据库,并创建student表
编写配置文件mysql2mysql.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"splitPk": "age",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"preSql": [
"truncate student2"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://master:3306/student2?useUnicode=true&characterEncoding=utf8",
"table": [
"student2"
]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}
执行同步任务
datax.py mysql2mysql.json
mysql2hdfs
写hive跟hdfs时一样的
编写配置文件mysql2hdfs.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"splitPk": "age",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"defaultFS": "hdfs://master:9000",
"fileType": "text",
"path": "/user/hive/warehouse/datax.db/students",
"fileName": "student",
"column": [
{
"name": "id",
"type": "bigint"
},
{
"name": "name",
"type": "string"
},
{
"name": "age",
"type": "INT"
},
{
"name": "gender",
"type": "string"
},
{
"name": "clazz",
"type": "string"
},
{
"name": "last_mod",
"type": "string"
}
],
"writeMode": "append",
"fieldDelimiter": ","
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}
hbase2mysql
{
"job": {
"content": [
{
"reader": {
"name": "hbase11xreader",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master:2181"
},
"table": "student",
"encoding": "utf-8",
"mode": "normal",
"column": [
{
"name": "rowkey",
"type": "string"
},
{
"name": "cf1:name",
"type": "string"
},
{
"name": "cf1:age",
"type": "string"
},
{
"name": "cf1:gender",
"type": "string"
},
{
"name": "cf1:clazz",
"type": "string"
}
],
"range": {
"startRowkey": "",
"endRowkey": "",
"isBinaryRowkey": false
}
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz"
],
"preSql": [
"truncate student2"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://master:3306/student2?useUnicode=true&characterEncoding=utf8",
"table": [
"student2"
]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}
mysql2hbase
mysql中的score表需将cource_id改为course_id,并将student_id、course_id设为主键,并将所有字段的类型改为int
hbase需先创建score表:create 'score','cf1'
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"student_id",
"course_id",
"score"
],
"splitPk": "course_id",
"connection": [
{
"table": [
"score"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master:2181"
},
"table": "score",
"mode": "normal",
"rowkeyColumn": [
{
"index":0,
"type":"string"
},
{
"index":-1,
"type":"string",
"value":"_"
},
{
"index":1,
"type":"string"
}
],
"column": [
{
"index":2,
"name": "cf1:score",
"type": "int"
}
],
"encoding": "utf-8"
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}
mysql2Phoenix
在Phoenix中创建STUDENT表
CREATE TABLE IF NOT EXISTS STUDENT (
ID VARCHAR NOT NULL PRIMARY KEY,
NAME VARCHAR,
AGE BIGINT,
GENDER VARCHAR ,
CLAZZ VARCHAR
);
编写配置文件MySQLToPhoenix.json
{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz"
],
"splitPk": "id",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student?useSSL=false"
]
}
]
}
},
"writer": {
"name": "hbase11xsqlwriter",
"parameter": {
"batchSize": "256",
"column": [
"ID",
"NAME",
"AGE",
"GENDER",
"CLAZZ"
],
"hbaseConfig": {
"hbase.zookeeper.quorum": "master,node1,node2",
"zookeeper.znode.parent": "/hbase"
},
"nullMode": "skip",
"table": "STUDENT"
}
}
}
]
}
}
HDFSToHBase
将students.txt数据上传至HDFS的
/data/student1/目录在HBase中创建datax表:
create 'datax','cf1'
{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/data/student1/",
"defaultFS": "hdfs://master:9000",
"column": [
{
"index": 0,
"type": "string"
},
{
"index": 1,
"type": "string"
},
{
"index": 2,
"type": "string"
},
{
"index": 3,
"type": "string"
},
{
"index": 4,
"type": "string"
},
{
"index": 5,
"type": "string"
}
],
"fileType": "text",
"encoding": "UTF-8",
"fieldDelimiter": ","
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master,node1,node2"
},
"table": "datax",
"mode": "normal",
"rowkeyColumn": [
{
"index": 0,
"type": "string"
},
{
"index": -1,
"type": "string",
"value": "_"
},
{
"index": 1,
"type": "string"
}
],
"column": [
{
"index": 2,
"name": "cf1:age",
"type": "string"
},
{
"index": 3,
"name": "cf1:gender",
"type": "string"
},
{
"index": 4,
"name": "cf1:clazz",
"type": "string"
},
{
"index": 5,
"name": "cf1:ts",
"type": "string"
}
],
"versionColumn": {
"index": 5
},
"encoding": "utf-8"
}
}
}
]
}
}
DataX的安装及使用的更多相关文章
- DataX的安装
DataX的安装 1. 可下载tar包 https://github.com/alibaba/DataX/blob/master/userGuid.md 2. 下载源码自己编译 git clone h ...
- [大数据技术]datax的安装以及使用
1.datax简述 DataX 是阿里巴巴集团内被广泛使用的离线数据同步工具/平台,实现包括 MySQL.Oracle.SqlServer.Postgre.HDFS.Hive.ADS.HBase.Ta ...
- dataX windows10安装
按照视频课程,从Github上下载文件:https://github.com/alibaba/DataX 然后将下载的压缩包解压即可,不过需要的前提Python环境是要求python2,于是在pyth ...
- datax的安装和使用(windows)
github官方文档和项目:https://github.com/alibaba/DataX 下载后在windows环境下是可以直接用python编译执行的,但从github上下载的版本只支持pyth ...
- 在datax之前版本中添加filewriter并创建job时出现问题
问题描述:
- DataX的使用——大数据同步技术
准备工作: 1.视频教学http://113.31.104.47/portal/#/course/dashboard/b34d160db64624732ef152a1118af11a 2.DataX的 ...
- Pyhton开源框架(加强版)
info:Djangourl:https://www.oschina.net/p/djangodetail: Django 是 Python 编程语言驱动的一个开源模型-视图-控制器(MVC)风格的 ...
- Python开源框架
info:更多Django信息url:https://www.oschina.net/p/djangodetail: Django 是 Python 编程语言驱动的一个开源模型-视图-控制器(MVC) ...
- DataX的简单编译安装测试
搭建环境: Java > =1.6 Python>=2.6 <3 Ant Rpmbuild G++ 编译DataX: 进入rpm文件夹 ...
随机推荐
- Python中的列表、元组、字典、字符串及切片操作
我们引入一个新的概念:数据结构 数据结构是通过某种方式组织在一起的数据元素的集合,这些数据元素可以是数字或字符,甚至可以是其他数据结构,在python中,最基本的数据结构是序列,序列中的每个元素匾被分 ...
- Linux下用Sed查找IP地址
ip addr|sed -n '9p'|egrep '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}'|sed -nr 's#^.*inet (.*) b ...
- 20210821 打表,蛇,购物,ants
考场 T1 没看懂 T4 一眼回滚莫队,但忘记怎么写了,小慌 模拟 T1 题意的时候教练让 zsy 澄清了一下,确定了我不会做... T2 一看就是毒瘤题,T3 感觉比较可做 T4 确定了回滚的细节, ...
- 测试开发【提测平台】分享9-DBUntils优化数据连接&实现应用搜索和分页功能
微信搜索[大奇测试开],关注这个坚持分享测试开发干货的家伙. 从本期开始知识点讲以思维导图的形式给出,内容点会按照讲解-应用-展示的形式体现,这样会更清晰些. DBUntils连接池 在项目中链接数据 ...
- Nginx:多项目开发配置跨域代理
简述Nginx应用场景(前后端) 当我们开发 vue 项目中可以通过 proxyTable 进行跨域,但如果是原生的 html+css+js ,或者其他没有跨域插件的项目中,想要跨域就要引入配置许多的 ...
- elementui table的新增,编辑和删除
\ 新增 this.tableData.unshift(data); 编辑 this.$set(this.tableData,data.index,data); 删除 rows.splice(inde ...
- RIAD配置
一.RIAD 磁盘阵列介绍 二.阵列卡介绍 三.案例举例 一.RAID磁盘阵列介绍 是Redundant Array of Independent Disks的缩写,中文简称为独立冗余磁盘阵列 把 ...
- ysoserial exploit/JRMPClient
ysoserial exploit/JRMPClient 上一篇文章讲到,当服务器反序列化payloads/JRMPListener,即会开启端口监听.再使用exploit/JRMPClient模块发 ...
- 获取docker镜像的tag列表
已nginx为例 命令如下 wget -q https://registry.hub.docker.com/v1/repositories/nginx/tags -O - | sed -e 's/[] ...
- 记录一次C语言中free(p)失败
首先介绍一下自己的程序出错的原因,然后总结一下什么时候free会失败. 1.程序伪代码 // 已知payload已经指向一部分内存数据 char * payload; int payload_len; ...