Hive数据导入与导出

数据导入

● 本地文件导入

-- Load a file from the local filesystem (LOCAL keyword) into one partition of testA.
LOAD DATA LOCAL INPATH '/home/hadoop/sourceA.txt'
INTO TABLE testA
PARTITION (create_time = '2015-07-08');

● HDFS文件导入

-- Load a file that already resides on HDFS (no LOCAL keyword) into one partition of testA.
LOAD DATA INPATH '/home/hadoop/sourceA.txt'
INTO TABLE testA
PARTITION (create_time = '2015-07-08');

● 表数据导入

-- Append rows selected from testB into one partition of testA.
INSERT INTO TABLE testA PARTITION (create_time = '2015-07-11')
SELECT
    id,
    name,
    area
FROM testB
WHERE id = 1;

● 创建表的时候导入

-- Create testC and populate it in a single step from a query over testB (CTAS).
CREATE TABLE testC AS
SELECT
    name,
    code
FROM testB;

数据导出

● 数据导出到本地

-- Export every row of testA to a directory on the local filesystem (LOCAL keyword)
-- as tab-delimited text.
INSERT OVERWRITE LOCAL DIRECTORY '/opt/data/zj/output'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
SELECT *
FROM testA;

● 数据导出到HDFS文件系统

-- Export every row of testA to an HDFS directory (no LOCAL keyword, no explicit row format).
INSERT OVERWRITE DIRECTORY '/home/hadoop/output'
SELECT *
FROM testA;

使用Sqoop完成Hive数据导入与导出

数据导入

# Step 1 of 2: copy the MySQL query result into an HDFS staging directory.
# The data is loaded into the Hive table afterwards with a LOAD DATA statement.
#
# The option lines must stay contiguous: a blank line after a trailing
# backslash ends the command, and the remaining options would then be run
# as separate (failing) commands.
#
# --query must contain the literal $CONDITIONS token; it is written as
# \$CONDITIONS because the query is wrapped in double quotes.
#
# NOTE(review): --password puts the secret on the command line (shell history,
# process listing). Prefer -P or --password-file outside of throwaway demos.
/opt/module/sqoop-1.4.6/bin/sqoop import \
  -Dmapreduce.job.queuename=hive \
  --connect "jdbc:mysql://shendu-data003:3306/sddw?useUnicode=true&characterEncoding=utf-8" \
  --username root \
  --password dizewei \
  --driver com.mysql.jdbc.Driver \
  --target-dir /sddw_origin_data/db/ods_main_acceptance_info/ \
  --delete-target-dir \
  --query "select * from acceptance where 1 = 1 and \$CONDITIONS" \
  --num-mappers 1 \
  --hive-drop-import-delims \
  --fields-terminated-by '\t' \
  --null-string '\\N' \
  --null-non-string '\\N'

-- Step 2 of 2, run inside Hive: load the staged HDFS directory into the table.
-- OVERWRITE replaces the table's current contents.
LOAD DATA INPATH '/sddw_origin_data/db/ods_main_acceptance_info'
OVERWRITE INTO TABLE sddw.ods_main_acceptance_info;

MySQL数据全表字段导入

# Import the whole MySQL table `acceptance` straight into Hive (--hive-import).
# Per the original note, the Hive table is created automatically from the MySQL
# column definitions when it does not exist yet.
#
# The option lines must stay contiguous: a blank line after a trailing
# backslash ends the command early.
#
# NOTE(review): --password puts the secret on the command line (shell history,
# process listing). Prefer -P or --password-file outside of throwaway demos.
/opt/module/sqoop-1.4.6/bin/sqoop import \
  --connect "jdbc:mysql://shendu-data003:3306/sddw?useUnicode=true&characterEncoding=utf-8" \
  --username root \
  --password dizewei \
  --driver com.mysql.jdbc.Driver \
  --table acceptance \
  --fields-terminated-by '\t' \
  --delete-target-dir \
  --num-mappers 1 \
  --hive-import \
  --hive-database sddw \
  --hive-table ods_main_acceptance_info \
  --null-string '\\N' \
  --null-non-string '\\N'

数据导出

# Export the Hive table's files back into the MySQL table ads_first_order_count.
# --export-dir points at the HDFS directory where Hive stores the table's data.
#
# Comments cannot follow a line-continuation backslash: "\ # ..." escapes the
# space instead of the newline, so the command would end on that line and the
# comment text would be passed as arguments. Keep all comments up here and keep
# the option lines contiguous (no blank lines between them).
#
# NOTE(review): --password puts the secret on the command line (shell history,
# process listing). Prefer -P or --password-file outside of throwaway demos.
/opt/module/sqoop-1.4.6/bin/sqoop export \
  -Dmapreduce.job.queuename=hive \
  --connect "jdbc:mysql://shendu-data003:3306/sddw?useUnicode=true&characterEncoding=utf-8" \
  --username root \
  --password dizewei \
  --table ads_first_order_count \
  --export-dir /sddw/ads/ads_first_order_count \
  --input-fields-terminated-by '\t' \
  --num-mappers 1 \
  --input-null-string '\\N' \
  --input-null-non-string '\\N'

# Export into a MySQL table that has an auto-increment ID column.
# --columns names the target columns explicitly; presumably the ID column is
# left out so that MySQL generates it (confirm against the order_info schema).
#
# The option lines must stay contiguous: a blank line after a trailing
# backslash ends the command early.
#
# NOTE(review): --password puts the secret on the command line (shell history,
# process listing). Prefer -P or --password-file outside of throwaway demos.
/opt/module/sqoop-1.4.6/bin/sqoop export \
  -Dmapreduce.job.queuename=hive \
  --connect "jdbc:mysql://106.13.27.33:3306/nightwatch?useUnicode=true&characterEncoding=utf-8" \
  --username root \
  --password Houkai17245 \
  --table order_info \
  --export-dir /sddw/ads/ads_seller_order_info_month \
  --input-fields-terminated-by '\t' \
  --num-mappers 1 \
  --input-null-string '\\N' \
  --input-null-non-string '\\N' \
  --columns "code,order_time,amount,projectName,payerName,create_time,company_id"

注意：

Sqoop数据迁移的时候，会遇到一些NULL值的问题：在MySQL中NULL值表现为“NULL”或者“null”，但是在Hive中NULL的存储格式为“\N”。在导出（sqoop export）时使用如下参数可以解决这个问题（导入（sqoop import）时对应的参数是 --null-string 和 --null-non-string）：

# Option fragment (not a standalone command): these are the flags used by the `sqoop export` commands in this document.
--input-null-string '\\N' \

--input-null-non-string '\\N'