Creating partitioned tables in Hive
# Create a partitioned table
CREATE TABLE if not exists data_center.test_partition (
id int,
name string,
age int
)
PARTITIONED BY (date_id string)
row format delimited fields terminated by ','
stored as textfile
# The LOCATION clause is optional; if omitted, Hive uses the default warehouse path
LOCATION
'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition';
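# Quick checks (not from the original notes) to confirm the table and its partitions:
SHOW PARTITIONS data_center.test_partition;
DESC FORMATTED data_center.test_partition;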
# Add partitions
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-06') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06';
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-07') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07';
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-08') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08';
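# If the partition directories already exist on HDFS under date_id=... names, MSCK can register them all at once instead of one ALTER per partition (a sketch, not from the original notes):
MSCK REPAIR TABLE tmp.soon_tbl_address;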
# Insert data (with gzip-compressed output):
set hive.exec.compress.output=true;
set mapred.output.compress=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec;
SET mapred.output.compression.type=BLOCK;
insert overwrite table seqfile_table select * from textfile_table;
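# Sanity check (not from the original notes): running set with only the key name prints its current value in the Hive CLI
set hive.exec.compress.output;
# expected: hive.exec.compress.output=true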
# Column metadata for soon_tbl_address (all columns typed string): pass_address_id, order_id, address_type, receiver, receiver_tel, zip_code, province, city, district, address, x_coordinate, y_coordinate, del_flg, create_time, create_user, update_time, update_user, address_order, midway_order_status, street, order_type, out_order_id, poi_title
# Copy a table's structure
create table tmp.soon_tbl_address like default.soon_tbl_address;
# Copy data into tmp.soon_tbl_address (reading here from ods.soon_tbl_address), one partition at a time
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-05') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 3;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-06') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 6;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-07') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 9;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-08') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 12;
# Column order in tmp.soon_tbl_address, with date_id as the partition column: pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update,date_id
# Partition data lands under paths like:
/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-02-08/000000_0
# Drop a partition and its data
ALTER TABLE soon_tbl_address DROP PARTITION (date_id='2017-06-07');
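# Several partitions can be dropped in one statement (a sketch, not from the original notes):
ALTER TABLE soon_tbl_address DROP PARTITION (date_id='2017-06-06'), PARTITION (date_id='2017-06-08');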
# Delete a table's data while keeping the table
insert overwrite table tmp.soon_tbl_address select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address where 1=0;
truncate table tmp.soon_tbl_address;
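# TRUNCATE can also target a single partition (a sketch, not from the original notes):
truncate table tmp.soon_tbl_address partition (date_id='2017-06-06');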
# List files in HDFS
[ds@master ~]$ hadoop fs -ls /user/hive/warehouse/tmp.db/soon_tbl_address
17/06/09 16:02:20 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 5 items
drwxr-xr-x - ds supergroup 0 2017-06-09 11:07 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06
drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
drwxr-xr-x - ds supergroup 0 2017-06-09 14:23 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
drwxr-xr-x - ds supergroup 0 2017-06-09 14:56 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__b6a456c4_73b6_4fec_8dfa_ddbe38f55856
drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08
# Delete HDFS files and directories recursively
[ds@master ~]$ hadoop fs -rmr hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
rmr: DEPRECATED: Please use 'rm -r' instead.
17/06/09 16:03:24 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/06/09 16:03:25 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2' to trash at: hdfs://master:9000/user/ds/.Trash/Current/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
[ds@master ~]$
# Export data to a local directory
INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my' SELECT * FROM tmp.soon_tbl_address;
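# Since Hive 0.11 the export delimiter can be set inline, which makes the local files easier to read (a sketch, not from the original notes):
INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
SELECT * FROM tmp.soon_tbl_address;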
# Load a local .txt file (on Linux) into a specific partition
load data LOCAL INPATH '/home/users/my/test_table.txt' OVERWRITE into table data_center.test_table PARTITION(date='2017-06-07');
load data LOCAL INPATH '/home/users/my/test_partition.txt' OVERWRITE into table tmp.temp_test_partition PARTITION(date='2017-06-13');
# Create a non-partitioned table manually
CREATE TABLE IF NOT EXISTS data_center.test_no_partition(
id int ,
name string ,
age int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
stored as textfile;
# Create an empty table with the same structure
CREATE TABLE data_center.test_no_partition LIKE tmp.test_no_partition;
# Rename a column and change its type
ALTER TABLE test_no_partition CHANGE ip id int;
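# Columns can be appended the same way (a sketch, not from the original notes; the email column is hypothetical):
ALTER TABLE test_no_partition ADD COLUMNS (email string);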
# Drop the table and its data
drop table test_no_partition;
# Delete table data while keeping the structure
# Note: DELETE only works on ACID (transactional) tables; on a plain table use TRUNCATE or an INSERT OVERWRITE that filters rows
delete from test_no_partition;
insert overwrite table test_no_partition select id,name,age from test_no_partition where id=3;
# Grant permissions on a Hive database directory
hdfs dfs -chmod 777 hdfs://master:9000/user/hive/warehouse/tmp.db
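# Add -R to apply the mode recursively to everything under the directory (a sketch, not from the original notes):
hdfs dfs -chmod -R 777 hdfs://master:9000/user/hive/warehouse/tmp.db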
# Rename an HDFS directory
hadoop fs -mv hdfs://master:9000/user/hive/warehouse/ods.db/test_table__cb086c0f_88ee_4623_938c_311a1e717c8a hdfs://master:9000/user/hive/warehouse/ods.db/test_table
CREATE TABLE ods.test_table_tmp(
id int,
name_path string,
parent_path string
)
row format delimited fields terminated by ','
stored as textfile
# The LOCATION clause is optional
LOCATION
'hdfs://master:9000/user/hive/warehouse/ods.db/test_table_tmp';
# Example data file in HDFS (read below with -cat):
hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a
# Export to a local file
insert overwrite local directory '/home/ds/user/my' select * from ods.test_table;
# View an HDFS file's contents
hdfs dfs -cat hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a
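# Staging table for the incremental load below; note date_id is a regular column here, not a partition column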
CREATE TABLE `tmp.temp_test_partition`(
`id` int,
`name` string,
`age` int,
`date_id` string)
row format delimited fields terminated by ','
stored as textfile;
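# Full DDL of data_center.test_partition (apparently SHOW CREATE TABLE output):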
CREATE TABLE `data_center.test_partition`(
`id` int,
`name` string,
`age` int)
PARTITIONED BY ( `date_id` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ( 'field.delim'=',', 'serialization.format'=',')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition'
TBLPROPERTIES ( 'transient_lastDdlTime'='1497492633')
# Create the partitioned table manually
CREATE TABLE if not exists data_center.test_partition (
id int,
name string,
age int
)
PARTITIONED BY (date string)
row format delimited fields terminated by ','
stored as textfile;
# Modify partitions
ALTER TABLE table_name PARTITION (dt='2008-08-08') SET LOCATION "new location";
ALTER TABLE data_center.test_partition PARTITION (date_id='2008-08-08') RENAME TO PARTITION (dt='20080808');
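# Dynamic partition insert: the partition value comes from the last column of the select instead of a literal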
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
insert overwrite table data_center.test_partition partition (date_id) select * from data_center.test_partition where name in ('ccc','lisi');
# Incremental partition update
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
# The anti-join ("b.id is null") keeps target rows whose id is absent from the staging table, so staging rows replace matching rows in the listed partitions
insert overwrite table data_center.test_partition partition(date_id)
select tmp.id,tmp.name,tmp.age,tmp.date_id from tmp.temp_test_partition tmp
union all select a.* from data_center.test_partition a
left outer join tmp.temp_test_partition b on a.id=b.id where b.id is null and a.date_id in ('2017-06-14','2017-06-15','2017-06-16');
# Local data file extracted from MySQL: /home/ds/users/wcy/mysql_to_hdfs_update.txt
CREATE TABLE data_center.zjs_cust_endurance_time_copy (
id int,
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date
)
row format delimited fields terminated by ','
stored as textfile;
# Load local txt data into the table
load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_update.txt'
OVERWRITE into table data_center.zjs_cust_endurance_time_copy;
# Existing data in the target Hive table:
106 2017-06-21 120001 1062.22 2017-05-27
107 2017-06-21 310001 1387.85 2017-05-27
108 2017-06-21 111111 100.0 2017-05-27
# Rows extracted from MySQL:
106 2017-04-01 120000 2062.22 2017-04-27
107 2017-04-01 310000 2387.85 2017-04-27
# Target table contents after the job runs:
108 2017-06-21 111111 100.0 2017-05-27
106 2017-04-01 120000 2062.22 2017-04-27
107 2017-04-01 310000 2387.85 2017-04-27
# Variant: the table structure had no id column, so id was added manually at the end
CREATE TABLE data_center.zjs_cust_endurance_time_copy (
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date,
id int
)
row format delimited fields terminated by ','
stored as textfile;
# Existing data in the target Hive table:
2017-06-21 120001 1062.22 2017-05-27 106
2017-06-21 310001 1387.85 2017-05-27 107
2017-06-21 111111 100.0 2017-05-27 108
# Rows extracted from the MySQL table:
2017-04-01 120000 2062.22 2017-04-27 06:46:30.0 106
2017-04-01 310000 2387.85 2017-04-27 06:46:30.0 107
2017-04-01 320100 2027.64 2017-04-27 06:46:30.0 108
2017-04-01 330100 3763.16 2017-04-27 06:46:30.0 109
# Target table contents after the job runs:
2017-04-01 120000 2062.22 2017-04-27 106
2017-04-01 310000 2387.85 2017-04-27 107
2017-04-01 320100 2027.64 2017-04-27 108
2017-04-01 330100 3763.16 2017-04-27 109
# Load local txt data into a partitioned table
CREATE TABLE data_center.zjs_cust_endurance_time_copy_part (
id int,
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date
)
PARTITIONED BY (date string)
row format delimited fields terminated by ','
stored as textfile;
load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_partupdate.txt'
OVERWRITE into table data_center.zjs_cust_endurance_time_copy_part PARTITION(date='2017-06-21');
# Existing data in the target partitioned table:
select * from data_center.zjs_cust_endurance_time_copy_part;
108 2017-06-21 111111 100.0 2017-05-27 2017-06-21
106 2017-04-01 222222 99.0 2017-04-27 2017-06-21
# Rows extracted from MySQL:
106 2017-04-01 120000 2062.22 2017-04-27 06:46:30.0
107 2017-04-01 310000 2387.85 2017-04-27 06:46:30.0
108 2017-04-01 320100 2027.64 2017-04-27 06:46:30.0
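# Merge the extracted rows into the partitioned table, using the same anti-join pattern as above: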
insert overwrite table data_center.zjs_cust_endurance_time_copy_part partition(date)
select tmp.id,tmp.computer_time,tmp.user_city_code,tmp.avg_patient_time,tmp.update_time,tmp.date
from tmp.temp_zjs_cust_endurance_time_copy_part tmp
union all select a.* from data_center.zjs_cust_endurance_time_copy_part a
left outer join tmp.temp_zjs_cust_endurance_time_copy_part b
on a.id=b.id where b.id is null;
# Download HDFS files to the local filesystem
hadoop fs -get <source path> <local directory>
hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/word_category_weights/000000_0 /home/ds/mydata
hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/zjs_user_result/000000_0 /home/ds/mydata/zjs_user_result
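# When a table directory holds many part files, -getmerge concatenates them into one local file (a sketch, not from the original notes; the local file name is hypothetical):
hadoop fs -getmerge hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/zjs_user_result /home/ds/mydata/zjs_user_result.txt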
# Download the file to the client machine (sz comes with the lrzsz package)
sz /home/ds/mydata/000000_0
create table data_center.word_category_weights(
keyword string,
category_id string,
weight_auto double,
category_name string,
del int
)
row format delimited fields terminated by ','
stored as textfile;
load data LOCAL INPATH '/home/users/wcy/000000_0'
OVERWRITE into table data_center.word_category_weights;
# Note: OVERWRITE replaces the table's existing data; omit it to append instead
load data LOCAL INPATH '/home/users/wcy/000000_0_more_data'
OVERWRITE into table data_center.word_category_weights;
load data LOCAL INPATH '/home/users/wcy/zjs_user_result' OVERWRITE into table data_center.zjs_user_result;
load data LOCAL INPATH '/home/users/wcy/goodsinfo' OVERWRITE into table data_center.goodsinfo;
# Start the Hive metastore and HiveServer2 service processes
hive --service metastore &
hive --service hiveserver2 &
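# With HiveServer2 up, beeline can connect over JDBC (a sketch, not from the original notes, assuming the default port 10000 and user ds):
beeline -u jdbc:hive2://master:10000 -n ds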
# Sample DataX job log: the CSV reader falls back to defaults and the RDBMS writer retries row by row
UnstructuredStorageReaderUtil - CsvReader using default values [{"captureRawRecord":true,"columnCount":0,"comment":"#","currentRecord":-1,"delimiter":",","escapeMode":1,"headerCount":0,"rawRecord":"","recordDelimiter":"\u0000","safetySwitch":true,"skipEmptyRecords":true,"textQualifier":"\"","trimWhitespace":true,"useComments":false,"useTextQualifier":true,"values":[]}], csvReaderConfig is [null]
2017-06-26 16:39:24.963 [0-0-0-writer] WARN CommonRdbmsWriter$Task - rolling back this write and committing one row at a time, because: Unknown command
2017-06-26 16:39:44.629 [job-0] INFO StandAloneJobContainerCommunicator - Total 2560 records, 45917 bytes | Speed 4.48KB/s, 256 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.006s | All Task WaitReaderTime 0.138s | Percentage 0.00%
2017-06-26 16:39:45.393 [0-0-0-writer] WARN CommonRdbmsWriter$Task - rolling back this write and committing one row at a time, because: Unknown command
2017-06-26 16:39:53.884 [0-0-0-writer] WARN CommonRdbmsWriter$Task - rolling back this write and committing one row at a time, because: Unknown command
2017-06-26 16:39:54.630 [job-0] INFO StandAloneJobContainerCommunicator - Total 4608 records, 81924 bytes | Speed 3.52KB/s, 204 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 11.211s | All Task WaitReaderT
2017-06-27 10:37:49.949 [0-0-0-writer] WARN CommonRdbmsWriter$Task - rolling back this write and committing one row at a time, because: Unknown command
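# Remove a leftover Hive staging directory: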
hadoop fs -rm hdfs://SAD-HDP-003:9000/user/hive/warehouse/ods.db/soon_tbl_address_part/.hive-staging_hive_2017-06-23_18-23-08_102_5432451293100937443-1
CREATE TABLE goodsinfo(
create_time string,
deal_code bigint ,
store_code bigint,
category_code1 bigint,
category_code2 bigint,
category_code3 bigint,
category_code4 bigint,
goods_code bigint,
term_code int,
term_name string,
store_name string,
goods_name string,
deal_name string,
goods_image string,
goods_pv int,
goods_uv int,
goods_pv_total int,
time_total int,
jump2_pv int,
collect_total int,
add_total int,
pay_total int,
pay_amount decimal(10,2))
row format delimited fields terminated by ','
stored as textfile;
CREATE TABLE zjs_user_result_1(
compute_day string,
member_id string,
mobile string,
uc_user_id string,
real_name string,
register_time string,
order_city_code string,
city_name string,
manage_amount double,
deduction double,
freight double,
actual_paid double,
insurance_amount double,
complete_orders string,
deduction_all double,
freight_all double,
complete_orders_all string,
last_complete_time string,
order_time string,
cancel_order_before string,
cancel_order_after string,
order_nums string,
invite_peoples string,
invite_peoples_all string,
share_orders string,
share_orders_all string)
row format delimited fields terminated by ','
stored as textfile;