# Create a partitioned table
CREATE TABLE if not exists data_center.test_partition (
id int,
name string,
age int
)
PARTITIONED BY (date_id string)
row format delimited fields terminated by ','
stored as textfile
# LOCATION is optional; omit it to use the warehouse default path
LOCATION
'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition';

# Add partitions
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-06') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06';
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-07') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07';
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-08') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08';

# Insert data with compressed output:
set hive.exec.compress.output=true; 
set mapred.output.compress=true; 
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; 
set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec; 
SET mapred.output.compression.type=BLOCK;
insert overwrite table seqfile_table select * from textfile_table;
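With these settings the job output is gzip-compressed. Note that mapred.output.compression.type=BLOCK only applies to SequenceFile output (as with seqfile_table here); when the target is a plain textfile table, the Gzip codec instead produces .gz part files, which Hive still reads transparently.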

# Column schema of soon_tbl_address (every column typed string):
# pass_address_id, order_id, address_type, receiver, receiver_tel, zip_code, province, city,
# district, address, x_coordinate, y_coordinate, del_flg, create_time, create_user, update_time,
# update_user, address_order, midway_order_status, street, order_type, out_order_id, poi_title
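To make this dump usable, here is a minimal DDL sketch reconstructed from it — assuming the comma-delimited textfile layout used throughout these notes, the date_id partition implied by the ALTER TABLE statements above, and a trailing etl_update string column (selected by the inserts below but missing from the dump):

CREATE TABLE if not exists tmp.soon_tbl_address (
pass_address_id string,
order_id string,
address_type string,
receiver string,
receiver_tel string,
zip_code string,
province string,
city string,
district string,
address string,
x_coordinate string,
y_coordinate string,
del_flg string,
create_time string,
create_user string,
update_time string,
update_user string,
address_order string,
midway_order_status string,
street string,
order_type string,
out_order_id string,
poi_title string,
etl_update string
)
PARTITIONED BY (date_id string)
row format delimited fields terminated by ','
stored as textfile;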

# Copy a table's structure
create table tmp.soon_tbl_address like default.soon_tbl_address;

# Copy data into tmp.soon_tbl_address partition by partition (the SELECTs read from ods.soon_tbl_address)
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-05') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 3;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-06') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 6;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-07') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 9;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-08') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 12;

# Full column list of tmp.soon_tbl_address (the date_id partition column comes last):
pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update,date_id

# Partition directory and data file paths for reference:
/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-02-08/000000_0

# Drop a partition and its data
ALTER TABLE soon_tbl_address DROP PARTITION (date_id='2017-06-07');

# Empty a table while keeping it
insert overwrite table tmp.soon_tbl_address select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address where 1=0;
truncate table tmp.soon_tbl_address;
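Note: tmp.soon_tbl_address is partitioned, so the INSERT OVERWRITE form also needs a PARTITION clause (static, or dynamic with the partition column selected last). TRUNCATE needs no clause: it clears every partition while keeping the table and partition definitions.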

# List HDFS files
[ds@master ~]$ hadoop fs -ls /user/hive/warehouse/tmp.db/soon_tbl_address
17/06/09 16:02:20 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 5 items
drwxr-xr-x - ds supergroup 0 2017-06-09 11:07 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06
drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
drwxr-xr-x - ds supergroup 0 2017-06-09 14:23 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
drwxr-xr-x - ds supergroup 0 2017-06-09 14:56 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__b6a456c4_73b6_4fec_8dfa_ddbe38f55856
drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08

# Delete HDFS files/directories recursively (rmr is deprecated; use rm -r)
[ds@master ~]$ hadoop fs -rmr hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
rmr: DEPRECATED: Please use 'rm -r' instead.
17/06/09 16:03:24 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/06/09 16:03:25 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2' to trash at: hdfs://master:9000/user/ds/.Trash/Current/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
[ds@master ~]$

# Export data to a local directory
INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my' SELECT * FROM tmp.soon_tbl_address;
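By default the exported files use Ctrl-A (\001) as the field delimiter. Since Hive 0.11 the export statement itself can specify a delimiter; a minimal sketch:

INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
SELECT * FROM tmp.soon_tbl_address;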

# Load a local .txt file into a specific table partition
load data LOCAL INPATH '/home/users/my/test_table.txt' OVERWRITE into table data_center.test_table PARTITION(date='2017-06-07');
load data LOCAL INPATH '/home/users/my/test_partition.txt' OVERWRITE into table tmp.temp_test_partition PARTITION(date='2017-06-13');

# Create a non-partitioned table manually
CREATE TABLE IF NOT EXISTS data_center.test_no_partition(
id int,
name string,
age int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
stored as textfile;

# Create an empty table with the same structure
CREATE TABLE data_center.test_no_partition LIKE tmp.test_no_partition;

# Rename a table column / change its type
ALTER TABLE test_no_partition CHANGE ip id int;

# Drop the table and its data
drop table test_no_partition;

# Delete table data while keeping the table structure
# (a plain DELETE only works on ACID/transactional tables; otherwise use INSERT OVERWRITE or TRUNCATE)
delete from test_no_partition;
insert overwrite table test_no_partition select id,name,age from test_no_partition where id=3;

# Grant permissions on a Hive database directory
hdfs dfs -chmod 777 hdfs://master:9000/user/hive/warehouse/tmp.db
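To cover everything under the database directory as well, chmod also takes -R (777 is convenient on a dev cluster but too permissive for production):

hdfs dfs -chmod -R 777 hdfs://master:9000/user/hive/warehouse/tmp.db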

# Rename an HDFS directory
hadoop fs -mv hdfs://master:9000/user/hive/warehouse/ods.db/test_table__cb086c0f_88ee_4623_938c_311a1e717c8a hdfs://master:9000/user/hive/warehouse/ods.db/test_table

CREATE TABLE ods.test_table_tmp(
id int,
name_path string,
parent_path string
)
row format delimited fields terminated by ','
stored as textfile
# LOCATION is optional; omit it to use the warehouse default path
LOCATION
'hdfs://master:9000/user/hive/warehouse/ods.db/test_table_tmp';

# A data file left behind with a UUID suffix:
hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a

# Export to a local file:
insert overwrite local directory '/home/ds/user/my' select * from ods.test_table;

# View the contents of an HDFS file
hdfs dfs -cat hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a

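# Staging table for the incremental update further below; note date_id is an ordinary column here, not a partition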
CREATE TABLE `tmp.temp_test_partition`(
`id` int, 
`name` string, 
`age` int, 
`date_id` string)
row format delimited fields terminated by ','
stored as textfile;

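# The partitioned target table, apparently captured verbatim via SHOW CREATE TABLE: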
CREATE TABLE `data_center.test_partition`( 
`id` int, 
`name` string, 
`age` int) 
PARTITIONED BY ( `date_id` string) 
ROW FORMAT SERDE 
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
WITH SERDEPROPERTIES ( 'field.delim'=',', 'serialization.format'=',') 
STORED AS INPUTFORMAT 
'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' 
LOCATION 
'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition' 
TBLPROPERTIES ( 'transient_lastDdlTime'='1497492633')

# Create a partitioned table manually
CREATE TABLE if not exists data_center.test_partition (
id int,
name string,
age int
)
PARTITIONED BY (date string)
row format delimited fields terminated by ','
stored as textfile;

# Modify a partition (set location / rename)
ALTER TABLE table_name PARTITION (dt='2008-08-08') SET LOCATION "new location";
ALTER TABLE data_center.test_partition PARTITION (date_id='2008-08-08') RENAME TO PARTITION (dt='20080808');
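A quick way to confirm the partition changes took effect is to list the partitions:

SHOW PARTITIONS data_center.test_partition;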

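# Dynamic partition insert: the partition value is taken from the last SELECT column (date_id);
# nonstrict mode allows every partition column to be dynamic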
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
insert overwrite table data_center.test_partition partition (date_id) select * from data_center.test_partition where name in ('ccc','lisi');

# Incremental partition update
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
insert overwrite table data_center.test_partition partition(date_id) 
select tmp.id,tmp.name,tmp.age,tmp.date_id from tmp.temp_test_partition tmp 
union all select a.* from data_center.test_partition a
left outer join tmp.temp_test_partition b on a.id=b.id where b.id is null and a.date_id in ('2017-06-14','2017-06-15','2017-06-16');
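The pattern: the first branch of the UNION ALL supplies the fresh rows from the staging table; the anti-join (b.id is null) carries over only those existing rows whose id was not re-delivered, restricted to the partitions being rewritten.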


CREATE TABLE zjs_cust_endurance_time_copy (
id int,
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date
)
row format delimited fields terminated by ','
stored as textfile;

# Load local .txt data into the table
load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_update.txt' 
OVERWRITE into table data_center.zjs_cust_endurance_time_copy;
Existing data in the HDFS target table:
106 2017-06-21 120001 1062.22 2017-05-27
107 2017-06-21 310001 1387.85 2017-05-27
108 2017-06-21 111111 100.0 2017-05-27

Data extracted from MySQL:
106 2017-04-01 120000 2062.22 2017-04-27
107 2017-04-01 310000 2387.85 2017-04-27

Data in the HDFS target table after the run:
108 2017-06-21 111111 100.0 2017-05-27
106 2017-04-01 120000 2062.22 2017-04-27
107 2017-04-01 310000 2387.85 2017-04-27

# Variant: the table initially had no id column; id was added manually as the last column
CREATE TABLE zjs_cust_endurance_time_copy (
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date,
id int 
)
row format delimited fields terminated by ','
stored as textfile;

Existing data in the HDFS target table:
2017-06-21 120001 1062.22 2017-05-27 106
2017-06-21 310001 1387.85 2017-05-27 107
2017-06-21 111111 100.0 2017-05-27 108

Data extracted from the MySQL table:
2017-04-01 120000 2062.22 2017-04-27 06:46:30.0 106 
2017-04-01 310000 2387.85 2017-04-27 06:46:30.0 107 
2017-04-01 320100 2027.64 2017-04-27 06:46:30.0 108 
2017-04-01 330100 3763.16 2017-04-27 06:46:30.0 109

Data in the HDFS target table after the run (the date column drops the time component):
2017-04-01 120000 2062.22 2017-04-27 106
2017-04-01 310000 2387.85 2017-04-27 107
2017-04-01 320100 2027.64 2017-04-27 108
2017-04-01 330100 3763.16 2017-04-27 109

# Load local .txt data into a partitioned table
CREATE TABLE data_center.zjs_cust_endurance_time_copy_part (
id int,
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date
)
PARTITIONED BY (date string)
row format delimited fields terminated by ','
stored as textfile;

load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_partupdate.txt' 
OVERWRITE into table data_center.zjs_cust_endurance_time_copy_part PARTITION(date='2017-06-21');
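LOAD DATA only moves the file into the partition directory; it does not parse or reorder columns, so the txt layout must already match the comma-delimited schema (without the date partition column, which comes from the PARTITION clause).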

Existing data in the HDFS target table:
select * from data_center.zjs_cust_endurance_time_copy_part;
108 2017-06-21 111111 100.0 2017-05-27 2017-06-21
106 2017-04-01 222222 99.0 2017-04-27 2017-06-21

Data extracted from MySQL:
106 2017-04-01 120000 2062.22 2017-04-27 06:46:30.0
107 2017-04-01 310000 2387.85 2017-04-27 06:46:30.0
108 2017-04-01 320100 2027.64 2017-04-27 06:46:30.0

insert overwrite table data_center.zjs_cust_endurance_time_copy_part partition(date) 
select tmp.id,tmp.computer_time,tmp.user_city_code,tmp.avg_patient_time,tmp.update_time,tmp.date 
from tmp.temp_zjs_cust_endurance_time_copy_part tmp 
union all select a.* from data_center.zjs_cust_endurance_time_copy_part a 
left outer join tmp.temp_zjs_cust_endurance_time_copy_part b 
on a.id=b.id where b.id is null;

# Download HDFS file data to the local filesystem
hadoop fs -get <source path> <local directory>
hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/word_category_weights/000000_0 /home/ds/mydata
hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/zjs_user_result/000000_0 /home/ds/mydata/zjs_user_result
# Download the file from the server over the terminal (sz, from the lrzsz package):
sz /home/ds/mydata/000000_0

create table data_center.word_category_weights(
keyword string,
category_id string,
weight_auto double,
category_name string,
del int
)
row format delimited fields terminated by ','
stored as textfile;

load data LOCAL INPATH '/home/users/wcy/000000_0'
OVERWRITE into table data_center.word_category_weights;

load data LOCAL INPATH '/home/users/wcy/000000_0_more_data'
OVERWRITE into table data_center.word_category_weights;

load data LOCAL INPATH '/home/users/wcy/zjs_user_result' OVERWRITE into table data_center.zjs_user_result;

load data LOCAL INPATH '/home/users/wcy/goodsinfo' OVERWRITE into table data_center.goodsinfo;

# Start the Hive metastore and HiveServer2 service processes
hive --service metastore &
hive --service hiveserver2 &
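Started with &, both run in the background of the current shell; a quick check that the services came up:

ps -ef | grep -iE 'metastore|hiveserver2'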

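# DataX job log excerpt, kept for reference: the RDBMS writer warns it is rolling back the batch
# and falling back to committing one row at a time because of an "Unknown command" error: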
UnstructuredStorageReaderUtil - CsvReader使用默认值[{"captureRawRecord":true,"columnCount":0,"comment":"#","currentRecord":-1,"delimiter":",","escapeMode":1,"headerCount":0,"rawRecord":"","recordDelimiter":"\u0000","safetySwitch":true,"skipEmptyRecords":true,"textQualifier":"\"","trimWhitespace":true,"useComments":false,"useTextQualifier":true,"values":[]}],csvReaderConfig值为[null]
2017-06-26 16:39:24.963 [0-0-0-writer] WARN CommonRdbmsWriter$Task - 回滚此次写入, 采用每次写入一行方式提交. 因为:Unknown command
2017-06-26 16:39:44.629 [job-0] INFO StandAloneJobContainerCommunicator - Total 2560 records, 45917 bytes | Speed 4.48KB/s, 256 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.006s | All Task WaitReaderTime 0.138s | Percentage 0.00%
2017-06-26 16:39:45.393 [0-0-0-writer] WARN CommonRdbmsWriter$Task - 回滚此次写入, 采用每次写入一行方式提交. 因为:Unknown command
2017-06-26 16:39:53.884 [0-0-0-writer] WARN CommonRdbmsWriter$Task - 回滚此次写入, 采用每次写入一行方式提交. 因为:Unknown command
2017-06-26 16:39:54.630 [job-0] INFO StandAloneJobContainerCommunicator - Total 4608 records, 81924 bytes | Speed 3.52KB/s, 204 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 11.211s | All Task WaitReaderT

# Clean up a leftover .hive-staging directory (directories need -rm -r)
hadoop fs -rm -r hdfs://SAD-HDP-003:9000/user/hive/warehouse/ods.db/soon_tbl_address_part/.hive-staging_hive_2017-06-23_18-23-08_102_5432451293100937443-1

CREATE TABLE goodsinfo(
create_time string,
deal_code bigint , 
store_code bigint, 
category_code1 bigint, 
category_code2 bigint, 
category_code3 bigint, 
category_code4 bigint, 
goods_code bigint, 
term_code int, 
term_name string, 
store_name string, 
goods_name string, 
deal_name string, 
goods_image string, 
goods_pv int, 
goods_uv int, 
goods_pv_total int, 
time_total int, 
jump2_pv int, 
collect_total int, 
add_total int, 
pay_total int, 
pay_amount decimal(10,2))
row format delimited fields terminated by ','
stored as textfile;

CREATE TABLE zjs_user_result_1(
compute_day string,
member_id string,
mobile string,
uc_user_id string,
real_name string,
register_time string,
order_city_code string,
city_name string,
manage_amount double,
deduction double,
freight double,
actual_paid double,
insurance_amount double,
complete_orders string,
deduction_all double,
freight_all double,
complete_orders_all string,
last_complete_time string,
order_time string,
cancel_order_before string,
cancel_order_after string,
order_nums string,
invite_peoples string,
invite_peoples_all string,
share_orders string,
share_orders_all string)
row format delimited fields terminated by ','
stored as textfile;
