Creating partitioned tables in Hive
# Create a partitioned table
CREATE TABLE if not exists data_center.test_partition (
id int,
name string,
age int
)
PARTITIONED BY (date_id string)
row format delimited fields terminated by ','
stored as textfile
# The LOCATION clause is optional; if omitted, Hive uses the default warehouse path
LOCATION
'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition';
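# Quick checks (not from the original notes) to confirm the table and its partitions:
SHOW PARTITIONS data_center.test_partition;
DESC FORMATTED data_center.test_partition;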
# Add partitions
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-06') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06';
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-07') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07';
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-08') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08';
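# If the partition directories already exist on HDFS under date_id=... names, MSCK can register them all at once instead of one ALTER per partition (a sketch, not from the original notes):
MSCK REPAIR TABLE tmp.soon_tbl_address;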
# Insert data (with gzip-compressed output):
set hive.exec.compress.output=true;
set mapred.output.compress=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec;
SET mapred.output.compression.type=BLOCK;
insert overwrite table seqfile_table select * from textfile_table;
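# Sanity check (not from the original notes): running set with only the key name prints its current value in the Hive CLI
set hive.exec.compress.output;
# expected: hive.exec.compress.output=true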
# Column metadata for soon_tbl_address (all columns typed string): pass_address_id, order_id, address_type, receiver, receiver_tel, zip_code, province, city, district, address, x_coordinate, y_coordinate, del_flg, create_time, create_user, update_time, update_user, address_order, midway_order_status, street, order_type, out_order_id, poi_title
# Copy a table's structure
create table tmp.soon_tbl_address like default.soon_tbl_address;
# Copy data into tmp.soon_tbl_address (reading here from ods.soon_tbl_address), one partition at a time
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-05') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 3;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-06') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 6;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-07') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 9;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-08') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 12;
# Column order in tmp.soon_tbl_address, with date_id as the partition column: pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update,date_id
# Partition data lands under paths like:
/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-02-08/000000_0
# Drop a partition and its data
ALTER TABLE soon_tbl_address DROP PARTITION (date_id='2017-06-07');
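# Several partitions can be dropped in one statement (a sketch, not from the original notes):
ALTER TABLE soon_tbl_address DROP PARTITION (date_id='2017-06-06'), PARTITION (date_id='2017-06-08');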
# Delete a table's data while keeping the table
insert overwrite table tmp.soon_tbl_address select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address where 1=0;
truncate table tmp.soon_tbl_address;
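# TRUNCATE can also target a single partition (a sketch, not from the original notes):
truncate table tmp.soon_tbl_address partition (date_id='2017-06-06');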
# List files in HDFS
[ds@master ~]$ hadoop fs -ls /user/hive/warehouse/tmp.db/soon_tbl_address
17/06/09 16:02:20 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 5 items
drwxr-xr-x - ds supergroup 0 2017-06-09 11:07 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06
drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
drwxr-xr-x - ds supergroup 0 2017-06-09 14:23 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
drwxr-xr-x - ds supergroup 0 2017-06-09 14:56 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__b6a456c4_73b6_4fec_8dfa_ddbe38f55856
drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08
# Delete HDFS files and directories recursively
[ds@master ~]$ hadoop fs -rmr hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
rmr: DEPRECATED: Please use 'rm -r' instead.
17/06/09 16:03:24 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/06/09 16:03:25 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2' to trash at: hdfs://master:9000/user/ds/.Trash/Current/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
[ds@master ~]$
# Export data to a local directory
INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my' SELECT * FROM tmp.soon_tbl_address;
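# Since Hive 0.11 the export delimiter can be set inline, which makes the local files easier to read (a sketch, not from the original notes):
INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
SELECT * FROM tmp.soon_tbl_address;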
# Load a local .txt file (on Linux) into a specific partition
load data LOCAL INPATH '/home/users/my/test_table.txt' OVERWRITE into table data_center.test_table PARTITION(date='2017-06-07');
load data LOCAL INPATH '/home/users/my/test_partition.txt' OVERWRITE into table tmp.temp_test_partition PARTITION(date='2017-06-13');
# Create a non-partitioned table manually
CREATE TABLE IF NOT EXISTS data_center.test_no_partition(
id int ,
name string ,
age int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
stored as textfile;
# Create an empty table with the same structure
CREATE TABLE data_center.test_no_partition LIKE tmp.test_no_partition;
# Rename a column and change its type
ALTER TABLE test_no_partition CHANGE ip id int;
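# Columns can be appended the same way (a sketch, not from the original notes; the email column is hypothetical):
ALTER TABLE test_no_partition ADD COLUMNS (email string);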
# Drop the table and its data
drop table test_no_partition;
# Delete table data while keeping the structure
# Note: DELETE only works on ACID (transactional) tables; on a plain table use TRUNCATE or an INSERT OVERWRITE that filters rows
delete from test_no_partition;
insert overwrite table test_no_partition select id,name,age from test_no_partition where id=3;
# Grant permissions on a Hive database directory
hdfs dfs -chmod 777 hdfs://master:9000/user/hive/warehouse/tmp.db
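# Add -R to apply the mode recursively to everything under the directory (a sketch, not from the original notes):
hdfs dfs -chmod -R 777 hdfs://master:9000/user/hive/warehouse/tmp.db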
# Rename an HDFS directory
hadoop fs -mv hdfs://master:9000/user/hive/warehouse/ods.db/test_table__cb086c0f_88ee_4623_938c_311a1e717c8a hdfs://master:9000/user/hive/warehouse/ods.db/test_table
CREATE TABLE ods.test_table_tmp(
id int,
name_path string,
parent_path string
)
row format delimited fields terminated by ','
stored as textfile
# The LOCATION clause is optional
LOCATION
'hdfs://master:9000/user/hive/warehouse/ods.db/test_table_tmp';
# Example data file in HDFS (read below with -cat):
hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a
# Export to a local file
insert overwrite local directory '/home/ds/user/my' select * from ods.test_table;
# View an HDFS file's contents
hdfs dfs -cat hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a
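# Staging table for the incremental load below; note date_id is a regular column here, not a partition column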
CREATE TABLE `tmp.temp_test_partition`(
`id` int,
`name` string,
`age` int,
`date_id` string)
row format delimited fields terminated by ','
stored as textfile;
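# Full DDL of data_center.test_partition (apparently SHOW CREATE TABLE output):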
CREATE TABLE `data_center.test_partition`(
`id` int,
`name` string,
`age` int)
PARTITIONED BY ( `date_id` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ( 'field.delim'=',', 'serialization.format'=',')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition'
TBLPROPERTIES ( 'transient_lastDdlTime'='1497492633')
# Create the partitioned table manually
CREATE TABLE if not exists data_center.test_partition (
id int,
name string,
age int
)
PARTITIONED BY (date string)
row format delimited fields terminated by ','
stored as textfile;
# Modify partitions
ALTER TABLE table_name PARTITION (dt='2008-08-08') SET LOCATION "new location";
ALTER TABLE data_center.test_partition PARTITION (date_id='2008-08-08') RENAME TO PARTITION (dt='20080808');
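# Dynamic partition insert: the partition value comes from the last column of the select instead of a literal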
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
insert overwrite table data_center.test_partition partition (date_id) select * from data_center.test_partition where name in ('ccc','lisi');
# Incremental partition update
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
# The anti-join ("b.id is null") keeps target rows whose id is absent from the staging table, so staging rows replace matching rows in the listed partitions
insert overwrite table data_center.test_partition partition(date_id)
select tmp.id,tmp.name,tmp.age,tmp.date_id from tmp.temp_test_partition tmp
union all select a.* from data_center.test_partition a
left outer join tmp.temp_test_partition b on a.id=b.id where b.id is null and a.date_id in ('2017-06-14','2017-06-15','2017-06-16');
# Local data file extracted from MySQL: /home/ds/users/wcy/mysql_to_hdfs_update.txt
CREATE TABLE data_center.zjs_cust_endurance_time_copy (
id int,
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date
)
row format delimited fields terminated by ','
stored as textfile;
# Load local txt data into the table
load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_update.txt'
OVERWRITE into table data_center.zjs_cust_endurance_time_copy;
# Existing data in the target Hive table:
106 2017-06-21 120001 1062.22 2017-05-27
107 2017-06-21 310001 1387.85 2017-05-27
108 2017-06-21 111111 100.0 2017-05-27
# Rows extracted from MySQL:
106 2017-04-01 120000 2062.22 2017-04-27
107 2017-04-01 310000 2387.85 2017-04-27
# Target table contents after the job runs:
108 2017-06-21 111111 100.0 2017-05-27
106 2017-04-01 120000 2062.22 2017-04-27
107 2017-04-01 310000 2387.85 2017-04-27
# Variant: the table structure had no id column, so id was added manually at the end
CREATE TABLE data_center.zjs_cust_endurance_time_copy (
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date,
id int
)
row format delimited fields terminated by ','
stored as textfile;
# Existing data in the target Hive table:
2017-06-21 120001 1062.22 2017-05-27 106
2017-06-21 310001 1387.85 2017-05-27 107
2017-06-21 111111 100.0 2017-05-27 108
# Rows extracted from the MySQL table:
2017-04-01 120000 2062.22 2017-04-27 06:46:30.0 106
2017-04-01 310000 2387.85 2017-04-27 06:46:30.0 107
2017-04-01 320100 2027.64 2017-04-27 06:46:30.0 108
2017-04-01 330100 3763.16 2017-04-27 06:46:30.0 109
# Target table contents after the job runs:
2017-04-01 120000 2062.22 2017-04-27 106
2017-04-01 310000 2387.85 2017-04-27 107
2017-04-01 320100 2027.64 2017-04-27 108
2017-04-01 330100 3763.16 2017-04-27 109
# Load local txt data into a partitioned table
CREATE TABLE data_center.zjs_cust_endurance_time_copy_part (
id int,
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date
)
PARTITIONED BY (date string)
row format delimited fields terminated by ','
stored as textfile;
load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_partupdate.txt'
OVERWRITE into table data_center.zjs_cust_endurance_time_copy_part PARTITION(date='2017-06-21');
# Existing data in the target partitioned table:
select * from data_center.zjs_cust_endurance_time_copy_part;
108 2017-06-21 111111 100.0 2017-05-27 2017-06-21
106 2017-04-01 222222 99.0 2017-04-27 2017-06-21
# Rows extracted from MySQL:
106 2017-04-01 120000 2062.22 2017-04-27 06:46:30.0
107 2017-04-01 310000 2387.85 2017-04-27 06:46:30.0
108 2017-04-01 320100 2027.64 2017-04-27 06:46:30.0
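# Merge the extracted rows into the partitioned table, using the same anti-join pattern as above: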
insert overwrite table data_center.zjs_cust_endurance_time_copy_part partition(date)
select tmp.id,tmp.computer_time,tmp.user_city_code,tmp.avg_patient_time,tmp.update_time,tmp.date
from tmp.temp_zjs_cust_endurance_time_copy_part tmp
union all select a.* from data_center.zjs_cust_endurance_time_copy_part a
left outer join tmp.temp_zjs_cust_endurance_time_copy_part b
on a.id=b.id where b.id is null;
# Download HDFS files to the local filesystem
hadoop fs -get <source path> <local directory>
hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/word_category_weights/000000_0 /home/ds/mydata
hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/zjs_user_result/000000_0 /home/ds/mydata/zjs_user_result
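# When a table directory holds many part files, -getmerge concatenates them into one local file (a sketch, not from the original notes; the local file name is hypothetical):
hadoop fs -getmerge hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/zjs_user_result /home/ds/mydata/zjs_user_result.txt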
# Download the file to the client machine (sz comes with the lrzsz package)
sz /home/ds/mydata/000000_0
create table data_center.word_category_weights(
keyword string,
category_id string,
weight_auto double,
category_name string,
del int
)
row format delimited fields terminated by ','
stored as textfile;
load data LOCAL INPATH '/home/users/wcy/000000_0'
OVERWRITE into table data_center.word_category_weights;
# Note: OVERWRITE replaces the table's existing data; omit it to append instead
load data LOCAL INPATH '/home/users/wcy/000000_0_more_data'
OVERWRITE into table data_center.word_category_weights;
load data LOCAL INPATH '/home/users/wcy/zjs_user_result' OVERWRITE into table data_center.zjs_user_result;
load data LOCAL INPATH '/home/users/wcy/goodsinfo' OVERWRITE into table data_center.goodsinfo;
# Start the Hive metastore and HiveServer2 service processes
hive --service metastore &
hive --service hiveserver2 &
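# With HiveServer2 up, beeline can connect over JDBC (a sketch, not from the original notes, assuming the default port 10000 and user ds):
beeline -u jdbc:hive2://master:10000 -n ds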
# Sample DataX job log: the CSV reader falls back to defaults and the RDBMS writer retries row by row
UnstructuredStorageReaderUtil - CsvReader using default values [{"captureRawRecord":true,"columnCount":0,"comment":"#","currentRecord":-1,"delimiter":",","escapeMode":1,"headerCount":0,"rawRecord":"","recordDelimiter":"\u0000","safetySwitch":true,"skipEmptyRecords":true,"textQualifier":"\"","trimWhitespace":true,"useComments":false,"useTextQualifier":true,"values":[]}], csvReaderConfig is [null]
2017-06-26 16:39:24.963 [0-0-0-writer] WARN CommonRdbmsWriter$Task - rolling back this write and committing one row at a time, because: Unknown command
2017-06-26 16:39:44.629 [job-0] INFO StandAloneJobContainerCommunicator - Total 2560 records, 45917 bytes | Speed 4.48KB/s, 256 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.006s | All Task WaitReaderTime 0.138s | Percentage 0.00%
2017-06-26 16:39:45.393 [0-0-0-writer] WARN CommonRdbmsWriter$Task - rolling back this write and committing one row at a time, because: Unknown command
2017-06-26 16:39:53.884 [0-0-0-writer] WARN CommonRdbmsWriter$Task - rolling back this write and committing one row at a time, because: Unknown command
2017-06-26 16:39:54.630 [job-0] INFO StandAloneJobContainerCommunicator - Total 4608 records, 81924 bytes | Speed 3.52KB/s, 204 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 11.211s | All Task WaitReaderT
2017-06-27 10:37:49.949 [0-0-0-writer] WARN CommonRdbmsWriter$Task - rolling back this write and committing one row at a time, because: Unknown command
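# Remove a leftover Hive staging directory: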
hadoop fs -rm hdfs://SAD-HDP-003:9000/user/hive/warehouse/ods.db/soon_tbl_address_part/.hive-staging_hive_2017-06-23_18-23-08_102_5432451293100937443-1
CREATE TABLE goodsinfo(
create_time string,
deal_code bigint ,
store_code bigint,
category_code1 bigint,
category_code2 bigint,
category_code3 bigint,
category_code4 bigint,
goods_code bigint,
term_code int,
term_name string,
store_name string,
goods_name string,
deal_name string,
goods_image string,
goods_pv int,
goods_uv int,
goods_pv_total int,
time_total int,
jump2_pv int,
collect_total int,
add_total int,
pay_total int,
pay_amount decimal(10,2))
row format delimited fields terminated by ','
stored as textfile;
CREATE TABLE zjs_user_result_1(
compute_day string,
member_id string,
mobile string,
uc_user_id string,
real_name string,
register_time string,
order_city_code string,
city_name string,
manage_amount double,
deduction double,
freight double,
actual_paid double,
insurance_amount double,
complete_orders string,
deduction_all double,
freight_all double,
complete_orders_all string,
last_complete_time string,
order_time string,
cancel_order_before string,
cancel_order_after string,
order_nums string,
invite_peoples string,
invite_peoples_all string,
share_orders string,
share_orders_all string)
row format delimited fields terminated by ','
stored as textfile;