set hive.cli.print.current.db=true;
set hive.mapred.mode=strict;
set hive.mapred.mode=nonstrict;
SHOW PARTITIONS tablename;

--Dynamic Partition Inserts --by position not by names

INSERT OVERWRITE TABLE employees
PARTITION (country, state)
SELECT ..., se.cnty, se.st
FROM staged_employees se;

SET hive.map.aggr=true;

----with this way , we can not generate the temporary table

FROM (
SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,
  round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes
  FROM employees
) e
SELECT e.name, e.salary_minus_fed_taxes
WHERE e.salary_minus_fed_taxes > 70000;

--When Hive Can Avoid MapReduce

set hive.exec.mode.local.auto=true;

--Hive supports the classic SQL JOINstatement, but only equi-joinsare supported.
--Hive also assumes that the lasttable in the query is the  largest
--It attempts to buffer the other tables and then stream the last table through
-- you should structure your join queries so the largest table is last.

SELECT /*+ STREAMTABLE(s) */ s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';

set hive.auto.convert.join=true;
hive.mapjoin.smalltable.filesize=25000000;--table size less than this can use in map phase
SELECT /*+ MAPJOIN(d) */ s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';

set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;

--Using  DISTRIBUTE BY ... SORT BYor the shorthand  CLUSTER BYclauses is a way to exploit
--the parallelism of SORT BY, yet achieve a total ordering across the output files.
--this method is better than use order by (just one reducer);

--Queries that Sample Data

SELECT * from numbers TABLESAMPLE(BUCKET 3 OUT OF 10 ON rand()) s;
SELECT * FROM numbersflat TABLESAMPLE(0.1 PERCENT) s;--block sampling

--index

CREATE INDEX employees_index
ON TABLE employees (country)
AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
WITH DEFERRED REBUILD;

hiveql basic的更多相关文章

  1. Atitit HTTP 认证机制基本验证 (Basic Authentication) 和摘要验证 (Digest Authentication)attilax总结

    Atitit HTTP认证机制基本验证 (Basic Authentication) 和摘要验证 (Digest Authentication)attilax总结 1.1. 最广泛使用的是基本验证 ( ...

  2. Basic Tutorials of Redis(9) -First Edition RedisHelper

    After learning the basic opreation of Redis,we should take some time to summarize the usage. And I w ...

  3. Basic Tutorials of Redis(8) -Transaction

    Data play an important part in our project,how can we ensure correctness of the data and prevent the ...

  4. Basic Tutorials of Redis(7) -Publish and Subscribe

    This post is mainly about the publishment and subscription in Redis.I think you may subscribe some o ...

  5. Basic Tutorials of Redis(6) - List

    Redis's List is different from C#'s List,but similar with C#'s LinkedList.Sometimes I confuse with t ...

  6. Basic Tutorials of Redis(5) - Sorted Set

    The last post is mainly about the unsorted set,in this post I will show you the sorted set playing a ...

  7. Basic Tutorials of Redis(4) -Set

    This post will introduce you to some usages of Set in Redis.The Set is a unordered set,it means that ...

  8. Basic Tutorials of Redis(3) -Hash

    When you first saw the name of Hash,what do you think?HashSet,HashTable or other data structs of C#? ...

  9. Basic Tutorials of Redis(2) - String

    This post is mainly about how to use the commands to handle the Strings of Redis.And I will show you ...

随机推荐

  1. mybatis mapper association collection

    1.Question Description: sometimes, POJO bean contains another bean or collection as property, it's s ...

  2. HTML CSS样式表布局

    一.position:fixed 锁定位置(相对于浏览器的位置),例如有些网站的右下角的弹出窗口. 示例: 二.position:absolute 1.外层没有position:absolute(或r ...

  3. 【JavaEE】Hibernate继承映射,不用多态查询只查父表的方法

    几个月前,我在博问里面发了一个问题:http://q.cnblogs.com/q/64900/,但是一直没有找到好的答案,关闭问题以后才自己解决了,在这里分享一下. 首先我重复一下场景,博问里面举的动 ...

  4. mongodb 基本指令学习 (2)

    db.collectionname.find(<criteria>, <projection>) <criteria>   可选   类型 文档    文档的过滤条 ...

  5. Egret Engine(白鹭引擎)介绍及windows下安装

    Egret Engine简要介绍----- Egret Engine(白鹭引擎)[Egret Engine官网:http://www.egret-labs.org/]是一款使用TypeScript语言 ...

  6. ABAP:区别CALL SCREEN/SET SCREEN/LEAVE TO SCREEN

    1,CALL SCREEN XXXX将在Screen调用栈(CALL STACK)上面添加一层调用(进栈),调用XXXX的PBO和PAI,如果XXXX的Next Screen不为0,那么将继续其Nex ...

  7. 地理数据可视化:Simple,Not Easy

    如果要给2015年的地理信息行业打一个标签,地理大数据一定是其中之一.在信息技术飞速发展的今天,“大数据”作为一种潮流铺天盖地的席卷了各行各业,从央视的春运迁徙图到旅游热点预测,从大数据工程师奇货可居 ...

  8. Mybatis学习记录(四)----resultMap的使用

    resultMap使用方法 如果查询出来的列名和pojo的属性名不一致,通过定义一个resultMap对列名和pojo属性名之间作一个映射关系. 1.定义resultMap 2.使用resultMap ...

  9. sharepoint 中waiting screen dialog的使用方法(JSOM)

    sharepoint中有一个种wait screen的弹出框,其实就是一直转圈,告诉你等待一会儿时间.用法如下: 弹出: var watiDialog = SP.UI.ModalDialog.show ...

  10. SQL学习笔记:选取第N条记录

    Northwind数据库,选取价格第二高的产品. 有两种方法,一个是用Row_Number()函数: SELECT productname FROM ( productname, Row_Number ...