-- Session setup for the HiveQL examples in this file.
-- Per the Hive configuration docs, this makes the CLI prompt show the current database.
set hive.cli.print.current.db=true;
-- hive.mapred.mode is assigned twice in a row: the second assignment overrides the
-- first, so the session ends up in nonstrict mode. Keep only the line you need.
set hive.mapred.mode=strict;
set hive.mapred.mode=nonstrict;
-- List the partitions of a table (tablename is presumably a placeholder for a real table).
SHOW PARTITIONS tablename;

-- Dynamic partition inserts.
-- Hive fills the dynamic partition columns by POSITION, not by name: the last
-- columns of the SELECT list feed PARTITION (country, state) in that order
-- (se.cnty -> country, se.st -> state).
-- Every partition column here is dynamic, which Hive only accepts when dynamic
-- partitioning is enabled and its mode is nonstrict.
-- The "..." in the SELECT list is a placeholder for the non-partition columns
-- and must be replaced before this statement can run.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

INSERT OVERWRITE TABLE employees
PARTITION (country, state)
SELECT ..., se.cnty, se.st
FROM staged_employees se;

-- Turn on hive.map.aggr; per the Hive configuration docs this enables map-side
-- (partial) aggregation for GROUP BY queries.
SET hive.map.aggr=true;

-- A nested SELECT in the FROM clause lets the outer query use the computed
-- columns directly, so no temporary table has to be created.
-- Every column the outer query reads needs an explicit alias: without
-- "AS name" Hive exposes upper(name) under a generated name (_c0) and the
-- outer reference e.name does not resolve.
FROM (
  SELECT
    upper(name) AS name,
    salary,
    deductions["Federal Taxes"] AS fed_taxes,
    round(salary * (1 - deductions["Federal Taxes"])) AS salary_minus_fed_taxes
  FROM employees
) e
SELECT e.name, e.salary_minus_fed_taxes
WHERE e.salary_minus_fed_taxes > 70000;

-- When Hive can avoid MapReduce:
-- per the Hive configuration docs, hive.exec.mode.local.auto lets Hive decide
-- to run a query in local mode automatically instead of submitting a cluster job.

set hive.exec.mode.local.auto=true;

-- Hive supports the classic SQL JOIN statement, but only equi-joins.
-- Hive assumes the last table in the query is the largest: it tries to buffer
-- the other tables and streams the last one through, so by default the
-- largest table should be written last.
-- The STREAMTABLE hint below names the table to stream explicitly (s).

SELECT /*+ STREAMTABLE(s) */
  s.ymd,
  s.symbol,
  s.price_close,
  d.dividend
FROM stocks s
JOIN dividends d
  ON  s.ymd = d.ymd
  AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';

-- Map-side join: let Hive convert joins against small tables into map joins.
set hive.auto.convert.join=true;
-- Size threshold in bytes: a table smaller than this can be joined in the map phase.
set hive.mapjoin.smalltable.filesize=25000000;
-- The MAPJOIN hint asks for the dividends table (d) to be the in-memory side.
SELECT /*+ MAPJOIN(d) */ s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';

-- Session settings for bucket map joins and sort-merge bucket joins.
-- NOTE(review): per the Hive join-optimization docs these only help when the
-- joined tables are bucketed (and, for the sorted merge, sorted) on the join
-- keys; confirm the tables used with these settings meet that requirement.
set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;

--Using DISTRIBUTE BY ... SORT BY, or the shorthand CLUSTER BY clause, is a way to exploit
--the parallelism of SORT BY, yet achieve a total ordering across the output files.
--This scales better than ORDER BY, which sends all rows through a single reducer.

-- Queries that sample data

-- Bucket sampling: take bucket 3 out of 10, bucketing the rows on rand().
SELECT *
FROM numbers TABLESAMPLE(BUCKET 3 OUT OF 10 ON rand()) s;

-- Block sampling: 0.1 percent of the input.
SELECT *
FROM numbersflat TABLESAMPLE(0.1 PERCENT) s;

-- Index: create employees_index on employees(country) using the compact index handler.
-- WITH DEFERRED REBUILD: per the Hive index docs the index is created empty and
-- has to be populated afterwards with
--   ALTER INDEX employees_index ON employees REBUILD;
-- NOTE(review): Hive 3.0 removed index support; confirm the target Hive version.

CREATE INDEX employees_index
ON TABLE employees (country)
AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
WITH DEFERRED REBUILD;

hiveql basic的更多相关文章

  1. Atitit HTTP 认证机制基本验证 (Basic Authentication) 和摘要验证 (Digest Authentication)attilax总结

    Atitit HTTP认证机制基本验证 (Basic Authentication) 和摘要验证 (Digest Authentication)attilax总结 1.1. 最广泛使用的是基本验证 ( ...

  2. Basic Tutorials of Redis(9) -First Edition RedisHelper

    After learning the basic operations of Redis, we should take some time to summarize the usage. And I w ...

  3. Basic Tutorials of Redis(8) -Transaction

    Data play an important part in our project,how can we ensure correctness of the data and prevent the ...

  4. Basic Tutorials of Redis(7) -Publish and Subscribe

    This post is mainly about the publishment and subscription in Redis.I think you may subscribe some o ...

  5. Basic Tutorials of Redis(6) - List

    Redis's List is different from C#'s List,but similar with C#'s LinkedList.Sometimes I confuse with t ...

  6. Basic Tutorials of Redis(5) - Sorted Set

    The last post is mainly about the unsorted set,in this post I will show you the sorted set playing a ...

  7. Basic Tutorials of Redis(4) -Set

    This post will introduce you to some usages of Set in Redis. The Set is an unordered set, it means that ...

  8. Basic Tutorials of Redis(3) -Hash

    When you first saw the name of Hash,what do you think?HashSet,HashTable or other data structs of C#? ...

  9. Basic Tutorials of Redis(2) - String

    This post is mainly about how to use the commands to handle the Strings of Redis.And I will show you ...

随机推荐

  1. 安装jdk For Windows

    1.下载JDK查看最新:http://www.oracle.com/technetwork/java/javase/downloads/index.html根据操作系统选择合适的JDK进行下载2.运行 ...

  2. 用C#开发的双色球走势图(二)

    昨晚由于时间的原因只写了一部分内容,今天将这一部分内容补充完毕,多谢各位园友的支持. 这是用C#开发的双色球走势图(一)新的园友可以看昨晚写的内容,以免脱节.首先回复园友的评论,有说好的有说不好的,本 ...

  3. 分享AceAdminUI后台框架-你喜欢吗?

    距离上次写文章也很久了,这次分享一下自己刚刚看上的一款UI框架(自己买的),国外货,提供下载 第100位评论的我将会送出一个小礼物 礼物链接:http://yanghenglian.taobao.co ...

  4. csharp:Compare two DataTables to rows in one but not the other

    /// <summary> /// 账面数据 Accounting /// </summary> /// <returns></returns> Dat ...

  5. ActiveReports 报表应用教程 (2)---清单类报表

    在大多报表系统中都有清单类报表的身影,比如:客户清单.商品信息清单.设备清单.物品采购清单.记账凭证.货品发货清单.员工清单等等.清单类报表看视乎比较简单,但是,由清单类报表演变而来的报表类型却十分丰 ...

  6. Maven初步搭建 (一)

    什么是maven? 也许很多人开始的时候跟我一样,在看了很多工程之后都不知道这个鸟东西到底是用来干嘛用的!:-D 一个东西之所以会出现是有其原因的,譬如Linus大神写git. Maven项目对象模型 ...

  7. Python可变参数

    #!/usr/bin/env python # -*- coding: utf-8 -*- import math def calc(*numbers): sum=0 for n in numbers ...

  8. 简单理解JavaScript闭包

    很多关于JS的书籍例如<JavaScript权威指南>或者<高程>都把闭包解释的晦涩难懂,萌新们是怎么也看不懂啊!不过别怕,今天我就用很简单的方式给大家讲解下到底什么是闭包.这 ...

  9. SharePoint 使用PowerShell恢复误删的网站集

    在SharePoint网站集的使用中,我们很有可能会误删我们需要的网站集,SharePoint其实并没有把网站集删掉,只是放到了SPDeletedSite中,这样,我们还可以通过PowerShell找 ...

  10. 关于停止AsyncTask和Thread的问题

    在java的线程中,没有办法停止一个正在运行中的线程.在Android的AsyncTask中也是一样的.如果必须要停止一个线程,可以采用这个线程中设置一个标志位,然后在线程run方法或AsyncTas ...