pig和mysql脚本对比
测试数据位于:/home/hadoop/luogankun/workspace/sync_data/pig
dept和emp表来自Oracle数据库自带的示例表
dept.txt
10 ACCOUNTING NEW YORK
20 RESEARCH DALLAS
30 SALES CHICAGO
40 OPERATIONS BOSTON
emp.txt(字段以制表符分隔;mgr、comm为空时对应位置保留空字段)
7369 SMITH CLERK 7902 1980-12-17 800.00 20
7499 ALLEN SALESMAN 7698 1981-2-20 1600.00 300.00 30
7521 WARD SALESMAN 7698 1981-2-22 1250.00 500.00 30
7566 JONES MANAGER 7839 1981-4-2 2975.00 20
7654 MARTIN SALESMAN 7698 1981-9-28 1250.00 1400.00 30
7698 BLAKE MANAGER 7839 1981-5-1 2850.00 30
7782 CLARK MANAGER 7839 1981-6-9 2450.00 10
7788 SCOTT ANALYST 7566 1987-4-19 3000.00 20
7839 KING PRESIDENT 1981-11-17 5000.00 10
7844 TURNER SALESMAN 7698 1981-9-8 1500.00 0.00 30
7876 ADAMS CLERK 7788 1987-5-23 1100.00 20
7900 JAMES CLERK 7698 1981-12-3 950.00 30
7902 FORD ANALYST 7566 1981-12-3 3000.00 20
7934 MILLER CLERK 7782 1982-1-23 1300.00 10
上传数据到HDFS系统中
cd /home/hadoop/luogankun/workspace/sync_data/pig
hadoop fs -put dept.txt input/pig/dept.txt
hadoop fs -put emp.txt input/pig/emp.txt
导入
CREATE TABLE TMP_TABLE(USER VARCHAR(32),AGE INT,IS_MALE BOOLEAN);
dept= LOAD 'input/pig/dept.txt' USING PigStorage('\t') AS (deptno:int,dname:chararray,loc:chararray);
emp = LOAD 'input/pig/emp.txt' USING PigStorage('\t') AS (empno:int,ename:chararray, job:chararray, mgr:int, hiredate:chararray,sal:double,comm:double,dept:int);
查看表结构
desc TMP_TABLE;
describe dept
dept: {deptno: int,dname: chararray,loc: chararray}
describe emp
emp: {empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double,comm: double,dept: int}
查询整张表
SELECT * FROM TMP_TABLE;
DUMP dept
(10,ACCOUNTING,NEW YORK)
(20,RESEARCH,DALLAS)
(30,SALES,CHICAGO)
(40,OPERATIONS,BOSTON)
DUMP emp
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)
查询前N条
SELECT * FROM TMP_TABLE LIMIT 10;
emp_table_limit = LIMIT emp 10;
DUMP emp_table_limit;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
查询表中的某些列
SELECT USER FROM TMP_TABLE;
emp_table_empno_ename = FOREACH emp GENERATE empno, ename;
describe emp_table_empno_ename
emp_table_empno_ename: {empno: int,ename: chararray}
DUMP emp_table_empno_ename;
(7369,SMITH)
(7499,ALLEN)
(7521,WARD)
(7566,JONES)
(7654,MARTIN)
(7698,BLAKE)
(7782,CLARK)
(7788,SCOTT)
(7839,KING)
(7844,TURNER)
(7876,ADAMS)
(7900,JAMES)
(7902,FORD)
(7934,MILLER)
给列取别名
SELECT USER AS USER_NAME,AGE AS USER_AGE FROM TMP_TABLE;
emp_table_column_alias = FOREACH emp GENERATE empno AS id,ename AS name;
describe emp_table_column_alias
emp_table_column_alias: {id: int,name: chararray}
DUMP emp_table_column_alias
(7369,SMITH)
(7499,ALLEN)
(7521,WARD)
(7566,JONES)
(7654,MARTIN)
(7698,BLAKE)
(7782,CLARK)
(7788,SCOTT)
(7839,KING)
(7844,TURNER)
(7876,ADAMS)
(7900,JAMES)
(7902,FORD)
(7934,MILLER)
排序
SELECT * FROM TMP_TABLE ORDER BY AGE;
emp_table_order = ORDER emp BY empno ASC;
DUMP emp_table_order;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)
条件查询
SELECT * FROM TMP_TABLE WHERE AGE>20;
emp_table_where = FILTER emp by sal > 1500;
DUMP emp_table_where;
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)
内连接Inner Join
SELECT * FROM TMP_TABLE A JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_inner_join = JOIN emp BY dept,dept BY deptno;
describe emp_table_inner_join
emp_table_inner_join:
{emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_inner_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
左连接Left Join
SELECT * FROM TMP_TABLE A LEFT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_left_join = JOIN emp BY dept LEFT OUTER,dept BY deptno;
describe emp_table_left_join
emp_table_left_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_left_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
右连接Right Join
SELECT * FROM TMP_TABLE A RIGHT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_right_join = JOIN emp BY dept RIGHT OUTER,dept BY deptno;
describe emp_table_right_join
emp_table_right_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_right_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(,,,,,,,,40,OPERATIONS,BOSTON)
全连接Full Join
SELECT * FROM TMP_TABLE A LEFT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE UNION SELECT * FROM TMP_TABLE A RIGHT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_full_join = JOIN emp BY dept FULL OUTER,dept BY deptno;
describe emp_table_full_join
emp_table_full_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_full_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(,,,,,,,,40,OPERATIONS,BOSTON)
同时对多张表交叉查询
SELECT * FROM TMP_TABLE,TMP_TABLE_2;
emp_table_cross = CROSS emp,dept;
describe emp_table_cross
emp_table_cross: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_cross;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,30,SALES,CHICAGO)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,40,OPERATIONS,BOSTON)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,10,ACCOUNTING,NEW YORK)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,40,OPERATIONS,BOSTON)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,10,ACCOUNTING,NEW YORK)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,20,RESEARCH,DALLAS)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,40,OPERATIONS,BOSTON)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,10,ACCOUNTING,NEW YORK)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,30,SALES,CHICAGO)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,40,OPERATIONS,BOSTON)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,10,ACCOUNTING,NEW YORK)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,20,RESEARCH,DALLAS)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,40,OPERATIONS,BOSTON)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,10,ACCOUNTING,NEW YORK)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,20,RESEARCH,DALLAS)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,40,OPERATIONS,BOSTON)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,20,RESEARCH,DALLAS)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,30,SALES,CHICAGO)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,40,OPERATIONS,BOSTON)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,10,ACCOUNTING,NEW YORK)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,30,SALES,CHICAGO)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,40,OPERATIONS,BOSTON)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,20,RESEARCH,DALLAS)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,30,SALES,CHICAGO)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,40,OPERATIONS,BOSTON)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,10,ACCOUNTING,NEW YORK)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,20,RESEARCH,DALLAS)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,40,OPERATIONS,BOSTON)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,10,ACCOUNTING,NEW YORK)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,30,SALES,CHICAGO)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,40,OPERATIONS,BOSTON)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,10,ACCOUNTING,NEW YORK)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,20,RESEARCH,DALLAS)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,40,OPERATIONS,BOSTON)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,10,ACCOUNTING,NEW YORK)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,30,SALES,CHICAGO)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,40,OPERATIONS,BOSTON)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,20,RESEARCH,DALLAS)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,30,SALES,CHICAGO)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,40,OPERATIONS,BOSTON)
分组GROUP BY
SELECT * FROM TMP_TABLE GROUP BY IS_MALE;
emp_table_group = GROUP emp BY dept;
describe emp_table_group
emp_table_group: {
group: int,
emp: {
(empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double, comm: double,dept: int)
}
}
DUMP emp_table_group;
(10,{
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10),
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10),
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)})
(20,{
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20),
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20),
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20),
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20),
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)})
(30,{
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30),
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30),
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30),
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30),
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30),
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)})
分组并统计
SELECT IS_MALE,COUNT(*) FROM TMP_TABLE GROUP BY IS_MALE;
emp_table_group_count = GROUP emp BY dept; describe emp_table_group_count
emp_table_group_count: {
group: int,
emp: {
(empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double,comm: double,dept: int
)
}
}
注意:下面两个FOREACH都必须作用于GROUP的结果(emp_table_group_count),因此统计结果使用不同的别名,避免覆盖分组关系。
emp_table_group_count_by_key = FOREACH emp_table_group_count GENERATE group,COUNT($1);
describe emp_table_group_count_by_key
DUMP emp_table_group_count_by_key;
(10,3)
(20,5)
(30,6)
emp_table_group_count_by_bag = FOREACH emp_table_group_count GENERATE emp.dept,COUNT($1);
describe emp_table_group_count_by_bag
emp_table_group_count_by_bag: {{(dept: int)},long}
DUMP emp_table_group_count_by_bag;
({(10),(10),(10)},3)
({(20),(20),(20),(20),(20)},5)
({(30),(30),(30),(30),(30),(30)},6)
去重DISTINCT
SELECT DISTINCT IS_MALE FROM TMP_TABLE;
emp_table_distinct = FOREACH emp GENERATE dept;
describe emp_table_distinct
emp_table_distinct: {dept: int}
emp_table_distinct = DISTINCT emp_table_distinct;
describe emp_table_distinct
emp_table_distinct: {dept: int}
DUMP emp_table_distinct;
(10)
(20)
(30)
pig判空
select * from emp where comm is not null;
emp_table_where_null = FILTER emp by comm is not null;
DUMP emp_table_where_null;
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
pig和mysql脚本对比的更多相关文章
- [原] KVM 环境下MySQL性能对比
KVM 环境下MySQL性能对比 标签(空格分隔): Cloud2.0 [TOC] 测试目的 对比MySQL在物理机和KVM环境下性能情况 压测标准 压测遵循单一变量原则,所有的对比都是只改变一个变量 ...
- MongoDB(五)mongo语法和mysql语法对比学习
我们总是在对比中看到自己的优点和缺点,对于mongodb来说也是一样,对比学习让我们尽快的掌握关于mongodb的基础知识. mongodb与MySQL命令对比 关系型数据库一般是由数据库(datab ...
- mongodb与mysql命令对比
mongodb与mysql命令对比 传统的关系数据库一般由数据库(database).表(table).记录(record)三个层次概念组成,MongoDB是由数据库(database).集合(col ...
- cmd执行mssql脚本或者执行mysql脚本
private static int ExecuteMSSql(DbInfo db, string sqlPath) { Console.WriteLine("=============== ...
- 聚集索引、非聚集索引、聚集索引组织表、堆组织表、Mysql/PostgreSQL对比、联合主键/自增长、InnoDB/MyISAM(引擎方面另开一篇)
参考了多篇文章,分别记录,如下. 下面是第一篇的总结 http://www.jb51.net/article/76007.htm: 在MySQL中,InnoDB引擎表是(聚集)索引组织表(cluste ...
- LoadRunner利用ODBC编写MySql脚本
最近做了几周的LoadRunner测试,有一些心得,记录下来,以便以后查找. LoadRunner测试数据库是模拟客户端去连接数据库服务器,因此,需要协议(或者说驱动的支持).LoadRunner本身 ...
- 数据市中心全省中国mysql脚本
1.查尔斯省 watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvd2h6aGFvY2hhbw==/font/5a6L5L2T/fontsize/400/fill ...
- Rehat一键安装mysql脚本和备份数据库脚本
Rehat一键安装mysql脚本 ##说明:适用,Rehat 5 6 7 1.运行状态,运行成功输出mysql临时密码 2.代码如下 #!/bin/bash #获取系统信息 sudo cat /etc ...
- MongoDB批量操作及与MySQL效率对比
本文主要通过批量与非批量对比操作的方式介绍MongoDB的bulkWrite()方法的使用.顺带与关系型数据库MySQL进行对比,比较这两种不同类型数据库的效率.如果只是想学习bulkWrite()的 ...
随机推荐
- tunning-prime优化mysql建议
#!/bin/sh # set tabstop=8 ################################################################## ...
- HAWQ + MADlib 玩转数据挖掘之(六)——主成分分析与主成分投影
一.主成分分析(Principal Component Analysis,PCA)简介 在数据挖掘中经常会遇到多个变量的问题,而且在多数情况下,多个变量之间常常存在一定的相关性.例如,网站的" ...
- 互评Beta版本——王者荣耀交流协会的PSP DAILY作品
基于NABCD评论,及改进建议 1)N(Need 需求) 开发本软件有利于我们记录PSP,省时省力.方便快捷.这样我们能充份的利用时间. 2)A(Approach 做法) 王者荣耀交流协会进行了网上调 ...
- liunx系统和其它的基本命令
1.su 更换用户 2.sudo 管理员权限 3.PATH 4.sudo shutdown -h now 现在关机 sudo shutdown -r now 现在重启 5.kill ...
- Unity打安卓包 Android 所有错误解决方案大全(几乎囊括所有打包错误 )
Unity打包出错解决方案 本文提供全流程,中文翻译. Chinar 坚持将简单的生活方式,带给世人!(拥有更好的阅读体验 -- 高分辨率用户请根据需求调整网页缩放比例) Chinar -- 心分享. ...
- HDU 1814 模板题 2-sat
敲模板做的,不知道怎么就对了,注意一下建边即可··· 贴代码: #include<cstdio> #include<vector> using namespace std; # ...
- 解决安装vmware-tools出现的“The path "" is not a valid path to the 3.2.0-4-amd64 kernel headers”问题
在用虚拟机安装使用64位Crunchbang(一种Debian GNU/Linux 的linux)的过程中出现很多小问题.其中vmware-tools安装就是第一个问题. 在使用终端安装vmware- ...
- 2012年东京区域赛 UVAlive6182~6191
暑假训练场 A(UVAL6182). 凯神看了敲掉的题目,还没有看过 #include <iostream> #include <memory.h> using namespa ...
- log4net保存到数据库系列五、新增数据库字段
园子里面有很多关于log4net保存到数据库的帖子,但是要动手操作还是比较不易,从头开始学习log4net数据库日志一.WebConfig中配置log4net 一.WebConfig中配置log4ne ...
- 【转】每天一个linux命令(61):wget命令
原文网址:http://www.cnblogs.com/peida/archive/2013/03/18/2965369.html Linux系统中的wget是一个下载文件的工具,它用在命令行下.对于 ...