pig和mysql脚本对比
测试数据位于:/home/hadoop/luogankun/workspace/sync_data/pig
dept和emp表来自Oracle数据库自带的表
dept.txt(字段之间以制表符\t分隔,与下文的PigStorage('\t')对应)
10 ACCOUNTING NEW YORK
20 RESEARCH DALLAS
30 SALES CHICAGO
40 OPERATIONS BOSTON
emp.txt(字段之间以制表符\t分隔;没有值的mgr、comm保留为空字段)
7369 SMITH CLERK 7902 1980-12-17 800.00 20
7499 ALLEN SALESMAN 7698 1981-2-20 1600.00 300.00 30
7521 WARD SALESMAN 7698 1981-2-22 1250.00 500.00 30
7566 JONES MANAGER 7839 1981-4-2 2975.00 20
7654 MARTIN SALESMAN 7698 1981-9-28 1250.00 1400.00 30
7698 BLAKE MANAGER 7839 1981-5-1 2850.00 30
7782 CLARK MANAGER 7839 1981-6-9 2450.00 10
7788 SCOTT ANALYST 7566 1987-4-19 3000.00 20
7839 KING PRESIDENT 1981-11-17 5000.00 10
7844 TURNER SALESMAN 7698 1981-9-8 1500.00 0.00 30
7876 ADAMS CLERK 7788 1987-5-23 1100.00 20
7900 JAMES CLERK 7698 1981-12-3 950.00 30
7902 FORD ANALYST 7566 1981-12-3 3000.00 20
7934 MILLER CLERK 7782 1982-1-23 1300.00 10
上传数据到HDFS系统中
cd /home/hadoop/luogankun/workspace/sync_data/pig
hadoop fs -put dept.txt input/pig/dept.txt
hadoop fs -put emp.txt input/pig/emp.txt
导入
CREATE TABLE TMP_TABLE(USER VARCHAR(32),AGE INT,IS_MALE BOOLEAN);
dept= LOAD 'input/pig/dept.txt' USING PigStorage('\t') AS (deptno:int,dname:chararray,loc:chararray);
emp = LOAD 'input/pig/emp.txt' USING PigStorage('\t') AS (empno:int,ename:chararray, job:chararray, mgr:int, hiredate:chararray,sal:double,comm:double,dept:int);
查看表结构
desc TMP_TABLE;
describe dept
dept: {deptno: int,dname: chararray,loc: chararray}
describe emp
emp: {empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double,comm: double,dept: int}
查询整张表
SELECT * FROM TMP_TABLE;
DUMP dept
(10,ACCOUNTING,NEW YORK)
(20,RESEARCH,DALLAS)
(30,SALES,CHICAGO)
(40,OPERATIONS,BOSTON)
DUMP emp
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)
查询前N条
SELECT * FROM TMP_TABLE LIMIT 10;
emp_table_limit = LIMIT emp 10;
DUMP emp_table_limit;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
查询表中的某些列
SELECT USER FROM TMP_TABLE;
emp_table_empno_ename = FOREACH emp GENERATE empno, ename;
describe emp_table_empno_ename
emp_table_empno_ename: {empno: int,ename: chararray}
DUMP emp_table_empno_ename;
(7369,SMITH)
(7499,ALLEN)
(7521,WARD)
(7566,JONES)
(7654,MARTIN)
(7698,BLAKE)
(7782,CLARK)
(7788,SCOTT)
(7839,KING)
(7844,TURNER)
(7876,ADAMS)
(7900,JAMES)
(7902,FORD)
(7934,MILLER)
给列取别名
SELECT USER AS USER_NAME,AGE AS USER_AGE FROM TMP_TABLE;
emp_table_column_alias = FOREACH emp GENERATE empno AS id,ename AS name;
describe emp_table_column_alias
emp_table_column_alias: {id: int,name: chararray}
DUMP emp_table_column_alias
(7369,SMITH)
(7499,ALLEN)
(7521,WARD)
(7566,JONES)
(7654,MARTIN)
(7698,BLAKE)
(7782,CLARK)
(7788,SCOTT)
(7839,KING)
(7844,TURNER)
(7876,ADAMS)
(7900,JAMES)
(7902,FORD)
(7934,MILLER)
排序
SELECT * FROM TMP_TABLE ORDER BY AGE;
emp_table_order = ORDER emp BY empno ASC;
DUMP emp_table_order;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)
条件查询
SELECT * FROM TMP_TABLE WHERE AGE>20;
emp_table_where = FILTER emp by sal > 1500;
DUMP emp_table_where;
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)
内连接Inner Join
SELECT * FROM TMP_TABLE A JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_inner_join = JOIN emp BY dept,dept BY deptno;
describe emp_table_inner_join
emp_table_inner_join:
{emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_inner_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
左连接Left Join
SELECT * FROM TMP_TABLE A LEFT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_left_join = JOIN emp BY dept LEFT OUTER,dept BY deptno;
describe emp_table_left_join
emp_table_left_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_left_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
右连接Right Join
SELECT * FROM TMP_TABLE A RIGHT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_right_join = JOIN emp BY dept RIGHT OUTER,dept BY deptno;
describe emp_table_right_join
emp_table_right_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_right_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(,,,,,,,,40,OPERATIONS,BOSTON)
全连接Full Join
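SELECT * FROM TMP_TABLE A LEFT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE UNION SELECT * FROM TMP_TABLE A RIGHT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
(MySQL不支持FULL JOIN,需用LEFT JOIN与RIGHT JOIN的结果做UNION来模拟全连接)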
emp_table_full_join = JOIN emp BY dept FULL OUTER,dept BY deptno;
describe emp_table_full_join
emp_table_full_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_full_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(,,,,,,,,40,OPERATIONS,BOSTON)
同时对多张表交叉查询
SELECT * FROM TMP_TABLE,TMP_TABLE_2;
emp_table_cross = CROSS emp,dept;
describe emp_table_cross
emp_table_cross: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_cross;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,30,SALES,CHICAGO)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,40,OPERATIONS,BOSTON)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,10,ACCOUNTING,NEW YORK)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,40,OPERATIONS,BOSTON)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,10,ACCOUNTING,NEW YORK)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,20,RESEARCH,DALLAS)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,40,OPERATIONS,BOSTON)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,10,ACCOUNTING,NEW YORK)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,30,SALES,CHICAGO)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,40,OPERATIONS,BOSTON)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,10,ACCOUNTING,NEW YORK)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,20,RESEARCH,DALLAS)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,40,OPERATIONS,BOSTON)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,10,ACCOUNTING,NEW YORK)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,20,RESEARCH,DALLAS)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,40,OPERATIONS,BOSTON)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,20,RESEARCH,DALLAS)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,30,SALES,CHICAGO)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,40,OPERATIONS,BOSTON)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,10,ACCOUNTING,NEW YORK)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,30,SALES,CHICAGO)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,40,OPERATIONS,BOSTON)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,20,RESEARCH,DALLAS)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,30,SALES,CHICAGO)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,40,OPERATIONS,BOSTON)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,10,ACCOUNTING,NEW YORK)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,20,RESEARCH,DALLAS)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,40,OPERATIONS,BOSTON)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,10,ACCOUNTING,NEW YORK)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,30,SALES,CHICAGO)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,40,OPERATIONS,BOSTON)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,10,ACCOUNTING,NEW YORK)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,20,RESEARCH,DALLAS)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,40,OPERATIONS,BOSTON)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,10,ACCOUNTING,NEW YORK)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,30,SALES,CHICAGO)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,40,OPERATIONS,BOSTON)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,20,RESEARCH,DALLAS)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,30,SALES,CHICAGO)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,40,OPERATIONS,BOSTON)
分组GROUP BY
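SELECT IS_MALE,GROUP_CONCAT(USER) FROM TMP_TABLE GROUP BY IS_MALE;
(MySQL默认开启ONLY_FULL_GROUP_BY时不允许SELECT * 搭配GROUP BY;pig的GROUP会把同组记录收集到一个bag中,这里用GROUP_CONCAT近似表达)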
emp_table_group = GROUP emp BY dept;
describe emp_table_group
emp_table_group: {
group: int,
emp: {
(empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double, comm: double,dept: int)
}
}
DUMP emp_table_group;
(10,{
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10),
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10),
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)})
(20,{
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20),
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20),
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20),
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20),
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)})
(30,{
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30),
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30),
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30),
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30),
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30),
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)})
分组并统计
SELECT IS_MALE,COUNT(*) FROM TMP_TABLE GROUP BY IS_MALE;
emp_table_group_count = GROUP emp BY dept;
describe emp_table_group_count
emp_table_group_count: {
group: int,
emp: {
(empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double,comm: double,dept: int
)
}
}
emp_table_group_count = FOREACH emp_table_group_count GENERATE group,COUNT($1);
describe emp_table_group_count
DUMP emp_table_group_count;
(10,3)
(20,5)
(30,6)
-- 上一步已把emp_table_group_count重新赋值为统计结果,其中不再包含emp这个bag,因此下面的写法需要先重新分组
emp_table_group_count = GROUP emp BY dept;
emp_table_group_count = FOREACH emp_table_group_count GENERATE emp.dept,COUNT($1);
describe emp_table_group_count
emp_table_group_count: {{(dept: int)},long}
DUMP emp_table_group_count;
({(10),(10),(10)},3)
({(20),(20),(20),(20),(20)},5)
({(30),(30),(30),(30),(30),(30)},6)
去重DISTINCT
SELECT DISTINCT IS_MALE FROM TMP_TABLE;
emp_table_distinct = FOREACH emp GENERATE dept;
describe emp_table_distinct
emp_table_distinct: {dept: int}
emp_table_distinct = DISTINCT emp_table_distinct;
describe emp_table_distinct
emp_table_distinct: {dept: int}
DUMP emp_table_distinct;
(10)
(20)
(30)
pig判空
select * from emp where comm is not null;
emp_table_where_null = FILTER emp by comm is not null;
DUMP emp_table_where_null;
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
pig和mysql脚本对比的更多相关文章
- [原] KVM 环境下MySQL性能对比
KVM 环境下MySQL性能对比 标签(空格分隔): Cloud2.0 [TOC] 测试目的 对比MySQL在物理机和KVM环境下性能情况 压测标准 压测遵循单一变量原则,所有的对比都是只改变一个变量 ...
- MongoDB(五)mongo语法和mysql语法对比学习
我们总是在对比中看到自己的优点和缺点,对于mongodb来说也是一样,对比学习让我们尽快的掌握关于mongodb的基础知识. mongodb与MySQL命令对比 关系型数据库一般是由数据库(datab ...
- mongodb与mysql命令对比
mongodb与mysql命令对比 传统的关系数据库一般由数据库(database).表(table).记录(record)三个层次概念组成,MongoDB是由数据库(database).集合(col ...
- cmd执行mssql脚本或者执行mysql脚本
private static int ExecuteMSSql(DbInfo db, string sqlPath) { Console.WriteLine("=============== ...
- 聚集索引、非聚集索引、聚集索引组织表、堆组织表、Mysql/PostgreSQL对比、联合主键/自增长、InnoDB/MyISAM(引擎方面另开一篇)
参考了多篇文章,分别记录,如下. 下面是第一篇的总结 http://www.jb51.net/article/76007.htm: 在MySQL中,InnoDB引擎表是(聚集)索引组织表(cluste ...
- LoadRunner利用ODBC编写MySql脚本
最近做了几周的LoadRunner测试,有一些心得,记录下来,以便以后查找. LoadRunner测试数据库是模拟客户端去连接数据库服务器,因此,需要协议(或者说驱动的支持).LoadRunner本身 ...
- 数据市中心全省中国mysql脚本
1.查尔斯省 watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvd2h6aGFvY2hhbw==/font/5a6L5L2T/fontsize/400/fill ...
- Rehat一键安装mysql脚本和备份数据库脚本
Rehat一键安装mysql脚本 ##说明:适用,Rehat 5 6 7 1.运行状态,运行成功输出mysql临时密码 2.代码如下 #!/bin/bash #获取系统信息 sudo cat /etc ...
- MongoDB批量操作及与MySQL效率对比
本文主要通过批量与非批量对比操作的方式介绍MongoDB的bulkWrite()方法的使用.顺带与关系型数据库MySQL进行对比,比较这两种不同类型数据库的效率.如果只是想学习bulkWrite()的 ...
随机推荐
- [LeetCode&Python] Problem 682. Baseball Game
You're now a baseball game point recorder. Given a list of strings, each string can be one of the 4 ...
- Robot Framework 安装笔记
安装python 自行搜寻安装 安装pip 进入python目录下的Scripts文件夹下,cmd,执行easy_install.exe pip 安装pywin32 pip install pywin ...
- null 与 undefinded
null表示"没有对象",即该处不应该有值.典型用法是: (1) 作为函数的参数,表示该函数的参数不是对象. (2) 作为对象原型链的终点. Object.getPrototype ...
- HDU1702:ACboy needs your help again!
ACboy needs your help again! Time Limit: 1000/1000 MS (Java/Others) Memory Limit: 32768/32768 K ( ...
- Make menuconfig生成文件
1.当我们在执行make menuconfig这个命令时,一共涉及到了以下几个文件: Linux内核根目录下的scripts文件夹 arch/$ARCH/Kconfig文件.各层目录下的Kconfig ...
- hasura-graphql 集成 pipelinedb 1.0.0
pipelinedb 1.0.0 已经是一个标准的pg 扩展了,同时以前的语法也有变动,但是集成进hasura-graphql 更方便了 使用docker-compose 运行 环境准备 docker ...
- C# 使用oledb 方式连接本地或者远程oracel 数据库的方式
对于C# 进行oracle 数据库的开发来说使用oracle 提供的odp.net 方式是比较方便的,同时在性能以及兼容性也是比较好的 但是,对于不打算使用的,那么该如何使用oledb 进行连接 连接 ...
- 理解git
为了真正了解git,我们从底部.底层开始,了解git核心,知其然并知其所以然. 为什么要进行版本控制呢? 因为编写文件不可能一次到位,文件总是有不同的状态需要保存下来,方便以后出错回滚. git 是目 ...
- Jmeter之JDBC
jdbc:mysql://ip:3306/test?useUnicode=true&characterEncoding=utf8&allowMultiQueries=true
- JWT 基础教程
原文地址:JWT 基础教程 博客地址:http://www.extlight.com 一.前言 针对前后端分离的项目,大多是通过 token 进行身份认证来进行交互,今天将介绍一种简单的创建 toke ...