测试数据位于:/home/hadoop/luogankun/workspace/sync_data/pig

dept和emp表来自Oracle数据库自带的示例表

dept.txt(字段以制表符分隔,依次为deptno、dname、loc)

10      ACCOUNTING      NEW YORK
20 RESEARCH DALLAS
30 SALES CHICAGO
40 OPERATIONS BOSTON

emp.txt(字段以制表符分隔,依次为empno、ename、job、mgr、hiredate、sal、comm、dept;mgr或comm缺失时对应字段留空)

7369    SMITH   CLERK   7902    1980-12-17      800.00          20
7499 ALLEN SALESMAN 7698 1981-2-20 1600.00 300.00 30
7521 WARD SALESMAN 7698 1981-2-22 1250.00 500.00 30
7566 JONES MANAGER 7839 1981-4-2 2975.00 20
7654 MARTIN SALESMAN 7698 1981-9-28 1250.00 1400.00 30
7698 BLAKE MANAGER 7839 1981-5-1 2850.00 30
7782 CLARK MANAGER 7839 1981-6-9 2450.00 10
7788 SCOTT ANALYST 7566 1987-4-19 3000.00 20
7839 KING PRESIDENT 1981-11-17 5000.00 10
7844 TURNER SALESMAN 7698 1981-9-8 1500.00 0.00 30
7876 ADAMS CLERK 7788 1987-5-23 1100.00 20
7900 JAMES CLERK 7698 1981-12-3 950.00 30
7902 FORD ANALYST 7566 1981-12-3 3000.00 20
7934 MILLER CLERK 7782 1982-1-23 1300.00 10

上传数据到HDFS系统中

cd /home/hadoop/luogankun/workspace/sync_data/pig
hadoop fs -put dept.txt input/pig/dept.txt
hadoop fs -put emp.txt input/pig/emp.txt

导入

CREATE TABLE TMP_TABLE(USER VARCHAR(32),AGE INT,IS_MALE BOOLEAN);

dept= LOAD 'input/pig/dept.txt' USING PigStorage('\t') AS (deptno:int,dname:chararray,loc:chararray);
emp = LOAD 'input/pig/emp.txt' USING PigStorage('\t') AS (empno:int,ename:chararray, job:chararray, mgr:int, hiredate:chararray,sal:double,comm:double,dept:int);

查看表结构

desc TMP_TABLE;
describe dept;
dept: {deptno: int,dname: chararray,loc: chararray}
describe emp;
emp: {empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double,comm: double,dept: int}

查询整张表

SELECT * FROM TMP_TABLE;
DUMP dept;
(10,ACCOUNTING,NEW YORK)
(20,RESEARCH,DALLAS)
(30,SALES,CHICAGO)
(40,OPERATIONS,BOSTON)
DUMP emp;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)

查询前N条

SELECT * FROM TMP_TABLE LIMIT 10;
emp_table_limit = LIMIT emp 10;
DUMP emp_table_limit;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)

查询表中的某些列

SELECT USER FROM TMP_TABLE;
emp_table_empno_ename = FOREACH emp GENERATE empno, ename;
describe emp_table_empno_ename;
emp_table_empno_ename: {empno: int,ename: chararray}
DUMP emp_table_empno_ename;
(7369,SMITH)
(7499,ALLEN)
(7521,WARD)
(7566,JONES)
(7654,MARTIN)
(7698,BLAKE)
(7782,CLARK)
(7788,SCOTT)
(7839,KING)
(7844,TURNER)
(7876,ADAMS)
(7900,JAMES)
(7902,FORD)
(7934,MILLER)

给列取别名

SELECT USER AS USER_NAME,AGE AS USER_AGE FROM TMP_TABLE;
emp_table_column_alias = FOREACH emp GENERATE empno AS id,ename AS name;
describe emp_table_column_alias;
emp_table_column_alias: {id: int,name: chararray}
DUMP emp_table_column_alias;
(7369,SMITH)
(7499,ALLEN)
(7521,WARD)
(7566,JONES)
(7654,MARTIN)
(7698,BLAKE)
(7782,CLARK)
(7788,SCOTT)
(7839,KING)
(7844,TURNER)
(7876,ADAMS)
(7900,JAMES)
(7902,FORD)
(7934,MILLER)

排序

SELECT * FROM TMP_TABLE ORDER BY AGE;
emp_table_order = ORDER emp BY empno ASC;
DUMP emp_table_order;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)

条件查询

SELECT * FROM TMP_TABLE WHERE AGE>20;
emp_table_where = FILTER emp by sal > 1500;
DUMP emp_table_where;
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)

内连接Inner Join

SELECT * FROM TMP_TABLE A JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_inner_join = JOIN emp BY dept,dept BY deptno;
describe emp_table_inner_join;
emp_table_inner_join:
{emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_inner_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)

左连接Left Join

SELECT * FROM TMP_TABLE A LEFT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_left_join = JOIN emp BY dept LEFT OUTER,dept BY deptno;
describe emp_table_left_join;
emp_table_left_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_left_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)

右连接Right Join

SELECT * FROM TMP_TABLE A RIGHT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_right_join = JOIN emp BY dept RIGHT OUTER,dept BY deptno;
describe emp_table_right_join;
emp_table_right_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_right_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(,,,,,,,,40,OPERATIONS,BOSTON)

全连接Full Join

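(MySQL不支持FULL JOIN,可用LEFT JOIN与RIGHT JOIN的UNION来模拟全连接)
SELECT * FROM TMP_TABLE A LEFT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE
UNION
SELECT * FROM TMP_TABLE A RIGHT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;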
emp_table_full_join = JOIN emp BY dept FULL OUTER,dept BY deptno;
describe emp_table_full_join;
emp_table_full_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_full_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(,,,,,,,,40,OPERATIONS,BOSTON)

同时对多张表交叉查询

SELECT * FROM TMP_TABLE,TMP_TABLE_2;
emp_table_cross = CROSS emp,dept;
describe emp_table_cross;
emp_table_cross: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_cross;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,30,SALES,CHICAGO)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,40,OPERATIONS,BOSTON)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,10,ACCOUNTING,NEW YORK)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,40,OPERATIONS,BOSTON)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,10,ACCOUNTING,NEW YORK)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,20,RESEARCH,DALLAS)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,40,OPERATIONS,BOSTON)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,10,ACCOUNTING,NEW YORK)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,30,SALES,CHICAGO)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,40,OPERATIONS,BOSTON)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,10,ACCOUNTING,NEW YORK)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,20,RESEARCH,DALLAS)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,40,OPERATIONS,BOSTON)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,10,ACCOUNTING,NEW YORK)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,20,RESEARCH,DALLAS)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,40,OPERATIONS,BOSTON)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,20,RESEARCH,DALLAS)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,30,SALES,CHICAGO)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,40,OPERATIONS,BOSTON)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,10,ACCOUNTING,NEW YORK)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,30,SALES,CHICAGO)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,40,OPERATIONS,BOSTON)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,20,RESEARCH,DALLAS)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,30,SALES,CHICAGO)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,40,OPERATIONS,BOSTON)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,10,ACCOUNTING,NEW YORK)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,20,RESEARCH,DALLAS)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,40,OPERATIONS,BOSTON)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,10,ACCOUNTING,NEW YORK)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,30,SALES,CHICAGO)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,40,OPERATIONS,BOSTON)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,10,ACCOUNTING,NEW YORK)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,20,RESEARCH,DALLAS)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,40,OPERATIONS,BOSTON)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,10,ACCOUNTING,NEW YORK)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,30,SALES,CHICAGO)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,40,OPERATIONS,BOSTON)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,20,RESEARCH,DALLAS)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,30,SALES,CHICAGO)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,40,OPERATIONS,BOSTON)

分组GROUP BY

SELECT * FROM TMP_TABLE GROUP BY IS_MALE;
emp_table_group = GROUP emp BY dept;
describe emp_table_group;
emp_table_group: {
group: int,
emp: {
(empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double, comm: double,dept: int)
}
}
DUMP emp_table_group;
(10,{
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10),
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10),
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)})
(20,{
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20),
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20),
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20),
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20),
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)})
(30,{
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30),
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30),
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30),
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30),
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30),
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)})

分组并统计

SELECT IS_MALE,COUNT(*) FROM TMP_TABLE GROUP BY IS_MALE;
emp_table_group_count = GROUP emp BY dept;
describe emp_table_group_count;
emp_table_group_count: {
group: int,
emp: {
(empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double,comm: double,dept: int
)
}
}
emp_table_group_count = FOREACH emp_table_group_count GENERATE group,COUNT($1);
describe emp_table_group_count;
DUMP emp_table_group_count;
(10,3)
(20,5)
(30,6)
-- 上一步已把emp_table_group_count覆盖为(group,count),引用emp.dept之前需要重新分组
emp_table_group_count = GROUP emp BY dept;
emp_table_group_count = FOREACH emp_table_group_count GENERATE emp.dept,COUNT($1);
describe emp_table_group_count;
emp_table_group_count: {{(dept: int)},long}
DUMP emp_table_group_count;
({(10),(10),(10)},3)
({(20),(20),(20),(20),(20)},5)
({(30),(30),(30),(30),(30),(30)},6)

去重DISTINCT

SELECT DISTINCT IS_MALE FROM TMP_TABLE;
emp_table_distinct = FOREACH emp GENERATE dept;
describe emp_table_distinct;
emp_table_distinct: {dept: int}
emp_table_distinct = DISTINCT emp_table_distinct;
describe emp_table_distinct;
emp_table_distinct: {dept: int}
DUMP emp_table_distinct;
(10)
(20)
(30)

pig判空

select * from emp where comm is not null;
emp_table_where_null = FILTER emp by comm is not null;
DUMP emp_table_where_null;
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)

pig和mysql脚本对比的更多相关文章

  1. [原] KVM 环境下MySQL性能对比

    KVM 环境下MySQL性能对比 标签(空格分隔): Cloud2.0 [TOC] 测试目的 对比MySQL在物理机和KVM环境下性能情况 压测标准 压测遵循单一变量原则,所有的对比都是只改变一个变量 ...

  2. MongoDB(五)mongo语法和mysql语法对比学习

    我们总是在对比中看到自己的优点和缺点,对于mongodb来说也是一样,对比学习让我们尽快的掌握关于mongodb的基础知识. mongodb与MySQL命令对比 关系型数据库一般是由数据库(datab ...

  3. mongodb与mysql命令对比

    mongodb与mysql命令对比 传统的关系数据库一般由数据库(database).表(table).记录(record)三个层次概念组成,MongoDB是由数据库(database).集合(col ...

  4. cmd执行mssql脚本或者执行mysql脚本

    private static int ExecuteMSSql(DbInfo db, string sqlPath) { Console.WriteLine("=============== ...

  5. 聚集索引、非聚集索引、聚集索引组织表、堆组织表、Mysql/PostgreSQL对比、联合主键/自增长、InnoDB/MyISAM(引擎方面另开一篇)

    参考了多篇文章,分别记录,如下. 下面是第一篇的总结 http://www.jb51.net/article/76007.htm: 在MySQL中,InnoDB引擎表是(聚集)索引组织表(cluste ...

  6. LoadRunner利用ODBC编写MySql脚本

    最近做了几周的LoadRunner测试,有一些心得,记录下来,以便以后查找. LoadRunner测试数据库是模拟客户端去连接数据库服务器,因此,需要协议(或者说驱动的支持).LoadRunner本身 ...

  7. 数据市中心全省中国mysql脚本

    1.查尔斯省 watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvd2h6aGFvY2hhbw==/font/5a6L5L2T/fontsize/400/fill ...

  8. Rehat一键安装mysql脚本和备份数据库脚本

    Rehat一键安装mysql脚本 ##说明:适用,Rehat 5 6 7 1.运行状态,运行成功输出mysql临时密码 2.代码如下 #!/bin/bash #获取系统信息 sudo cat /etc ...

  9. MongoDB批量操作及与MySQL效率对比

    本文主要通过批量与非批量对比操作的方式介绍MongoDB的bulkWrite()方法的使用.顺带与关系型数据库MySQL进行对比,比较这两种不同类型数据库的效率.如果只是想学习bulkWrite()的 ...

随机推荐

  1. myeclipse 与 webstrom 免解析node_modules 的方法

    myeclipse :   1.项目文件夹上:右键  properites  - > 搜索 filter  -->resouce filters 2.  webStrom : File - ...

  2. LDA模型应用实践-希拉里邮件主题分类

    #coding=utf8 import numpy as np import pandas as pd import re from gensim import corpora, models, si ...

  3. CSS的使用

    1.行内样式/内联样式 特点:在具体的HTML标签中引入CSS代码 语法:所有的HTML标签都具有一个style属性,属性值就可以使用CSS样式规则 <标签 style="CSS样式规 ...

  4. liunx的磁盘管理的基本命令

    df     查看磁盘占用率 du -sh    查看磁盘多大 sudo fdisk -l   查看硬盘信息 sudo mkfs -t ext3 /dev/sdb1    建立文件系统(相当于格式化) ...

  5. WinForm 实现点击一个按钮,执行另一个按钮事件

    C# 代码如下: private void button1_Click(object sender, EventArgs e) { this.button1.Click += new System.E ...

  6. dockercompose up build fail

    docker https://auth.docker.io/token dial tcp lookup auth.docker.io i/o timeo Error response from dae ...

  7. graphql 数据导入工具

    graphql 是比 比较方便的工具,但是数据导入也是一个比较重要的工具类似prisma 包含一个seed 的功能,类似docker我们使用mysql 数据库的initdb.d,但是那个基本上就 是添 ...

  8. 可靠的推送IM消息

    一.      报文类型: 1.请求报文(request,后简称为为R): 2.应答报文(acknowledge,后简称为A): 3.通知报文(notify,后简称为N). R:客户端主动发送给服务器 ...

  9. Refused to display '[url]' in a frame because it set 'X-Frame-Options' to 'Deny'.

    X-Frame-Options是一个HTTP标头(header),用来告诉浏览器这个网页是否可以放在iFrame内.例如: X-Frame-Options: DENY X-Frame-Options: ...

  10. SpringCloud初体验:六、利用 Sleuth 和 Zipkin 给微服务加上链路监控追踪查看功能

    首先:装上 Zipkin 服务,收集调用链跟踪数据,体验时装在了本机docker上, 方便快捷 docker run -d -p : openzipkin/zipkin 安装后访问地址也是 9411端 ...