Nginx日志导入到Hive0.13.1,同步Hbase0.96.2,设置RowKey为autoincrement(ID自增长)
|
1
2
3
4
5
|
# Nginx access-log format "main".
# Besides the default combined-style fields it records gzip ratio, request
# timing, byte counters and upstream info, so the Hive RegexSerDe below can
# parse 18 fields per line.
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                '$status $body_bytes_sent "$http_referer" '
                '"$http_user_agent" "$http_x_forwarded_for" '
                '"$gzip_ratio" $request_time $bytes_sent $request_length '
                '"$upstream_addr" $upstream_status $upstream_response_time';
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
8.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET / HTTP/1.1" 200 2373 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "2.78" 0.004 2683 369 "unix:/var/run/php5-fpm.sock" 200 0.0048.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET 8.8.8.8/b519d8ca/css/base.css HTTP/1.1" 200 940 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 1247 373 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET 8.8.8.8/a3e2e507/jquery.min.js HTTP/1.1" 200 93636 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.152 93976 359 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/logo.png HTTP/1.1" 200 6059 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 6369 377 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/p02.jpg HTTP/1.1" 200 22177 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 22489 376 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/p03.png HTTP/1.1" 200 3012 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 3321 376 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/two-dimension-code1.png HTTP/1.1" 200 761 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 1069 392 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/bg.png HTTP/1.1" 200 11474 "http://xxx.com/" "Mozilla/5.0 
(Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 11785 375 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/p04.png HTTP/1.1" 200 2860 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.000 3169 376 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/p06.png HTTP/1.1" 200 74097 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.062 74409 376 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:45 +0800] "GET 8.8.8.8/b519d8ca/image/p05.png HTTP/1.1" 200 132072 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.256 132385 376 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:46 +0800] "GET 8.8.8.8/b519d8ca/image/p07.png HTTP/1.1" 200 207987 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.592 208300 376 "-" - -8.8.8.8 - - [22/Aug/2014:20:23:46 +0800] "GET 8.8.8.8/b519d8ca/image/p01.png HTTP/1.1" 200 310418 "http://xxx.com/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36" "-" "-" 0.918 310731 376 "-" - - |
|
1
2
3
4
5
|
root@m1:/home/hadoop# /home/hadoop/hadoop-2.2.0/bin/hadoop fs -mkdir /user/hive/warehouse/nginxlogroot@m1:/home/hadoop# /home/hadoop/hadoop-2.2.0/bin/hadoop fs -ls /user/hive/warehouseFound 1 itemsdrwxr-xr-x - root supergroup 0 2014-01-22 23:13 /user/hive/warehouse/nginxlogroot@m1:/home/hadoop# |
#在Eclipse中创建Maven项目,然后使用Maven将项目打包成Jar文件,过程中需要引入hadoop-common-2.2.0.jar、hive-exec-0.13.1.jar这两个文件,可以在Hadoop2.2.0目录以及Hive0.13.1目录中找到。如果不会使用Maven将项目打包,可以参考这篇文章《Golang、Php、Python、Java基于Thrift0.9.1实现跨语言调用》中实现Java客户端部分,有如何打包的方法。
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
package idoall.org.hive;import org.apache.hadoop.hive.ql.exec.Description;import org.apache.hadoop.hive.ql.exec.UDF;import org.apache.hadoop.hive.ql.udf.UDFType;import org.apache.hadoop.io.LongWritable;/** * UDFRowSequence. */@Description(name = "row_sequence", value = "_FUNC_() - Returns a generated row sequence number starting from 1")@UDFType(deterministic = false, stateful = true)public class UDFRowSequence extends UDF{ private LongWritable result = new LongWritable(); public UDFRowSequence() { result.set(0); } public LongWritable evaluate() { result.set(result.get() + 1); return result; }} |
|
1
2
3
4
|
hive> ADD JAR /home/hadoop/hive-0.13.1/lib/idoall.org-0.0.1-SNAPSHOT-jar-with-dependencies.jar;Added /home/hadoop/hive-0.13.1/lib/idoall.org-0.0.1-SNAPSHOT-jar-with-dependencies.jar to class pathAdded resource: /home/hadoop/hive-0.13.1/lib/idoall.org-0.0.1-SNAPSHOT-jar-with-dependencies.jarhive> |
|
1
2
3
|
hive> CREATE TEMPORARY FUNCTION rowSequence AS 'idoall.org.hive.UDFRowSequence';OKTime taken: 0.048 seconds |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
-- External table over raw Nginx access logs in HDFS, split into 18 columns
-- by RegexSerDe and partitioned by date for pruning.
-- (Column name "requestmethond" keeps the original spelling: downstream
-- statements reference it.)
CREATE EXTERNAL TABLE nginx_accesslog (
    host                   STRING,
    hostuser               STRING,
    times                  STRING,
    requestmethond         STRING,
    requesturl             STRING,
    requesthttp            STRING,
    status                 STRING,
    body_bytes_sent        STRING,
    referer                STRING,
    useragent              STRING,
    http_x_forwarded_for   STRING,
    gzip_ratio             STRING,
    request_time           STRING,
    bytes_sent             STRING,
    request_length         STRING,
    upstream_addr          STRING,
    upstream_status        STRING,
    upstream_response_time STRING
)
PARTITIONED BY (YEAR STRING, MONTH STRING, DAY STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    -- One capture group per column, matching the log_format above.
    "input.regex" = "([^ ]*)\\s+-\\s+(.+?|-)\\s+\\[(.*)\\]\\s+\"([^ ]*)\\s+([^ ]*)\\s+([^ |\"]*)\"\\s+(-|[0-9]*)\\s+(-|[0-9]*)\\s+\"(.+?|-)\"\\s+\"(.+?|-)\"\\s+\"(.+?|-)\"\\s+\"(.+?|-)\"\\s+(.+?|-)\\s+(.+?|-)\\s+(.+?|-)\\s+\"(.+?|-)\"\\s+(.+?|-)\\s+(.*)",
    "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s %11$s %12$s %13$s %14$s %15$s %16$s %17$s %18$s"
)
STORED AS TEXTFILE
LOCATION '/user/hive/warehouse/nginxlog';
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
hive> LOAD DATA LOCAL INPATH '/home/hadoop/hive-0.13.1/a.com.access.20140821.log' OVERWRITE INTO TABLE nginx_accesslog partition (YEAR='2014', MONTH='08',DAY='21');Copying data from file:/home/hadoop/hive-0.13.1/a.com.access.20140821.logCopying file: file:/home/hadoop/hive-0.13.1/a.com.access.20140821.logLoading data to table default.nginx_accesslog partition (year=2014, month=08, day=21)Partition default.nginx_accesslog{year=2014, month=08, day=21} stats: [numFiles=1, numRows=0, totalSize=3483, rawDataSize=0]OKTime taken: 1.046 secondshive> select count(0) from nginx_accesslog;Total jobs = 1Launching Job 1 out of 1Number of reduce tasks determined at compile time: 1In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number>In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number>In order to set a constant number of reducers: set mapreduce.job.reduces=<number>Starting Job = job_1408550631561_0005, Tracking URL = http://m1:8088/proxy/application_1408550631561_0005/Kill Command = /home/hadoop/hadoop-2.2.0/bin/hadoop job -kill job_1408550631561_0005Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 12014-08-22 23:19:55,322 Stage-1 map = 0%, reduce = 0%2014-08-22 23:20:01,669 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 0.74 sec2014-08-22 23:20:08,926 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 1.59 secMapReduce Total cumulative CPU time: 1 seconds 590 msecEnded Job = job_1408550631561_0005MapReduce Jobs Launched: Job 0: Map: 1 Reduce: 1 Cumulative CPU: 1.59 sec HDFS Read: 3734 HDFS Write: 3 SUCCESSTotal MapReduce CPU Time Spent: 1 seconds 590 msecOK13Time taken: 24.762 seconds, Fetched: 1 row(s)hive> |
|
1
2
3
4
5
|
root@m1:/home/hadoop# /home/hadoop/hadoop-2.2.0/bin/hadoop fs -copyFromLocal /home/hadoop/hive-0.13.1/a.com.access.20140821.log /user/hive/warehouse/nginxlog/root@m1:/home/hadoop# /home/hadoop/hadoop-2.2.0/bin/hadoop fs -ls /user/hive/warehouse/nginxlogFound 1 items-rw-r--r-- 3 root supergroup 3483 2014-08-22 23:18 /user/hive/warehouse/nginxlog/a.com.access.20140821.logroot@m1:/home/hadoop# |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
#先删除表hive> drop table nginx_accesslog;OKTime taken: 0.363 secondshive> #再创建表....此处省略,参考上文命令重新创建一次#从HDFS导入数据(如果文件存在,要先删除),从下图可以看到,数据导入成功hive> LOAD DATA inpath '/user/hive/warehouse/nginxlog/a.com.access.20140821.log' overwrite INTO TABLE nginx_accesslog partition (YEAR='2014', MONTH='08',DAY='21');Loading data to table default.nginx_accesslog partition (year=2014, month=08, day=21)Partition default.nginx_accesslog{year=2014, month=08, day=21} stats: [numFiles=1, numRows=0, totalSize=3483, rawDataSize=0]OKTime taken: 0.373 secondshive> select * from nginx_accesslog limit 100;OK8.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET / HTTP/1.1 200 2373 - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - 2.78 0.004 2683 369 unix:/var/run/php5-fpm.sock 200 0.004 2014 08 218.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET 8.8.8.8/b519d8ca/css/base.css HTTP/1.1 200 940 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.000 1247 373 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET 8.8.8.8/a3e2e507/jquery.min.js HTTP/1.1 200 93636 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.152 93976 359 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET 8.8.8.8/b519d8ca/image/logo.png HTTP/1.1 200 6059 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.000 6369 377 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET 8.8.8.8/b519d8ca/image/p02.jpg HTTP/1.1 200 22177 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.000 22489 376 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET 8.8.8.8/b519d8ca/image/p03.png HTTP/1.1 200 3012 http://xxx.com/ Mozilla/5.0 
(Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.000 3321 376 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET 8.8.8.8/b519d8ca/image/two-dimension-code1.png HTTP/1.1 200 761 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.000 1069 392 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET 8.8.8.8/b519d8ca/image/bg.png HTTP/1.1 200 11474 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.000 11785 375 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET 8.8.8.8/b519d8ca/image/p04.png HTTP/1.1 200 2860 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.000 3169 376 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET 8.8.8.8/b519d8ca/image/p06.png HTTP/1.1 200 74097 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.062 74409 376 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:45 +0800 GET 8.8.8.8/b519d8ca/image/p05.png HTTP/1.1 200 132072 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.256 132385 376 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:46 +0800 GET 8.8.8.8/b519d8ca/image/p07.png HTTP/1.1 200 207987 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.592 208300 376 - - - 2014 08 218.8.8.8 - 22/Aug/2014:20:23:46 +0800 GET 8.8.8.8/b519d8ca/image/p01.png HTTP/1.1 200 310418 http://xxx.com/ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 - - 0.918 310731 376 - - - 2014 08 21Time 
taken: 0.056 seconds, Fetched: 13 row(s)hive> |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
-- Hive table backed by HBase: ":key" maps to the HBase RowKey (the
-- auto-increment id produced by rowSequence()); every other column goes into
-- the single "log" column family.
CREATE TABLE h2b_nginx_accesslog (
    key                    INT,
    host                   STRING,
    hostuser               STRING,
    times                  STRING,
    requestmethond         STRING,
    requesturl             STRING,
    requesthttp            STRING,
    status                 STRING,
    body_bytes_sent        STRING,
    referer                STRING,
    useragent              STRING,
    http_x_forwarded_for   STRING,
    gzip_ratio             STRING,
    request_time           STRING,
    bytes_sent             STRING,
    request_length         STRING,
    upstream_addr          STRING,
    upstream_status        STRING,
    upstream_response_time STRING
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,log:host,log:hostuser,log:times,log:requestmethond,log:requesturl,log:requesthttp,log:status,log:body_bytes_sent,log:referer,log:useragent,log:http_x_forwarded_for,log:gzip_ratio,log:request_time,log:bytes_sent,log:request_length,log:upstream_addr,log:upstream_status,log:upstream_response_time"
)
TBLPROPERTIES ("hbase.table.name" = "h2b_nginx_accesslog");
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
hbase(main):002:0> listTABLE h2b_nginx_accesslog 1 row(s) in 0.1220 seconds=> ["h2b_nginx_accesslog"]/* 查看表结构时,只会显示列族,而不会显示列。Hbase表中的每个列,都归属于某个列族。列族是表的schema的一部分(而列不是)。*/hbase(main):003:0> describe "h2b_nginx_accesslog"DESCRIPTION ENABLED 'h2b_nginx_accesslog', {NAME => 'log', DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', CO true MPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => '2147483647', KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} 1 row(s) in 0.5890 secondshbase(main):004:0> |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
-- Copy all rows from the Hive log table into the HBase-backed table,
-- generating the RowKey with the rowSequence() UDF registered earlier.
-- Fix: the original had "selectrowSequence()" (the newline between "select"
-- and "rowSequence()" was lost), which is invalid HiveQL; the interactive
-- transcript below shows the intended statement.
INSERT OVERWRITE TABLE h2b_nginx_accesslog
SELECT a.*
FROM (
    SELECT
        rowSequence(),
        host,
        hostuser,
        times,
        requestmethond,
        requesturl,
        requesthttp,
        status,
        body_bytes_sent,
        referer,
        useragent,
        http_x_forwarded_for,
        gzip_ratio,
        request_time,
        bytes_sent,
        request_length,
        upstream_addr,
        upstream_status,
        upstream_response_time
    FROM nginx_accesslog
) a;
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
hive> insert overwrite table h2b_nginx_accesslog > select a.* > from (select > rowSequence(), > host, > hostuser, > times, > requestmethond, > requesturl, > requesthttp, > status, > body_bytes_sent, > referer, > useragent, > http_x_forwarded_for, > gzip_ratio, > request_time, > bytes_sent, > request_length, > upstream_addr, > upstream_status, > upstream_response_time > from nginx_accesslog) a;Total jobs = 1Launching Job 1 out of 1Number of reduce tasks is set to 0 since there's no reduce operatorStarting Job = job_1408550631561_0017, Tracking URL = http://m1:8088/proxy/application_1408550631561_0017/Kill Command = /home/hadoop/hadoop-2.2.0/bin/hadoop job -kill job_1408550631561_0017Hadoop job information for Stage-0: number of mappers: 1; number of reducers: 02014-08-24 11:57:24,051 Stage-0 map = 0%, reduce = 0%2014-08-24 11:57:32,403 Stage-0 map = 100%, reduce = 0%, Cumulative CPU 1.96 secMapReduce Total cumulative CPU time: 1 seconds 960 msecEnded Job = job_1408550631561_0017MapReduce Jobs Launched: Job 0: Map: 1 Cumulative CPU: 1.96 sec HDFS Read: 3734 HDFS Write: 0 SUCCESSTotal MapReduce CPU Time Spent: 1 seconds 960 msecOKTime taken: 20.378 secondshive> |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
hbase(main):013:0> get "h2b_nginx_accesslog",1COLUMN CELL log:body_bytes_sent timestamp=1408852652522, value=2373 log:bytes_sent timestamp=1408852652522, value=2683 log:gzip_ratio timestamp=1408852652522, value=2.78 log:host timestamp=1408852652522, value=8.8.8.8 log:hostuser timestamp=1408852652522, value=- log:http_x_forwarded_for timestamp=1408852652522, value=- log:referer timestamp=1408852652522, value=- log:request_length timestamp=1408852652522, value=369 log:request_time timestamp=1408852652522, value=0.004 log:requesthttp timestamp=1408852652522, value=HTTP/1.1 log:requestmethond timestamp=1408852652522, value=GET log:requesturl timestamp=1408852652522, value=/ log:status timestamp=1408852652522, value=200 log:times timestamp=1408852652522, value=22/Aug/2014:20:23:45 +0800 log:upstream_addr timestamp=1408852652522, value=unix:/var/run/php5-fpm.sock log:upstream_response_time timestamp=1408852652522, value=0.004 log:upstream_status timestamp=1408852652522, value=200 log:useragent timestamp=1408852652522, value=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 18 row(s) in 0.0440 secondshbase(main):015:0> get "h2b_nginx_accesslog",1,{COLUMN => 'log:useragent'} COLUMN CELL log:useragent timestamp=1408852652522, value=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Safari/537.36 1 row(s) in 0.0080 seconds |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
hbase(main):031:0> import org.apache.hadoop.hbase.filter.CompareFilterhbase(main):032:0> import org.apache.hadoop.hbase.filter.SingleColumnValueFilterhbase(main):033:0> import org.apache.hadoop.hbase.filter.SubstringComparatorhbase(main):034:0> import org.apache.hadoop.hbase.util.Byteshbase(main):035:0> scan "h2b_nginx_accesslog",{FILTER => SingleColumnValueFilter.new(Bytes.toBytes('log'),Bytes.toBytes('requesturl'),CompareFilter::CompareOp.valueOf('EQUAL'),SubstringComparator.new('p04.png'))}ROW COLUMN+CELL 9 column=log:body_bytes_sent, timestamp=1408852652522, value=2860 9 column=log:bytes_sent, timestamp=1408852652522, value=3169 9 column=log:gzip_ratio, timestamp=1408852652522, value=- 9 column=log:host, timestamp=1408852652522, value=8.8.8.8 9 column=log:hostuser, timestamp=1408852652522, value=- 9 column=log:http_x_forwarded_for, timestamp=1408852652522, value=- 9 column=log:referer, timestamp=1408852652522, value=http://xxx.com/ 9 column=log:request_length, timestamp=1408852652522, value=376 9 column=log:request_time, timestamp=1408852652522, value=0.000 9 column=log:requesthttp, timestamp=1408852652522, value=HTTP/1.1 9 column=log:requestmethond, timestamp=1408852652522, value=GET 9 column=log:requesturl, timestamp=1408852652522, value=8.8.8.8/b519d8ca/image/p04.png 9 column=log:status, timestamp=1408852652522, value=200 9 column=log:times, timestamp=1408852652522, value=22/Aug/2014:20:23:45 +0800 9 column=log:upstream_addr, timestamp=1408852652522, value=- 9 column=log:upstream_response_time, timestamp=1408852652522, value=- 9 column=log:upstream_status, timestamp=1408852652522, value=- 9 column=log:useragent, timestamp=1408852652522, value=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.0 Sa fari/537.36 1 row(s) in 0.0320 secondshbase(main):036:0> |

|
1
|
hive> add jar /home/hjl/hive/lib/hive_contrib.jar; |
Nginx日志导入到Hive0.13.1,同步Hbase0.96.2,设置RowKey为autoincrement(ID自增长)的更多相关文章
- ubuntu12.04+hadoop2.2.0+zookeeper3.4.5+hbase0.96.2+hive0.13.1伪分布式环境部署
目录: 一.hadoop2.2.0.zookeeper3.4.5.hbase0.96.2.hive0.13.1都是什么? 二.这些软件在哪里下载? 三.如何安装 1.安装JDK 2.用parallel ...
- Nginx 日志分享
Nginx 日志对于大部分人来说是个未被发掘的宝藏,总结之前做某日志分析系统的经验,和大家分享一下 Nginx 日志的纯手工分析方式. Nginx 日志相关配置有 2 个地方:access_log 和 ...
- Nginx日志文件配置与切割
Nginx日志的指令主要有两条: log_format,设置日志的格式 access_log,指定日志文件的存放路径.格式和缓存大小 两条指令在Nginx配置文件中的位置可以在http{……..}之间 ...
- 【转】纯手工玩转 Nginx 日志
Nginx 日志对于大部分人来说是个未被发掘的宝藏,总结之前做某日志分析系统的经验,和大家分享一下 Nginx 日志的纯手工分析方式. Nginx 日志相关配置有 2 个地方:access_log 和 ...
- rsync同步Nginx日志遇到问题总结
一.目的 将nginx 日志通过普通用户利用rsync公钥认证的方式实时同步到本地服务器上,之后使用elk程序进行处理. 二.遇到问题及解决方法思路 问题1.文件权限:nginx 的日志默认权限如下: ...
- 使用 Heka 导入自定义的nginx日志到Elasticsearch
重置Heka执行进度 heka的进度配置文件存在配置项 base_dir 设置的目录,只需要删除这个文件夹下面的内容,就可以完全重置heka的进度. base_dir 配置项默认是在下面目录: '/v ...
- Nginx日志通过Flume导入到HDFS中
关注公众号:分享电脑学习回复"百度云盘" 可以免费获取所有学习文档的代码(不定期更新) flume上传到hdfs: 当我们的数据量比较大时,比如每天的日志文件达到5G以上 使用ha ...
- Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转)
From:http://www.itnose.net/detail/6065872.html # 需要软件 Hadoop-2.2.0(目前Apache官网最新的Stable版本) Hbase-0.96 ...
- _00018 Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1 分布式环境整合,Hadoop-2.X使用HA方式
博文作者:妳那伊抹微笑 itdog8 地址链接 : http://www.itdog8.com(个人链接) 博客地址:http://blog.csdn.net/u012185296 个性签名:世界上最 ...
随机推荐
- windows递归拷贝(或删除等操作)文件
SHFileOperation 以拷贝为例. CString strFrom = ....._T("src");CString strTo = ....._T("dest ...
- iOS开发 百度坐标点的计算
BMKMapPoint point1 = BMKMapPointForCoordinate(_userGps); BMKMapPoint point2 = BMKMapPointForCoordina ...
- java.util.logging.Logger 使用详解
概述: 第1部分 创建Logger对象 第2部分 日志级别 第3部分 Handler 第4部分 Formatter 第5部分 自定义 第6部分 Logger的层次关系 参考 第1部分 创建Logger ...
- E:Sudoku
总时间限制: 2000ms 内存限制: 65536kB描述Sudoku is a very simple task. A square table with 9 rows and 9 columns ...
- 读javascript高级程序设计06-面向对象之继承
原型链是实现继承的主要方法,通过原型能让一个引用类型继承另一个引用类型. 1.原型链实现继承 function SuperType(){ this.superprop=1; } SuperType.p ...
- CentOS7下ifconfig command not found
执行命令 yum install net-tools
- JS只弹出一个居中弹出窗口
var newWindow;//定义一个窗口,有利于窗口间的通讯function makeNewWindow(url) { if (!newWindow || newWindow.closed) ...
- iOS开发Swift篇—(四)运算符
iOS开发Swift篇—(四)运算符 一.运算符 1.Swift所支持的部分运算符有以下一些 赋值运算符:= 复合赋值运算符:+=.-= 算术运算符:+.-.*./ 求余运算符:% 自增.自减运算符: ...
- 转: 什么是REST?
REST (REpresentation State Transfer) 描述了一个架构样式的网络系统,比如 web 应用程序.它首次出现在 2000 年 Roy Fielding 的博士论文中,他是 ...
- wp8.1 Study19:通知
一.通知形式 在windowsphone系统中,通知有很多中形式,如下图 (Tile磁贴在前一博文已复习过,Badge形式与tile类似) 1.Toast 它主要是利用xml文件来编写的,xml代码如 ...