#!/usr/bin/env python ''' 处理访问日志,筛选时间大于1秒的请求 ''' with open('test.log','a+',encoding='utf-8') as f_a: with open('wkxz-api.access.log') as f: for line in f.readlines(): if line[-2:] == "-\n" : num =float(line[-7:-2]) else: num=float(line[-6:]) if…
处理nginx访问日志,筛选时间大于1秒的请求   #!/usr/bin/env python ''' 处理访问日志,筛选时间大于1秒的请求 ''' with open('test.log','a+',encoding='utf-8') as f_a: with open('wkxz-api.access.log') as f: for line in f.readlines(): if line[-2:] == "-\n" : num =float(line[-7:-2]) else…
前一阵子,搭建了ELK日志分析平台,用着挺爽的,再也不用给开发拉各种日志,节省了很多时间. 这篇博文是介绍用python代码实现日志分析的,用MRJob实现hadoop上的mapreduce,可以直接放到hadoop集群上运行. mrjob可以让我们使用Python编写MapReduce运算,并在多个不同平台运行,你可以: 使用纯python编写multi-step MapReduce 本机测试 在hadoop集群上运行 安装mrjob pip install mrjob nginx访问日志格式…
代码: # cat top_10_request.py #!/usr/bin/env python # coding=utf-8 from mrjob.job import MRJob from mrjob.step import MRStep from nginx_accesslog_parser import NginxLineParser import heapq class UrlRequest(MRJob): nginx_line_parser = NginxLineParser()…
nginx默认的日志格式 log_format main '$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" "$http_x_forwarded_for"'; 字段说明 127.0.0.1 - - [14/May/2017:12:51:13…
0:Nginx日志格式配置 # vim nginx.conf ## # Logging Settings ## log_format access '$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent $request_body "$http_referer" ' '"$http_user_agent" "$http_x_for…
#!/bin/bash export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/root/bin # Nginx 日志格式: # log_format main '$remote_addr - $remote_user [$time_local] "$request" ' # '$status $body_bytes_sent "$http_referer" ' # '"$http_us…
代码: # pv_day.py#!/usr/bin/env python # coding=utf-8 from mrjob.job import MRJob from nginx_accesslog_parser import NginxLineParser class PvDay(MRJob): nginx_line_parser = NginxLineParser() def mapper(self, _, line): self.nginx_line_parser.parse(line)…
1.说明 默认的时间格式是:[08/Mar/2013:09:30:58 +0800],由$time_local变量表示. 我想要改成如下格式:2013-03-08 12:21:03. 2.需要修改的文件 src/core/nginx_times.c src/http/modules/ngx_http_log_module.c 首先修改ngx_http_log_module.c文件: { ngx_string(, 更改后 { ngx_string(, 然后修改nginx_times.c文件: [s…
useragent: 代码(不包含蜘蛛): # cat top_10_useragent.py #!/usr/bin/env python # coding=utf-8 from mrjob.job import MRJob from mrjob.step import MRStep from nginx_accesslog_parser import NginxLineParser import heapq class UserAgent(MRJob): nginx_line_parser =…