Python分析NGINX LOG版本二

不好意思，上一版逻辑有错误，（只分析了一次就没了）

此版改正。

按同事要改，作成传参数形式，搞定。

#!/usr/bin/env python
# coding: utf-8

###################################
# User:chengang                   #
# Email:aguncn@163.com #
# Date:2016-02-25                 #
###################################

import time
import datetime
import sys
import os
import os.path
import re
import json

class NginxLog(object):

    def __init__(self, log_file, interface_list, seek_file):
        self.log_file = log_file
        self.interface_list = interface_list
        self.seek_file = seek_file

    # 将输出编码成json格式
    def jsonFormat(self, python_data):
        json_data = json.dumps(python_data, indent=2)
        return json_data

    # 获取电脑主机名
    def hostname(self):
        sys = os.name
        if sys == 'nt':
            hostname = os.getenv('computername')
            return hostname
        elif sys == 'posix':
            host = os.popen('echo $HOSTNAME')
            try:
                hostname = host.read()
                return hostname
            finally:
                host.close()
        else:
            return 'Unkwon hostname'

    # 将读过的文件游标写入临时文件
    def writeSeek(self, seek):
        with open(self.seek_file,'w') as f:
            f.write(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))+"\n")
            f.write(str(seek)+"\n")

    # 读出新生成的日志条目
    def LogRead(self):
        # 如果第一次运行，或是删除临时文件，从头运行，否则，从上次读取之后运行
        # 0代表从头开始，1代表当前位置，2代表文件最末尾位置。
        if os.path.exists(self.seek_file):
            with open(self.seek_file) as f:
                seek_tmp = f.readlines()
            seek_old = int(seek_tmp[1].strip())
        else:
            seek_old = 0
        with open(self.log_file) as f:
            #记录当前最新文件游标
            f.seek(0,2)
            seek_now = f.tell()
            # 读取上次读完之后的日志条目
            if seek_now >= seek_old:
                f.seek(seek_old,0)
                chunk = f.read(seek_now-seek_old)
                # 也可以考虑用xreadlines来实现
                # for line in f.xreadlines():
                #    pass # do something
            # 如果文件游标倒退，说明日志文件已轮循，从头开始
            else:
                f.seek(0,0)
                chunk = f.read(seek_now)
        # 将这次的游标写入临时文件
        self.writeSeek(seek_now)
        return chunk

    def LogStatistics(self):
        #分析NGINX日志的正则表达示，如果日志格式更改，则需要相应作更改
        #我拿到的日志样本和鹏龙的不一样，所以注释了一个字段
        #field 0
        field_remote_addr = r"?P<l_remote_addr>.*"
        #field 1
        field_remote_user = r"?P<l_remote_user>-"
        #field 2
        field_time_local = r"?P<l_time_local>\[.*\]"
        #field 3
        field_request = r"?P<l_request>\"[^\"]*\""
        #field 4
        field_status = r"?P<l_status>\"[^\"]*\""
        #field 5
        field_body_bytes_sent = r"?P<l_body_bytes_sent>\d+"
        #field 6
        field_http_refere = r"?P<l_http_refere>\"[^\"]*\""
        #field 7
        field_http_user_agent = r"?P<l_http_user_agent>\"[^\"]*\""
        #field 8
        #field_http_x_fowarded_for = r"?P<l_http_x_fowarded_for>\"[^\"]*\""
        #field 8
        field_all_cookie = r"?P<l_all_cookie>\"[^\"]*\""
        #field 9
        field_gzip_ratio = r"?P<l_gzip_ratio>\"[^\"]*\""
        #field 10
        field_upstream_addr = r"?P<l_upstream_addr>.*"
        #field 11
        field_bytes_sent = r"?P<l_bytes_sent>\d+"
        #field 12
        field_request_length = r"?P<l_request_length>\d+"
        #field 13
        field_request_time = r"?P<l_request_time>.*"

        #以下为样例，方便调试

        # 正则匹配字段
        nginxlog_pattern = re.compile(r"(%s)\s-\s(%s)\s(%s)\s(%s)\s(%s)\s(%s)\s(%s)\s(%s)\s(%s)\s(%s)\s(%s)\s(%s)\s(%s)\s(%s)" \
                                      %(field_remote_addr,field_remote_user,field_time_local,field_request,field_status, \
                                        field_body_bytes_sent,field_http_refere,field_http_user_agent, \
                                        field_all_cookie,field_gzip_ratio,field_upstream_addr,field_bytes_sent,field_request_length, \
                                        field_request_time),re.VERBOSE)
        #输出结果
        result_list = []
    org_list = []
    check_list = []
        # 未启用字段，作占位用
        time_ns =  datetime.datetime.now().microsecond
        #转换成符合要求的时间秒格式
        time_stamp = int(str(time.time())[0:10])
        host_name = self.hostname()
    chunk = self.LogRead()
        # 多少个URL，就要循环读取多少次，算法粗糙，后面再想办法吧，因为如果只循环一次文件读取，则在里面要循环列表，还难理顺思路
        for interface_item in self.interface_list:
        check_list.append(interface_item.lower())
            # json格式样例 {"ns":470464001,"clock":1450368176,"value":"1","key":"macs.func.exeCount_0ms_50ms[104_202]","host":"SQSZ-L3674"},
            # 构造符合要求的字典
            interface_item_dict_count = {}
            interface_item_dict_avg_request_time = {}
            interface_item_dict_2xx = {}
            interface_item_dict_4xx = {}
            interface_item_dict_5xx = {}
            interface_item_dict_count['ns']=interface_item_dict_avg_request_time['ns']=interface_item_dict_2xx['ns']=interface_item_dict_4xx['ns']=interface_item_dict_5xx['ns']=time_ns
            interface_item_dict_count['clock']=interface_item_dict_avg_request_time['clock']=interface_item_dict_2xx['clock']=interface_item_dict_4xx['clock']=interface_item_dict_5xx['clock']=time_stamp
            interface_item_dict_count['host']=interface_item_dict_avg_request_time['host']=interface_item_dict_2xx['host']=interface_item_dict_4xx['host']=interface_item_dict_5xx['host']=host_name
            interface_item_dict_count['key'] = interface_item + '_count'
            interface_item_dict_count['value'] = 0
            interface_item_dict_avg_request_time['key'] = interface_item + '_avg_request_time'
            interface_item_dict_avg_request_time['value'] = 0
            interface_item_dict_2xx['key'] = interface_item + '_2xx'
            interface_item_dict_2xx['value'] = 0
            interface_item_dict_4xx['key'] = interface_item + '_4xx'
            interface_item_dict_4xx['value'] = 0
            interface_item_dict_5xx['key'] = interface_item + '_5xx'
            interface_item_dict_5xx['value'] = 0
            hit_url_count = 0
            for line in chunk.split('\n'):
                line_matchs = nginxlog_pattern.match(line)
                if line_matchs!=None:
                    #匹配字段
                    allGroups = line_matchs.groups()
                    remote_addr = allGroups[0]
                    #切割出真正的URL
                    request_url = allGroups[3].split()[1].split('?')[0].split('/')[-1]
                    status_code = allGroups[4]
                    request_time = allGroups[13]
            # print interface_item.lower(), request_url.lower()
            if request_url.lower() not in org_list:
            org_list.append(request_url.lower())
                    # 匹配URL之后进行数据结构操作
                    if interface_item.lower() == request_url.lower():
                        hit_url_count += 1
                        interface_item_dict_count['value'] += 1
                        interface_item_dict_avg_request_time['value'] += float(request_time)
                        '):
                            interface_item_dict_2xx['value'] += 1
                        '):
                            interface_item_dict_4xx['value'] += 1
                        '):
                            interface_item_dict_5xx['value'] += 1
            # 求平均请求反应时间
            if interface_item_dict_avg_request_time['value'] != 0:
                interface_item_dict_avg_request_time['value'] = interface_item_dict_avg_request_time['value'] / hit_url_count

            #入总列表
            result_list.append(interface_item_dict_count)
            result_list.append(interface_item_dict_avg_request_time)
            result_list.append(interface_item_dict_2xx)
            result_list.append(interface_item_dict_4xx)
            result_list.append(interface_item_dict_5xx)
        return self.jsonFormat(result_list)

    def resultOutput(self):
        pass

def main():

    # 处理传参，生成日志路径及接口列表
    arg_length = len(sys.argv)
    if arg_length < 3:
    print 'args too short , at least 2(log path and interface name)'
    print 'sample: python NginxPyLog.py /fat_nginx/host.access.log getReportData getIndexData getRealTimeDatas getDayDataBigInt getBlockData getBlockDetail getRealTimeData getTrendData'
        sys.exit(0)

    #日志定位
    log_file = sys.argv[1]
    #需要收集的接口url
    interface_list = sys.argv[2:]

    # log_file  = '/applogs/fat_nginx/host.access.log'
    # interface_list 为下面的参数组合
    '''
    getReportData
    getIndexData
    getBlockData
    getBlockDetail
    getRealTimeData
    getRealTimeDatas
    getDayDataBigInt
    getTrendData
    '''

    # 临时文件游标文件
    seek_file = '/tmp/log_check_seek.tmp'

    # 传入相应参数，实例化类，获取和打印返回值
    nginx_log = NginxLog(log_file, interface_list, seek_file)
    return_json_data = nginx_log.LogStatistics()
    print return_json_data

if __name__ == "__main__":

    main()

Python分析NGINX LOG版本二的更多相关文章

利用python分析nginx日志
最近在学习python,写了个脚本分析nginx日志,练练手.写得比较粗糙,但基本功能可以实现. 脚本功能:查找出当天访问次数前十位的IP,并获取该IP来源,并将分析结果发送邮件到指定邮箱. 实现前两 ...
python分析nginx自定义日志
# -*- coding:utf-8 -*- import datetimeimport re logfile = '''192.168.23.43 - 2017-12-14:00:14:41 /se ...
使用Docker快速部署ELK分析Nginx日志实践（二）
Kibana汉化使用中文界面实践一.背景笔者在上一篇文章使用Docker快速部署ELK分析Nginx日志实践当中有提到如何快速搭建ELK分析Nginx日志,但是这只是第一步,后面还有很多仪表盘需要 ...
一天，python搞个分析NGINX日志的脚本
准备给ZABBIX用的. 统计接口访问字次,平均响应时间,4XX,5XX次数以后可以再改进.. #!/usr/bin/env python # coding: utf-8 ############# ...
分析nginx 日志常用命令
一.概念并发连接数客户端向服务器发起请求,并建立了TCP连接.每秒钟服务器链接的总TCP数量,就是并发连接数.请求数请求数指的是客户端在建立完连接后,向http服务发出GET/POS ...
烂泥：利用awstats分析nginx日志
本文由ilanniweb提供友情赞助,首发于烂泥行天下想要获得更多的文章,可以关注我的微信ilanniweb 昨天把nginx的日志进行了切割,关于如何切割nginx日志,可以查看<烂泥:切割 ...
elk平台分析nginx日志的基本搭建
一.elk套件介绍 ELK 由 ElasticSearch . Logstash 和 Kiabana 三个开源工具组成.官方网站: https://www.elastic.co/products El ...
Linux yum的配置 , python环境管理, nginx搭建简单学习
Linux yum的配置 , python环境管理, nginx搭建简单学习一丶配置yum的数据仓库 ### yum 工具, 方便,自行解决软件之间的依赖关系. # 配置yum源仓库 (可以使用,清 ...
GCE 部署 ELK 7.1可视化分析 nginx
目录一.准备 1.1.服务器环境准备二.安装 ES 2.1.遇到小问题三.安装 Kibana 四.安装 Logstash 一.准备我这边有一个网站放在了 Google VM 上面,所以打算在购 ...

随机推荐

builder-设计模式
package com.wp.java.builder; import org.junit.Test; public class DoDoContactDemo { @Test public void ...
EF6数据迁移
当Moldes发生改变时会提示数据上下文的模型已在数据库创建后发生改变,则需要重建数据库并数据迁移在NuGet程序包管理控制台输入enable-migrations启用数据迁移之后会提示&quo ...
一款功能强大的iphone购物应用源码
一款功能强大的iphone购物应用源码,这款应用源码比较完整的,并且还支持信用卡支付服务等功能的,基本实现了我们常用的购物应用功能了,实现商品的基本展示功能,还具有完整的用户管理,以及完整的购物流程等 ...
常用sql时间字符转化
这边主要用到2个函数 convert() cast() cast是对数据字符类型的转化,例如: cast(date as datetime) 这样就将date字段转化成为时间类型了因为常用到 ...
virtualbox安装centos6.5碰到的问题
今天无聊用virtualbox安装centos6.5 , 自己笔记本vm撑不住, 用公司的试试virtualbox先安装快完成时没有足够的内存配置kdump”(在英文界面下提示的是“insuffi ...
ASP.NET MVC局部验证及相关问题
在上一篇“asp.net mvc常用的数据注解和验证以及entity framework数据映射”话题中,有的博友提到 ‘“同一个实体在3-4个地方会发生修改,每个修改需要验证的方式都不一样,后端就不 ...
string和json转换的简单应用
import com.alibaba.fastjson.JSON; String strjson = request.getParameter("param"); //url-js ...
hdu 5690 2016"百度之星" - 初赛（Astar Round2A） All X 快速二次幂 || 寻找周期
题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=5690 题意:m个数字全为x mod k ?= c;其中m <= 1010,0 < c,k ...
Eclipse 下 opennms 开发环境搭建
1.eclipse3.5或更高版本,并且使用纯净的java版.下载地址:Eclipse for Java Developers. 2.安装需要的插件.通过Help/Install New Softwa ...
HTTP 错误 404.3 - Not Found
在使用win2012服务器上的IIS发布网页的时候,出现下面的错误解决办法: 将应用程序开发下的所有功能都安装. 如果上面的方法没解决问题的话,那么看看下图中的这些安装没,没有的话就继续安装.

Python分析NGINX LOG版本二

Python分析NGINX LOG版本二的更多相关文章

随机推荐

热门专题