RabbitMQ consumer
import pika
import json
import time
import os
import ast
import uuid
import hashlib
import logging
import redis
import pymysql
from logging import handlers  # timed rotating file handler for logging
class Logger(object):
    # Mapping from level name to stdlib logging level
    level_relations = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL
    }

    def __init__(self, filename, level='info', when='D', backCount=3,
                 fmt='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'):
        self.logger = logging.getLogger(filename)
        format_str = logging.Formatter(fmt)  # log record format
        self.logger.setLevel(self.level_relations.get(level))  # set the log level
        sh = logging.StreamHandler()  # echo log records to the console
        sh.setFormatter(format_str)
        # TimedRotatingFileHandler writes to a file and rotates it on a schedule.
        # backupCount is how many rotated files to keep (older ones are deleted);
        # when is the rotation interval unit:
        #   S - seconds
        #   M - minutes
        #   H - hours
        #   D - days
        #   W - weekly (interval == 0 means Monday)
        #   midnight - every day at midnight
        th = handlers.TimedRotatingFileHandler(filename=filename, when=when, backupCount=backCount,
                                               encoding='utf-8')
        th.setFormatter(format_str)
        self.logger.addHandler(sh)
        self.logger.addHandler(th)


file = 'all'
log = Logger('clear' + os.sep + '%s.log' % file)
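# Hedged usage sketch (illustrative, not part of the original script): Logger is a
# thin wrapper over the stdlib logging module; callers use the exposed .logger, e.g.:
#   demo_log = Logger('clear' + os.sep + 'demo.log', level='debug', when='midnight')
#   demo_log.logger.info('goes to the console and to a daily-rotated file')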
logger = log.logger

DEBUG = False


class ClearLog(object):
    if DEBUG:
        DATABASE = 'unionlog'
        # local test environment
        poll = redis.ConnectionPool(host='192.168.10.10', port=7000, db=5, password='', decode_responses=True)
        conn = pymysql.connect(host='192.168.10.5', user='root',
                               password='root',
                               database=DATABASE, charset='utf8')
        cursor = conn.cursor()
    else:
        DATABASE = 'log'
        # production environment
        poll = redis.ConnectionPool(host='192.168.5.219', port=6379, db=5, password='', decode_responses=True)
        conn = pymysql.connect(host='', user='datacenter',
                               password='kbs11zx@',
                               database=DATABASE, charset='utf8')
        cursor = conn.cursor()
    CONN = redis.Redis(connection_pool=poll)
    REDIS_PID_HASH = "tarsier.log.clear.pid.hash"
    REDIS_PID_DELETE_HASH = "tarsier.log.delete.pid.hash"
    REDIS_PID_DELETE_LIST = "tarsier.log.delete.pid.list"
    REDIS_PID_DELETE_LIST_TEMP = "tarsier.log.delete.pid.list.temp"
    table_list = []
    table = 'tarsier_log_details'
    instance = None
    def __new__(cls, *args, **kwargs):
        # Singleton: cache the first instance and return it on later calls
        if cls.instance is None:
            cls.instance = super().__new__(cls)
        return cls.instance
    @staticmethod
    def get_table_list(table):
        ClearLog.table = table
        # check whether the table already exists
        if table in ClearLog.table_list:
            pass
        else:
            ClearLog.cursor.execute("SHOW TABLES")
            res = ClearLog.cursor.fetchall()
            table_temp = []
            for i in res:
                table_temp.append(i[0])
            ClearLog.table_list = table_temp
            if table in ClearLog.table_list:
                pass
            else:
                # create the monthly table as a copy of the template table
                sql = """create table %s like tarsier_log_details""" % table
                try:
                    print('creating table')
                    ClearLog.cursor.execute(sql)
                except Exception:
                    pass
                ClearLog.cursor.execute("SHOW TABLES")
                res = ClearLog.cursor.fetchall()
                table_temp = []
                for i in res:
                    table_temp.append(i[0])
                ClearLog.table_list = table_temp
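    # Hedged note (illustration, not in the original): tables are sharded by month,
    # so data cleaned in November 2020 lands in tarsier_log_details_202011, which
    # "CREATE TABLE ... LIKE tarsier_log_details" clones from the template schema.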
    # upsert one cleaned record into the current monthly table
    @staticmethod
    def updata_db(data):
        # ##################### table name #####################
        table = "tarsier_log_details_%s" % ClearLog.timestamp_to_str(format="%Y%m")
        ClearLog.get_table_list(table)
        keys = ', '.join(data.keys())
        values = ', '.join(['%s'] * len(data))
        # An INSERT with ON DUPLICATE KEY UPDATE appended: if the primary key
        # already exists, the row is updated instead of inserted.
        sql = 'INSERT INTO {table}({keys}) VALUES ({values}) ON DUPLICATE KEY UPDATE'.format(table=table, keys=keys,
                                                                                             values=values)
        update = ','.join([" {key} = %s".format(key=key) for key in data])
        sql += update
        try:
            # both placeholder groups take the same values, hence the doubled tuple
            ClearLog.cursor.execute(sql, tuple(data.values()) * 2)
            print('update Successful')
            ClearLog.conn.commit()
        except Exception as e:
            logger.error(e)
            print('update Failed')
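    # Hedged worked example (illustrative field names): for data = {"id": ..,
    # "rsuid": ..} the statement built above expands to
    #   INSERT INTO tarsier_log_details_202011(id, rsuid) VALUES (%s, %s)
    #   ON DUPLICATE KEY UPDATE id = %s, rsuid = %s
    # and is executed with tuple(data.values()) * 2 to fill both placeholder groups.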
    @staticmethod
    def update_db_sql(sql):
        try:
            ClearLog.cursor.execute(sql)  # run the raw SQL
            ClearLog.conn.commit()  # commit to the database
            print('update succeeded')
        except Exception as e:
            print("ERROR:{}".format(str(e)))
            ClearLog.conn.rollback()  # roll back on error
            logger.info('error:%s' % str(e))

    def __call__(self, *args, **kwargs):
        pass

    def __init__(self):
        # ClearLog.main()
        pass
    @staticmethod
    def md5_me(key):
        # 32-character hex MD5 digest of the given key
        md5 = hashlib.md5()
        md5.update(str(key).encode('utf-8'))
        value = md5.hexdigest()
        return value
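    # Hedged example (illustrative input): md5_me('u1http://a/b') yields a stable
    # 32-char hex string, used below as a page-view fingerprint for deduplication.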
    @staticmethod
    def main():
        # replay a raw log file line by line through the cleaning pipeline
        with open('20201110.log', encoding='utf-8') as f:
            count = 0
            for item in f:
                line = item.strip()
                data = ast.literal_eval(line)
                # data cleaning - start
                ClearLog.clear_log(data)
                count += 1
                if count % 10000 == 0:
                    print(count)
    @staticmethod
    def main2(data):
        # data cleaning - start
        ClearLog.clear_log(data)
    @staticmethod
    def clear_log(data):
        res_data = {}
        rsUid = data.get('rsUid', '')
        rsPageId = data.get('rsPageId', '')
        rshyuid = data.get('rshyuid', '')
        pageid = data.get('pageid', '')
        pageUrl = data.get('pageUrl', '')
        userAgent = data.get('userAgent', '')
        referrer = data.get('referrer', '')
        nowDate = data.get('nowDate', '')
        device = data.get('device', '')
        rshyType = data.get('rshyType', '')
        targetDataset = str(data.get('targetDataset', ''))
        targetValue = data.get('targetValue', '')
        targetClassName = data.get('targetClassName', '')
        inputData = str(data.get('inputData', ''))
        rshyUserIp = data.get('rshyUserIp', '')
        netloc = data.get('netloc', '')
        urlPath = data.get('urlPath', '')
        siteName = data.get('siteName', '')
        TIME = ClearLog.timestamp_to_str()
        ID = ClearLog.get_uuid()
        rshyTime = data.get('rshyTime', '')
        try:
            # split "YYYY-MM-DD HH:MM:SS" into date and hour parts
            rsdate = rshyTime.split()[0]
            temp = rshyTime.split()[1]
            rshour = temp.split(':')[0]
        except Exception:
            rsdate = ''
            rshour = 0
        res_data.update({
            "id": ID,
            "rsuid": rsUid,
            "rshytime": rshyTime,
            "rshour": rshour,
            "rsdate": rsdate,
            "rspageid": rsPageId,
            "rshyuid": rshyuid,
            "pageid": pageid,
            "pageurl": pageUrl,
            "useragent": userAgent,
            "referrer": referrer,
            "device": device,
            "rshytype": rshyType,
            "targetvalue": targetValue,
            "targetdataset": targetDataset,
            "targetclassname": targetClassName,
            "inputdata": inputData,
            "starttime": nowDate,
            "rshyuserip": rshyUserIp,
            "netloc": netloc,
            "urlpath": urlPath,
            "sitename": siteName,
            "createtime": TIME,
            "updatetime": TIME,
        })
        if rshyType == 'view' or rshyType == '':
            # page views are deduplicated against the fingerprint stored in redis
            rsUidKey = rsPageId  # ClearLog.md5_me(pageid)
            if not rsPageId:
                return
            # keep refreshing this pid's last-seen time
            ClearLog.CONN.hset(ClearLog.REDIS_PID_DELETE_HASH, rsUidKey, nowDate)
            res_temp = rsUid + pageUrl + referrer + userAgent + device
            res_rs_uid = ClearLog.md5_me(res_temp)
            # fetch the fingerprint stored for this pid; if unchanged, skip storage
            exist_uid = ClearLog.CONN.hget(ClearLog.REDIS_PID_HASH, rsUidKey)
            if not exist_uid or res_rs_uid != str(exist_uid):
                ClearLog.CONN.hset(ClearLog.REDIS_PID_HASH, rsUidKey, res_rs_uid)
                # persist the cleaned record
                ClearLog.write_data(res_data)
                # keep a timestamped copy in the delete hash
                ClearLog.CONN.hset(ClearLog.REDIS_PID_DELETE_HASH, rsUidKey, nowDate)
                # and push the record onto the delete queue
                data_temp = {"pid": rsUidKey, "date": nowDate}
                ClearLog.CONN.lpush(ClearLog.REDIS_PID_DELETE_LIST, json.dumps(data_temp))
            return
        ClearLog.write_data(res_data)
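    # Hedged sketch of the dedup rule above (assumed reading): a 'view' event is
    # only persisted when the MD5 of (rsUid + pageUrl + referrer + userAgent +
    # device) differs from the fingerprint cached in REDIS_PID_HASH for that
    # rsPageId, so repeated identical page views collapse into a single row.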
    @staticmethod
    def write_data(data):
        ClearLog.updata_db(data)
        file_name = ClearLog.timestamp_to_str_m()
        with open('clear{}{}.clear.log'.format(os.sep, file_name), 'a+', encoding='utf-8') as f:
            f.write(str(data) + '\n')
    # formatted time string -> unix timestamp
    @staticmethod
    def str_to_timestamp(str_time=None, format='%Y-%m-%d %H:%M:%S'):
        if str_time:
            time_tuple = time.strptime(str_time, format)  # parse into a struct_time
            result = time.mktime(time_tuple)  # struct_time -> timestamp
            return int(result)
        return int(time.time())
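    # Hedged round-trip example: str_to_timestamp('2020-11-10 12:00:00') yields an
    # integer unix timestamp t (relative to the host's local timezone), and
    # timestamp_to_str(t) formats it back to '2020-11-10 12:00:00'; with no
    # argument both helpers fall back to "now".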
    # unix timestamp -> formatted time string
    @staticmethod
    def timestamp_to_str(timestamp=None, format='%Y-%m-%d %H:%M:%S'):
        if timestamp:
            time_tuple = time.localtime(timestamp)  # timestamp -> struct_time
            result = time.strftime(format, time_tuple)  # struct_time -> formatted string
            return result
        else:
            return time.strftime(format)
    # unix timestamp -> formatted date string (day precision)
    @staticmethod
    def timestamp_to_str_m(timestamp=None, format='%Y-%m-%d'):
        if timestamp:
            time_tuple = time.localtime(timestamp)
            result = time.strftime(format, time_tuple)
            return result
        else:
            return time.strftime(format)
    # random uuid without dashes
    @staticmethod
    def get_uuid():
        res = str(uuid.uuid4())
        UUID = ''.join(res.split('-'))
        return UUID
    # Runs every 5 minutes: drop entries from the delete hash and record dwell time
    @staticmethod
    def del_tarsier_log_pid_hash():
        table = ClearLog.table + '_%s' % ClearLog.timestamp_to_str_m(format='%Y%m')
        print('5-minute sweep: deleting hash entries and computing dwell time')
        get_pid_list = ClearLog.CONN.hgetall(ClearLog.REDIS_PID_DELETE_HASH)
        for hash_pid_item in get_pid_list:
            redisDate = ClearLog.CONN.hget(ClearLog.REDIS_PID_DELETE_HASH, hash_pid_item)
            # If the stored time is more than 1 minute old, update the end time of
            # the visit and delete the hash entry; otherwise leave it alone.
            try:
                redis_data_time = ClearLog.str_to_timestamp(redisDate)
                now_data_time = time.time()
                chufatime = now_data_time - redis_data_time
                if chufatime >= 60:
                    # update the end time for this page view
                    sql = """update {} set endtime='{}' where rspageid='{}'""".format(table, redisDate,
                                                                                      hash_pid_item)
                    print(sql)
                    ClearLog.update_db_sql(sql)
                    # then remove the entry from redis
                    ClearLog.CONN.hdel(ClearLog.REDIS_PID_DELETE_HASH, hash_pid_item)
            except Exception:
                pass
        print('====================================')
    # Runs once a day: clear out the delete queue
    @staticmethod
    def del_tarsier_log_pid_list():
        logger.info('daily sweep of the delete queue')
        res_str = ClearLog.CONN.lpop(ClearLog.REDIS_PID_DELETE_LIST)
        while res_str:
            try:
                # items were queued as data_temp = {"pid": rsUidKey, "date": nowDate}
                res_json = json.loads(res_str)
                nowDate = res_json.get("date", '')
                rsUidKey = res_json.get("pid", '')
                redis_data_time = ClearLog.str_to_timestamp(nowDate)
                now_data_time = time.time()
                chufatime = now_data_time - redis_data_time
                if chufatime >= 24 * 60 * 60:
                    # older than a day: drop the fingerprint so the pid can be stored again
                    ClearLog.CONN.hdel(ClearLog.REDIS_PID_HASH, rsUidKey)
                else:
                    # not due yet: park it in the temp queue
                    ClearLog.CONN.rpush(ClearLog.REDIS_PID_DELETE_LIST_TEMP, json.dumps(res_json))
            except Exception:
                pass
            # fetch the next item outside the try so a bad record cannot loop forever
            res_str = ClearLog.CONN.lpop(ClearLog.REDIS_PID_DELETE_LIST)
        # move parked items back onto the main queue
        res_str = ClearLog.CONN.lpop(ClearLog.REDIS_PID_DELETE_LIST_TEMP)
        while res_str:
            res_json = json.loads(res_str)
            ClearLog.CONN.rpush(ClearLog.REDIS_PID_DELETE_LIST, json.dumps(res_json))
            res_str = ClearLog.CONN.lpop(ClearLog.REDIS_PID_DELETE_LIST_TEMP)
        logger.info('queue sweep finished')
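# Hedged scheduling sketch (assumed; nothing in this script invokes the two
# maintenance jobs itself): they are meant to run on timers, e.g. from cron or
# a simple loop:
#   while True:
#       ClearLog.del_tarsier_log_pid_hash()   # every 5 minutes
#       time.sleep(5 * 60)
# with ClearLog.del_tarsier_log_pid_list() triggered once per day.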
# unix timestamp -> formatted day string
def timestamp_to_str_day(timestamp=None, format='%Y%m%d'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # timestamp -> struct_time
        result = time.strftime(format, time_tuple)  # struct_time -> formatted string
        return result
    else:
        return time.strftime(format)


# Connect to RabbitMQ and create a channel
rabbit_host = "192.168.2.129"
rabbit_username = 'rshy'
rabbit_password = 'root1234@AWJSW'
queue_topic = 'logs.collect.statistics'
user = pika.PlainCredentials(rabbit_username, rabbit_password)
connection = pika.BlockingConnection(pika.ConnectionParameters(host=rabbit_host, credentials=user))  # heartbeat=0
# connection = pika.BlockingConnection(pika.ConnectionParameters(host='192.168.2.129'))
channel = connection.channel()

# Declare the queue to listen on
# channel.queue_declare(queue=cfg.QUEUE_TOPIC)
channel.queue_declare(queue=queue_topic)

ClearLogObj = ClearLog()
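# Hedged producer sketch (assumed, for testing only; not part of this consumer):
# publishing one JSON payload onto the same queue exercises the pipeline end to
# end. The field values below are illustrative.
def _example_publish(payload=None):
    body = json.dumps(payload or {"rshyType": "view", "rsPageId": "demo-pid",
                                  "rsUid": "demo-uid", "nowDate": "2020-11-10 12:00:00"})
    channel.basic_publish(exchange='', routing_key=queue_topic, body=body)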
def consumer():
    print(' [*] Waiting for messages. To exit press CTRL+C')

    # Process and record each delivered message
    def callback(ch, method, properties, body):
        # print("Method: {}".format(method))
        # print("Properties: {}".format(properties))
        data = json.loads(body)
        print("--data--:", data)
        ClearLogObj.main2(data)
        # also append the raw payload to a daily consumer log file
        file_name = timestamp_to_str_day()
        with open('consumer' + os.sep + file_name + '.log', 'a+', encoding='utf-8') as f:
            f.write(str(data) + '\n')

    # Listen for and receive data from the queue (auto-acknowledge)
    # channel.basic_consume(cfg.QUEUE_TOPIC, callback, True)
    channel.basic_consume(queue=queue_topic, on_message_callback=callback, auto_ack=True)
    channel.start_consuming()


if __name__ == '__main__':
    consumer()