阿里云收集服务器性能指标的python脚本
#!/usr/bin/python
#########################################
# Function: sample linux performance indices
# Usage: python sampler.py
# Author: CMS DEV TEAM
# Company: Aliyun Inc.
# Version: 1.1
######################################### import os
import os.path
import sys
import time
import operator
import httplib
import logging
import socket
import random
from shutil import copyfile
from subprocess import Popen, PIPE
from logging.handlers import RotatingFileHandler logger = None
REMOTE_HOST = None
REMOTE_PORT = None
REMOTE_MONITOR_URI = None
UUID = None def get_mem_usage_percent():
try:
f = open('/proc/meminfo', 'r')
for line in f:
if line.startswith('MemTotal:'):
mem_total = int(line.split()[1])
elif line.startswith('MemFree:'):
mem_free = int(line.split()[1])
elif line.startswith('Buffers:'):
mem_buffer = int(line.split()[1])
elif line.startswith('Cached:'):
mem_cache = int(line.split()[1])
elif line.startswith('SwapTotal:'):
vmem_total = int(line.split()[1])
elif line.startswith('SwapFree:'):
vmem_free = int(line.split()[1])
else:
continue
f.close()
except:
return None
physical_percent = usage_percent(mem_total - (mem_free + mem_buffer + mem_cache), mem_total)
virtual_percent = 0
if vmem_total > 0:
virtual_percent = usage_percent((vmem_total - vmem_free), vmem_total)
return physical_percent, virtual_percent black_list = ('iso9660',) def usage_percent(use, total):
try:
ret = (float(use) / total) * 100
except ZeroDivisionError:
raise Exception("ERROR - zero division error")
return ret def get_disk_partition():
return_list = []
pd = []
try:
f = open("/proc/filesystems", "r")
for line in f:
if not line.startswith("nodev"):
fs_type = line.strip()
if fs_type not in black_list:
pd.append(fs_type)
f.close() f = open('/etc/mtab', "r")
for line in f:
if line.startswith('none'):
continue
tmp = line.strip().split()
ft = tmp[2]
if ft not in pd:
continue
return_list.append(tmp[1])
f.close()
except:
return None
return return_list def check_disk():
try:
return_dict = {}
p_list = get_disk_partition()
for i in p_list:
dt = os.statvfs(i)
use = (dt.f_blocks - dt.f_bfree) * dt.f_frsize
all = dt.f_blocks * dt.f_frsize
return_dict[i] = ('%.2f' % (usage_percent(use, all),), ('%.2f' % (all * 1.0 / (1024 * 1000000))))
except:
return None
return return_dict _CLOCK_TICKS = os.sysconf("SC_CLK_TCK") def get_cpu_time():
need_sleep = False
if not os.path.isfile('/tmp/cpu_stat') or os.path.getsize('/tmp/cpu_stat') == 0:
copyfile('/proc/stat', '/tmp/cpu_stat')
need_sleep = True try:
f1 = open('/tmp/cpu_stat', 'r')
values1 = f1.readline().split()
total_time1 = 0
for i in values1[1:]:
total_time1 += int(i)
idle_time1 = int(values1[4])
iowait_time1 = int(values1[5])
finally:
f1.close() if need_sleep:
time.sleep(1) f2 = open('/proc/stat', 'r')
try:
values2 = f2.readline().split()
total_time2 = 0
for i in values2[1:]:
total_time2 += int(i)
idle_time2 = int(values2[4])
iowait_time2 = int(values2[5])
finally:
f2.close()
idle_time = idle_time2 - idle_time1
iowait_time = iowait_time2 - iowait_time1
total_time = total_time2 - total_time1 cpu_percentage = int(100.0 * (total_time - idle_time - iowait_time) / total_time)
# compensate logic
if total_time < 0 or idle_time < 0 or iowait_time < 0 or cpu_percentage < 0 or cpu_percentage > 100:
time.sleep(1)
f3 = open('/proc/stat', 'r')
try:
values3 = f3.readline().split()
total_time3 = 0
for i in values3[1:]:
total_time3 += int(i)
idle_time3 = int(values3[4])
iowait_time3 = int(values3[5])
finally:
f3.close()
idle_time = idle_time3 - idle_time2
iowait_time = iowait_time3 - iowait_time2
total_time = total_time3 - total_time2
cpu_percentage = int(100.0 * (total_time - idle_time - iowait_time) / total_time) copyfile('/proc/stat', '/tmp/cpu_stat')
return cpu_percentage def network_io_kbitps():
"""Return network I/O statistics for every network interface
installed on the system as a dict of raw tuples.
"""
f1 = open("/proc/net/dev", "r")
try:
lines1 = f1.readlines()
finally:
f1.close() retdict1 = {}
for line1 in lines1[2:]:
colon1 = line1.find(':')
assert colon1 > 0, line1
name1 = line1[:colon1].strip()
fields1 = line1[colon1 + 1:].strip().split()
bytes_recv1 = float('%.4f' % (float(fields1[0]) * 0.0078125))
bytes_sent1 = float('%.4f' % (float(fields1[8]) * 0.0078125))
retdict1[name1] = (bytes_recv1, bytes_sent1)
time.sleep(1)
f2 = open("/proc/net/dev", "r")
try:
lines2 = f2.readlines()
finally:
f2.close()
retdict2 = {}
for line2 in lines2[2:]:
colon2 = line2.find(':')
assert colon2 > 0, line2
name2 = line2[:colon2].strip()
fields2 = line2[colon2 + 1:].strip().split()
bytes_recv2 = float('%.4f' % (float(fields2[0]) * 0.0078125))
bytes_sent2 = float('%.4f' % (float(fields2[8]) * 0.0078125))
retdict2[name2] = (bytes_recv2, bytes_sent2)
retdict = merge_with(retdict2, retdict1)
return retdict def disk_io_Kbps():
iostat = Popen("iostat -d -k 1 2 | sed '/Device\|Linux\|^$/d' > /tmp/disk_io", shell=True, stdout=PIPE, stderr=PIPE)
iostat_error = iostat.communicate()[1].strip()
if iostat_error:
logger.error("iostat not exists, %s" % iostat_error)
return None retdict = {}
exception = None
try:
try:
f = open('/tmp/disk_io', 'r')
except Exception, ex:
exception = ex
logger.error(exception)
if exception:
return None
lines = f.readlines()
for line in lines:
name, _, readkps, writekps, _, _, = line.split()
if name:
readkps = float(readkps)
writekps = float(writekps)
retdict[name] = (readkps, writekps)
return retdict
finally:
f.close() def merge_with(d1, d2, fn=lambda x, y: tuple(map(operator.sub, x, y))):
res = d1.copy() # "= dict(d1)" for lists of tuples
for key, val in d2.iteritems(): # ".. in d2" for lists of tuples
try:
res[key] = fn(res[key], val)
except KeyError:
res[key] = val
return res def get_load():
try:
f = open('/proc/loadavg', 'r')
tmp = f.readline().split()
lavg_1 = float(tmp[0])
lavg_5 = float(tmp[1])
lavg_15 = float(tmp[2])
f.close()
except:
return None
return lavg_1, lavg_5, lavg_15 def get_tcp_status():
check_cmd = "command -v ss"
check_proc = Popen(check_cmd, shell=True, stdout=PIPE)
ss = check_proc.communicate()[0].rstrip('\n')
if ss:
cmd = "ss -ant | awk '{if(NR != 1) print $1}' | awk '{state=$1;arr[state]++} END{for(i in arr){printf \"%s=%s \", i,arr[i]}}' | sed 's/-/_/g' | sed 's/ESTAB=/ESTABLISHED=/g' | sed 's/FIN_WAIT_/FIN_WAIT/g'"
else:
cmd = "netstat -anp | grep tcp | awk '{print $6}' | awk '{state=$1;arr[state]++} END{for(i in arr){printf \"%s=%s \", i,arr[i]}}' | tail -n 1"
tcp_proc = Popen(cmd, shell=True, stdout=PIPE)
tcp_status = tcp_proc.communicate()[0].rstrip('\n')
return tcp_status def get_proc_number():
cmd = "ps axu | wc -l | tail -n 1"
proc_func = Popen(cmd, shell=True, stdout=PIPE)
proc_number = proc_func.communicate()[0].rstrip('\n')
return proc_number def all_index():
return (
int(time.time() * 1000),
get_cpu_time(),
get_mem_usage_percent(),
check_disk(),
disk_io_Kbps(),
network_io_kbitps(),
get_load(),
get_tcp_status(),
get_proc_number()
) def collector():
timestamp, cpu, mem, disk, disk_io, net, load, tcp_status, process_number = all_index()
disk_utilization = ''
disk_io_read = ''
disk_io_write = ''
internet_networkrx = ''
internet_networktx = ''
tcp_status_count = ''
period_1 = ''
period_5 = ''
period_15 = '' if UUID:
cpu_utilization = 'vm.CPUUtilization ' + str(timestamp) + ' ' + str(cpu) + ' ns=ACS/ECS unit=Percent instanceId=%s\n' % UUID memory_utilization = 'vm.MemoryUtilization ' + str(timestamp) + ' ' + str(mem[0]) + ' ns=ACS/ECS unit=Percent instanceId=%s\n' % UUID if load:
period_1 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[0]) + ' ns=ACS/ECS unit=count' + ' instanceId=%s period=1min\n' % UUID
period_5 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[1]) + ' ns=ACS/ECS unit=count' + ' instanceId=%s period=5min\n' % UUID
period_15 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[2]) + ' ns=ACS/ECS unit=count' + ' instanceId=%s period=15min\n' % UUID if disk:
for name, value in disk.items():
disk_utilization = disk_utilization + 'vm.DiskUtilization ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Percent instanceId=%s mountpoint=%s\n' % (UUID, name) if disk_io:
for name, value in disk_io.items():
disk_io_read = disk_io_read + 'vm.DiskIORead ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Kilobytes/Second instanceId=%s diskname=%s\n' % (UUID, name)
disk_io_write = disk_io_write + 'vm.DiskIOWrite ' + str(timestamp) + ' ' + str(value[1]) + ' ns=ACS/ECS unit=Kilobytes/Second instanceId=%s diskname=%s\n' % (UUID, name) for name, value in net.items():
internet_networkrx = internet_networkrx + 'vm.InternetNetworkRX ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Kilobits/Second instanceId=%s netname=%s\n' % (UUID, name)
internet_networktx = internet_networktx + 'vm.InternetNetworkTX ' + str(timestamp) + ' ' + str(value[1]) + ' ns=ACS/ECS unit=Kilobits/Second instanceId=%s netname=%s\n' % (UUID, name) if tcp_status:
status_count = tcp_status.split()
for element in status_count:
key_value = element.split('=')
tcp_status_count = tcp_status_count + 'vm.TcpCount ' + str(timestamp) + ' ' + key_value[1] + ' ns=ACS/ECS unit=Count instanceId=%s state=%s\n' % (UUID, key_value[0]) process_count = 'vm.ProcessCount ' + str(timestamp) + ' ' + process_number + ' ns=ACS/ECS unit=Count instanceId=%s\n' % UUID
else:
cpu_utilization = 'vm.CPUUtilization ' + str(timestamp) + ' ' + str(cpu) + ' ns=ACS/ECS unit=Percent\n' memory_utilization = 'vm.MemoryUtilization ' + str(timestamp) + ' ' + str(mem[0]) + ' ns=ACS/ECS unit=Percent\n' if load:
period_1 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[0]) + ' ns=ACS/ECS unit=count period=1min\n'
period_5 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[1]) + ' ns=ACS/ECS unit=count period=5min\n'
period_15 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[2]) + ' ns=ACS/ECS unit=count period=15min\n' if disk:
for name, value in disk.items():
disk_utilization = disk_utilization + 'vm.DiskUtilization ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Percent mountpoint=%s\n' % name if disk_io:
for name, value in disk_io.items():
disk_io_read = disk_io_read + 'vm.DiskIORead ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Kilobytes/Second diskname=%s\n' % name
disk_io_write = disk_io_write + 'vm.DiskIOWrite ' + str(timestamp) + ' ' + str(value[1]) + ' ns=ACS/ECS unit=Kilobytes/Second diskname=%s\n' % name for name, value in net.items():
internet_networkrx = internet_networkrx + 'vm.InternetNetworkRX ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Kilobits/Second netname=%s\n' % name
internet_networktx = internet_networktx + 'vm.InternetNetworkTX ' + str(timestamp) + ' ' + str(value[1]) + ' ns=ACS/ECS unit=Kilobits/Second netname=%s\n' % name if tcp_status:
status_count = tcp_status.split()
for element in status_count:
key_value = element.split('=')
tcp_status_count = tcp_status_count + 'vm.TcpCount ' + str(timestamp) + ' ' + key_value[1] + ' ns=ACS/ECS unit=Count state=%s\n' % key_value[0] process_count = 'vm.ProcessCount ' + str(timestamp) + ' ' + process_number + ' ns=ACS/ECS unit=Count\n' data_post = cpu_utilization + memory_utilization + period_1 + period_5 + period_15 + disk_utilization + disk_io_read + disk_io_write + internet_networkrx + internet_networktx + tcp_status_count + process_count
print data_post
interval = random.randint(0, 5000)
time.sleep(interval / 1000.0) headers = {"Content-Type": "text/plain", "Accept": "text/plain"}
exception = None
http_client = None
try:
try:
http_client = httplib.HTTPConnection(REMOTE_HOST, REMOTE_PORT)
http_client.request(method="POST", url=REMOTE_MONITOR_URI, body=data_post, headers=headers)
response = http_client.getresponse()
if response.status == 200:
return
else:
logger.warn("response code %d" % response.status)
logger.warn("response code %s" % response.read())
except Exception, ex:
exception = ex
finally:
if http_client:
http_client.close()
if exception:
logger.error(exception) if __name__ == '__main__':
REMOTE_HOST = 'open.cms.aliyun.com'
REMOTE_PORT = 80 # get report address
if not os.path.isfile("../cmscfg"):
pass
else:
props = {}
prop_file = file("../cmscfg", 'r')
for line in prop_file.readlines():
kv = line.split('=')
props[kv[0].strip()] = kv[1].strip()
prop_file.close()
if props.get('report_domain'):
REMOTE_HOST = props.get('report_domain')
if props.get('report_port'):
REMOTE_PORT = props.get('report_port') # get uuid
if not os.path.isfile("../aegis_quartz/conf/uuid"):
pass
else:
uuid_file = file("../aegis_quartz/conf/uuid", 'r')
UUID = uuid_file.readline()
UUID = UUID.lower() REMOTE_MONITOR_URI = "/metrics/putLines"
MONITOR_DATA_FILE_DIR = "/tmp"
LOG_FILE = "/tmp/" + "vm.log"
LOG_LEVEL = logging.INFO
LOG_FILE_MAX_BYTES = 1024 * 1024
LOG_FILE_MAX_COUNT = 3
logger = logging.getLogger('sampler')
logger.setLevel(LOG_LEVEL)
handler = RotatingFileHandler(filename=LOG_FILE, mode='a', maxBytes=LOG_FILE_MAX_BYTES,
backupCount=LOG_FILE_MAX_COUNT)
formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
socket.setdefaulttimeout(10) try:
collector()
except Exception, e:
logger.error(e)
sys.exit(1)
阿里云收集服务器性能指标的python脚本的更多相关文章
- 一段获取app性能指标的py脚本
#coding:utf-8 import os import timeimport datetimeimport subprocess ActivityManager = 'homepage.Main ...
- [阿里云部署] Ubuntu+Flask+Nginx+uWSGI+Mysql搭建阿里云Web服务器
部署地址:123.56.7.181 Ubuntu+Flask+Nginx+uWSGI+Mysql搭建阿里云Web服务器 这个标题就比之前的"ECS服务器配置Web环境的全过程及参考资料&qu ...
- 阿里云学生服务器搭建网站-Ubuntu16.04安装php开发环境
阿里云学生服务器搭建网站(2)-Ubuntu16.04安装php开发环境 优秀博文:https://www.linuxidc.com/Linux/2016-10/136327.htm https:/ ...
- 阿里云Linux服务器初探
阿里云Linux服务器初探 阿里云Linux服务器初探 因为钱包的关系,本人买了一个660元2年的1核1GB的小服务器(centos是Linux的发行版),在当初是用2核4GB(内存)的时候使用的是w ...
- 阿里云ECS服务器CentOS7.2安装Python2.7.13
阿里云ECS服务器CentOS7.2安装Python2.7.13 yum中最新的也是Python 2.6.6,只能下载Python 2.7.9的源代码自己编译安装. 操作步骤如下: 检查CentOS7 ...
- 阿里云ECS服务器购买流程
先说说什么是阿里云服务器ECS?云服务器(Elastic Compute Service,即弹性计算服务,简称ECS)是阿里云提供的性能卓越.稳定可靠.弹性扩展的IaaS(Infrastructure ...
- 阿里云ECS服务器被DDoS无解,请问我该何去何从?
阿里云ECS服务器被DDoS无解,请问我该何去何从?
- 在阿里云Linux服务器上安装MySQL
申请阿里云Linux服务器 昨天在阿里云申请了一个免费试用5天的Linux云服务器. 操作系统:Red Hat Enterprise Linux Server 5.4 64位. CPU:1核 内存:5 ...
- 初步配置阿里云ECS服务器
阿里云服务器配置记录01 购买阿里云学生服务器9.9元每月 创建ubuntu64位实例系统,注意必须添加安全组设置才可远程登入(设置课访问端口及IP范围 putty 软件在windows10下远程登入 ...
随机推荐
- JavaScript中字符串处理的一些函数
废话,不多说,直接上代码 <script type="text/javascript"> (function(){ var methods = { camelize: ...
- HDUOJ---2152
Fruit Time Limit: 1000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)Total Submi ...
- 二叉搜索树 C语言实现
1.二叉搜索树基本概念 二叉搜索树又称二叉排序树,它或者是一棵空树,或者是一棵具有如下特性的非空二叉树: (1)若它的左子树非空,则左子树上所有结点的关键字均小于根结点的关键字: (2)若它的右子树非 ...
- java 移位
java中没有2进制的数据类型,对二进制的操作,需要使用三种操作符 << 左移位操作符,算数左移 用来将一个数的二进制位序列左移若干位,高位左移后溢出,舍弃不用,右补0 ...
- RabbitMQ消息队列(五):Routing 消息路由[转]
上篇文章中,我们构建了一个简单的日志系统.接下来,我们将丰富它:能够使用不同的severity(严重程度)来监听不同等级的log.比如我们希望只有error的log才保存到磁盘上. 1. Bindin ...
- iOS “[App] if we're in the real pre-commit handler we can't actually add any new fences due
最近运行APP,发现了这个问题,本着宁可错看,不可放过的原则,上stackoverFlow学习了一下: 链接:http://stackoverflow.com/questions/38458170/i ...
- Android 中 values/strings.xml 取值
<string name="smsphone_init">15898146863</string> //取值 String strSmsPhone=getR ...
- Android 实现 HttpClient 请求Https
如题,默认下,HttpClient是不能请求Https的,需要自己获取 private static final int SET_CONNECTION_TIMEOUT = 5 * 1000; priv ...
- Linux进程概述
一.介绍 当linux系统中的一个进程运行起来的时候,总是要访问系统的资源,访问文件或者向其他的进程发送信号.系统是否允许其进行这些操作?系统是根据什么来判断该进程的权限?这些问题是和进程信任状(pr ...
- Linux时间子系统(六) POSIX timer
一.前言 在用户空间接口函数文档中,我们描述了和POSIX timer相关的操作,主要包括创建一个timer.设定timer.获取timer的状态.获取timer overrun的信息.删除timer ...