阿里云收集服务器性能指标的 Python 脚本


下面脚本是阿里云服务器收集性能指标的脚本:

#!/usr/bin/python
#########################################
# Function:    sample linux performance indices
# Usage:       python sampler.py
# Author:      CMS DEV TEAM
# Company:     Aliyun Inc.
# Version:     1.1
#########################################
"""Sample Linux performance indices and POST them to a monitoring endpoint.

The script reads CPU, memory, disk, disk I/O, network, load average, TCP
state and process count figures from /proc and from the ``iostat``, ``ss`` /
``netstat`` and ``ps`` commands, formats them as text lines and sends them
with a single HTTP POST.

Runs on Python 2.6+ and Python 3.
"""
import os
import os.path
import sys
import time
import operator
import logging
import socket
import random
from shutil import copyfile
from subprocess import Popen, PIPE
from logging.handlers import RotatingFileHandler

try:
    import httplib
except ImportError:  # Python 3 renamed the module
    import http.client as httplib

# A real logger object from import time on, so helper functions can log even
# when the module is imported rather than run; main() attaches the handler.
logger = logging.getLogger('sampler')

REMOTE_HOST = None
REMOTE_PORT = None
REMOTE_MONITOR_URI = None
UUID = None

MONITOR_DATA_FILE_DIR = "/tmp"
LOG_FILE = "/tmp/" + "vm.log"
LOG_LEVEL = logging.INFO
LOG_FILE_MAX_BYTES = 1024 * 1024
LOG_FILE_MAX_COUNT = 3

# File system types that are never reported by get_disk_partition().
black_list = ('iso9660',)

_CLOCK_TICKS = os.sysconf("SC_CLK_TCK")

# Snapshot of /proc/stat kept between runs so CPU usage can be computed as a
# delta against the previous invocation.
_CPU_STAT_CACHE = '/tmp/cpu_stat'

# /proc/meminfo labels needed by get_mem_usage_percent().
_MEMINFO_KEYS = ('MemTotal:', 'MemFree:', 'Buffers:', 'Cached:',
                 'SwapTotal:', 'SwapFree:')


def usage_percent(use, total):
    """Return ``use`` as a percentage of ``total``.

    Raises:
        Exception: if ``total`` is zero.
    """
    try:
        ret = (float(use) / total) * 100
    except ZeroDivisionError:
        raise Exception("ERROR - zero division error")
    return ret


def get_mem_usage_percent():
    """Return ``(physical_percent, virtual_percent)`` memory usage.

    Physical usage excludes free, buffer and cache memory. Swap usage is
    reported as 0 when no swap is configured. Returns None when
    /proc/meminfo cannot be read or lacks one of the required fields.
    """
    values = {}
    try:
        with open('/proc/meminfo', 'r') as f:
            for line in f:
                fields = line.split()
                if len(fields) >= 2 and fields[0] in _MEMINFO_KEYS:
                    values[fields[0]] = int(fields[1])
    except (IOError, OSError, ValueError):
        return None
    if len(values) != len(_MEMINFO_KEYS):
        return None

    mem_total = values['MemTotal:']
    reclaimable = values['MemFree:'] + values['Buffers:'] + values['Cached:']
    physical_percent = usage_percent(mem_total - reclaimable, mem_total)

    virtual_percent = 0
    vmem_total = values['SwapTotal:']
    if vmem_total > 0:
        virtual_percent = usage_percent(
            vmem_total - values['SwapFree:'], vmem_total)
    return physical_percent, virtual_percent


def get_disk_partition():
    """Return the mount points listed in /etc/mtab that sit on a block device.

    The accepted file system types are the entries of /proc/filesystems that
    are not flagged ``nodev``, minus ``black_list``. Returns None when either
    file cannot be read.
    """
    fs_types = set()
    mount_points = []
    try:
        with open("/proc/filesystems", "r") as f:
            for line in f:
                if line.startswith("nodev"):
                    continue
                fs_type = line.strip()
                if fs_type and fs_type not in black_list:
                    fs_types.add(fs_type)
        with open('/etc/mtab', "r") as f:
            for line in f:
                if line.startswith('none'):
                    continue
                fields = line.split()
                # Fields are: device, mount point, fs type, ...
                if len(fields) < 3 or fields[2] not in fs_types:
                    continue
                mount_points.append(fields[1])
    except (IOError, OSError):
        return None
    return mount_points


def check_disk():
    """Return per-mount-point disk usage.

    Returns:
        A dict mapping mount point to a tuple of two strings formatted with
        two decimals: the used percentage, and the total size in bytes
        divided by ``1024 * 1000000``. Mount points that cannot be queried
        or report a size of zero are skipped. None when the partition list
        is unavailable.
    """
    partitions = get_disk_partition()
    if partitions is None:
        return None
    usage = {}
    for mount_point in partitions:
        try:
            st = os.statvfs(mount_point)
        except OSError as ex:
            # One bad mount must not hide the figures of all the others.
            logger.warning("statvfs failed for %s: %s" % (mount_point, ex))
            continue
        total = st.f_blocks * st.f_frsize
        if total == 0:
            continue
        used = (st.f_blocks - st.f_bfree) * st.f_frsize
        usage[mount_point] = (
            '%.2f' % (usage_percent(used, total),),
            '%.2f' % (total * 1.0 / (1024 * 1000000)),
        )
    return usage


def _read_cpu_times(path):
    """Return ``(total, idle, iowait)`` from the first line of ``path``.

    ``path`` must be in /proc/stat format: a label followed by integer
    counters, with idle and iowait as the 4th and 5th counters.
    """
    with open(path, 'r') as f:
        values = f.readline().split()
    total = sum(int(v) for v in values[1:])
    return total, int(values[4]), int(values[5])


def _cpu_percentage(before, after):
    """Return the busy percentage between two samples, or None if unusable.

    A sample pair is unusable when any delta is negative, when no time
    elapsed, or when the result falls outside 0..100.
    """
    total = after[0] - before[0]
    idle = after[1] - before[1]
    iowait = after[2] - before[2]
    if total <= 0 or idle < 0 or iowait < 0:
        return None
    percentage = int(100.0 * (total - idle - iowait) / total)
    if percentage < 0 or percentage > 100:
        return None
    return percentage


def get_cpu_time():
    """Return CPU utilization as an integer percentage (idle/iowait excluded).

    Usage is computed between the /proc/stat snapshot cached in
    /tmp/cpu_stat by the previous run and the current /proc/stat. When no
    usable snapshot exists one is taken and the function waits one second.
    If the deltas are inconsistent a second one-second sample is taken.
    Returns None when even that sample is unusable.
    """
    previous = None
    if os.path.isfile(_CPU_STAT_CACHE) and os.path.getsize(_CPU_STAT_CACHE) > 0:
        try:
            previous = _read_cpu_times(_CPU_STAT_CACHE)
        except (IOError, OSError, ValueError, IndexError):
            # Corrupt snapshot: fall through and take a fresh one.
            previous = None
    if previous is None:
        copyfile('/proc/stat', _CPU_STAT_CACHE)
        previous = _read_cpu_times(_CPU_STAT_CACHE)
        time.sleep(1)

    current = _read_cpu_times('/proc/stat')
    cpu_percentage = _cpu_percentage(previous, current)

    # compensate logic
    if cpu_percentage is None:
        time.sleep(1)
        latest = _read_cpu_times('/proc/stat')
        cpu_percentage = _cpu_percentage(current, latest)

    copyfile('/proc/stat', _CPU_STAT_CACHE)
    return cpu_percentage


def _read_net_dev():
    """Return ``{interface: (received_kbit, sent_kbit)}`` from /proc/net/dev."""
    with open("/proc/net/dev", "r") as f:
        lines = f.readlines()
    stats = {}
    # The first two lines of the file are column headers.
    for line in lines[2:]:
        colon = line.find(':')
        if colon <= 0:
            continue
        name = line[:colon].strip()
        fields = line[colon + 1:].strip().split()
        if len(fields) < 9:
            continue
        # 0.0078125 == 8 / 1024: byte counter -> kilobits.
        received = float('%.4f' % (float(fields[0]) * 0.0078125))
        sent = float('%.4f' % (float(fields[8]) * 0.0078125))
        stats[name] = (received, sent)
    return stats


def network_io_kbitps():
    """Return per-interface network throughput over a one-second window.

    Returns:
        A dict mapping interface name to ``(rx, tx)`` in kilobits per
        second. Only interfaces present in both samples are reported.
    """
    before = _read_net_dev()
    time.sleep(1)
    after = _read_net_dev()
    # An interface seen in only one sample has no meaningful delta.
    common_after = dict((k, v) for k, v in after.items() if k in before)
    common_before = dict((k, v) for k, v in before.items() if k in after)
    return merge_with(common_after, common_before)


def disk_io_Kbps():
    """Return per-device disk throughput as reported by ``iostat``.

    Returns:
        A dict mapping device name to ``(read_kBps, write_kBps)``, or None
        when iostat wrote to stderr or its output could not be read.
    """
    iostat = Popen(
        r"iostat -d -k 1 2 | sed '/Device\|Linux\|^$/d' > /tmp/disk_io",
        shell=True, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    iostat_error = iostat.communicate()[1].strip()
    if iostat_error:
        logger.error("iostat not exists, %s" % iostat_error)
        return None

    try:
        with open('/tmp/disk_io', 'r') as f:
            lines = f.readlines()
    except (IOError, OSError) as ex:
        logger.error(ex)
        return None

    retdict = {}
    for line in lines:
        fields = line.split()
        # Columns: device, tps, kB_read/s, kB_wrtn/s, ... Newer sysstat
        # releases append more columns, so only the first four are required.
        if len(fields) < 4:
            continue
        try:
            retdict[fields[0]] = (float(fields[2]), float(fields[3]))
        except ValueError:
            continue
    # iostat prints two reports; later rows overwrite earlier ones, so the
    # values kept are those of the last report.
    return retdict


def merge_with(d1, d2, fn=lambda x, y: tuple(map(operator.sub, x, y))):
    """Return a copy of ``d1`` combined with ``d2`` key by key.

    For keys present in both, the value is ``fn(d1[key], d2[key])`` (by
    default the element-wise difference of two tuples). Keys present only
    in ``d2`` are copied over unchanged.
    """
    res = d1.copy()
    for key, val in d2.items():
        try:
            res[key] = fn(res[key], val)
        except KeyError:
            res[key] = val
    return res


def get_load():
    """Return the 1, 5 and 15 minute load averages, or None on failure."""
    try:
        with open('/proc/loadavg', 'r') as f:
            tmp = f.readline().split()
        lavg_1 = float(tmp[0])
        lavg_5 = float(tmp[1])
        lavg_15 = float(tmp[2])
    except (IOError, OSError, ValueError, IndexError):
        return None
    return lavg_1, lavg_5, lavg_15


def get_tcp_status():
    """Return TCP connection counts as a ``"STATE=count STATE=count "`` string.

    Uses ``ss`` when it is installed and falls back to ``netstat``.
    """
    check_cmd = "command -v ss"
    check_proc = Popen(check_cmd, shell=True, stdout=PIPE,
                       universal_newlines=True)
    ss = check_proc.communicate()[0].rstrip('\n')
    if ss:
        cmd = "ss -ant | awk '{if(NR != 1) print $1}' | awk '{state=$1;arr[state]++} END{for(i in arr){printf \"%s=%s \", i,arr[i]}}' | sed 's/-/_/g' | sed 's/ESTAB=/ESTABLISHED=/g' | sed 's/FIN_WAIT_/FIN_WAIT/g'"
    else:
        cmd = "netstat -anp | grep tcp | awk '{print $6}' | awk '{state=$1;arr[state]++} END{for(i in arr){printf \"%s=%s \", i,arr[i]}}' | tail -n 1"
    tcp_proc = Popen(cmd, shell=True, stdout=PIPE, universal_newlines=True)
    tcp_status = tcp_proc.communicate()[0].rstrip('\n')
    return tcp_status


def get_proc_number():
    """Return the line count of ``ps axu`` as a string.

    The count is taken with ``wc -l`` over the whole output, so it includes
    the header line printed by ps.
    """
    cmd = "ps axu | wc -l | tail -n 1"
    proc_func = Popen(cmd, shell=True, stdout=PIPE, universal_newlines=True)
    proc_number = proc_func.communicate()[0].strip()
    return proc_number


def all_index():
    """Collect every index once and return them as a tuple.

    Order: timestamp (ms), cpu, memory, disk usage, disk I/O, network I/O,
    load average, TCP status, process count.
    """
    return (
        int(time.time() * 1000),
        get_cpu_time(),
        get_mem_usage_percent(),
        check_disk(),
        disk_io_Kbps(),
        network_io_kbitps(),
        get_load(),
        get_tcp_status(),
        get_proc_number()
    )


def _metric_line(name, timestamp, value, unit, dimensions=()):
    """Return one report line for a metric, terminated by a newline.

    Layout: ``name timestamp value ns=ACS/ECS unit=<unit>``, then
    ``instanceId=<UUID>`` when the module-level UUID is set, then each
    ``key=value`` pair of ``dimensions`` in order.
    """
    parts = [name, str(timestamp), str(value), 'ns=ACS/ECS', 'unit=%s' % unit]
    if UUID:
        parts.append('instanceId=%s' % UUID)
    parts.extend('%s=%s' % pair for pair in dimensions)
    return ' '.join(parts) + '\n'


def format_metrics(timestamp, cpu, mem, disk, disk_io, net, load, tcp_status,
                   process_number):
    """Format sampled indices as the text body that is posted.

    The arguments are the items returned by all_index(), in the same order.
    Any index that is None or empty is left out of the report instead of
    aborting it.

    Returns:
        The report as a single string, one metric per line.
    """
    lines = []
    if cpu is not None:
        lines.append(_metric_line('vm.CPUUtilization', timestamp, cpu,
                                  'Percent'))
    if mem:
        lines.append(_metric_line('vm.MemoryUtilization', timestamp, mem[0],
                                  'Percent'))
    if load:
        for value, period in zip(load, ('1min', '5min', '15min')):
            lines.append(_metric_line('vm.LoadAverage', timestamp, value,
                                      'count', [('period', period)]))
    if disk:
        for name, value in disk.items():
            lines.append(_metric_line('vm.DiskUtilization', timestamp,
                                      value[0], 'Percent',
                                      [('mountpoint', name)]))
    if disk_io:
        # All read lines are emitted before all write lines.
        reads = []
        writes = []
        for name, value in disk_io.items():
            reads.append(_metric_line('vm.DiskIORead', timestamp, value[0],
                                      'Kilobytes/Second',
                                      [('diskname', name)]))
            writes.append(_metric_line('vm.DiskIOWrite', timestamp, value[1],
                                       'Kilobytes/Second',
                                       [('diskname', name)]))
        lines.extend(reads)
        lines.extend(writes)
    if net:
        # All RX lines are emitted before all TX lines.
        received = []
        sent = []
        for name, value in net.items():
            received.append(_metric_line('vm.InternetNetworkRX', timestamp,
                                         value[0], 'Kilobits/Second',
                                         [('netname', name)]))
            sent.append(_metric_line('vm.InternetNetworkTX', timestamp,
                                     value[1], 'Kilobits/Second',
                                     [('netname', name)]))
        lines.extend(received)
        lines.extend(sent)
    if tcp_status:
        for element in tcp_status.split():
            state, sep, count = element.partition('=')
            if not sep:
                continue
            lines.append(_metric_line('vm.TcpCount', timestamp, count,
                                      'Count', [('state', state)]))
    if process_number:
        lines.append(_metric_line('vm.ProcessCount', timestamp,
                                  process_number, 'Count'))
    return ''.join(lines)


def collector():
    """Sample all indices, print the report and POST it to the remote host.

    The request goes to REMOTE_HOST:REMOTE_PORT at REMOTE_MONITOR_URI after
    a random delay of up to five seconds. HTTP failures are logged, not
    raised.
    """
    data_post = format_metrics(*all_index())
    print(data_post)

    # NOTE(review): presumably jitter to spread the reports of many hosts
    # over time -- confirm with the service owners.
    interval = random.randint(0, 5000)
    time.sleep(interval / 1000.0)

    headers = {"Content-Type": "text/plain", "Accept": "text/plain"}
    # On Python 3 a str body is encoded as ISO-8859-1 by http.client, which
    # fails on other characters (e.g. in mount points); send bytes instead.
    body = data_post if isinstance(data_post, bytes) else data_post.encode('utf-8')
    http_client = None
    try:
        http_client = httplib.HTTPConnection(REMOTE_HOST, REMOTE_PORT)
        http_client.request(method="POST", url=REMOTE_MONITOR_URI,
                            body=body, headers=headers)
        response = http_client.getresponse()
        if response.status != 200:
            logger.warning("response code %d" % response.status)
            logger.warning("response code %s" % response.read())
    except Exception as ex:
        logger.error(ex)
    finally:
        if http_client:
            http_client.close()


def _read_properties(path):
    """Return the ``key=value`` pairs of ``path`` as a dict.

    Lines without ``=`` are ignored; keys and values are stripped.
    """
    props = {}
    with open(path, 'r') as prop_file:
        for line in prop_file:
            key, sep, value = line.partition('=')
            if sep:
                props[key.strip()] = value.strip()
    return props


def main():
    """Configure logging and the report target, then run one collection."""
    global REMOTE_HOST, REMOTE_PORT, REMOTE_MONITOR_URI, UUID

    # Logging is set up first so configuration problems reach the log file.
    logger.setLevel(LOG_LEVEL)
    handler = RotatingFileHandler(filename=LOG_FILE, mode='a',
                                  maxBytes=LOG_FILE_MAX_BYTES,
                                  backupCount=LOG_FILE_MAX_COUNT)
    formatter = logging.Formatter(
        fmt='%(asctime)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    REMOTE_HOST = 'open.cms.aliyun.com'
    REMOTE_PORT = 80
    REMOTE_MONITOR_URI = "/metrics/putLines"

    # get report address
    if os.path.isfile("../cmscfg"):
        try:
            props = _read_properties("../cmscfg")
        except (IOError, OSError) as ex:
            logger.error(ex)
            props = {}
        if props.get('report_domain'):
            REMOTE_HOST = props.get('report_domain')
        if props.get('report_port'):
            try:
                REMOTE_PORT = int(props.get('report_port'))
            except ValueError:
                logger.error("invalid report_port %r, using %d"
                             % (props.get('report_port'), REMOTE_PORT))

    # get uuid
    if os.path.isfile("../aegis_quartz/conf/uuid"):
        try:
            with open("../aegis_quartz/conf/uuid", 'r') as uuid_file:
                # Strip the line terminator: left in place it would split
                # every metric line that carries the instanceId.
                UUID = uuid_file.readline().strip().lower() or None
        except (IOError, OSError) as ex:
            logger.error(ex)

    socket.setdefaulttimeout(10)

    try:
        collector()
    except Exception as e:
        logger.error(e)
        sys.exit(1)


if __name__ == '__main__':
    main()

评论关闭