#!/usr/bin/env python2
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys, os, subprocess
from terminalsize import get_terminal_size
from time import time, sleep
import re
import fnmatch

def load_colors():
    r"""Load the colour table from the ``color.enabled.sh`` script.

    The script is looked up in the directory that contains this file.
    Blank lines and lines starting with ``#`` are skipped; every other line
    is expected to have the form ``NAME='value'`` (optionally followed by
    more text such as a trailing comment).

    Returns:
        dict mapping each NAME to the text found between the first pair of
        single quotes, with every literal ``\e[`` replaced by the ESC
        character followed by ``[``.

    Raises:
        IOError/OSError: if the script cannot be opened.
        ValueError: if a non-comment line contains no ``=``.
        IndexError: if the right-hand side contains no single quote.
    """
    color_script_fn = os.path.join(os.path.dirname(__file__), "color.enabled.sh")
    colors = {}
    with open(color_script_fn) as f:
        for raw_line in f:
            entry = raw_line.strip()
            if not entry or entry.startswith('#'):
                continue
            # Split on the first '=' only: a '=' inside the quoted value or
            # in a trailing comment must not break the name/value unpacking.
            name, value = entry.split('=', 1)
            colors[name] = value.split("'")[1].replace(r'\e[', "\033[")
    return colors

# Colour table shared by every output routine in this module.
Color = load_colors()
# 'ret' is appended after each status line: a real line break when the
# HIBENCH_PRINTFULLLOG environment variable is a non-zero integer, otherwise a
# carriage return so that the next line is written over the current one.
Color['ret'] = os.linesep if int(os.environ.get("HIBENCH_PRINTFULLLOG", 0)) else '\r'

# NOTE(review): tab_matcher is no longer used by replace_tab_to_space; it is
# kept only in case other code imports it from this module.
tab_matcher = re.compile("\t")
tabstop = 8


def replace_tab_to_space(s):
    """Expand every tab in `s` to spaces using `tabstop`-column tab stops.

    Each tab is replaced by as many spaces as are needed to reach the next
    multiple of `tabstop`, measured on the already-expanded text, so columns
    stay aligned when a line contains several tabs.

    @s: text (str or unicode) to expand
    Returns the expanded text; text without tabs is returned unchanged.
    """
    return s.expandtabs(tabstop)

class _Matcher:
    hadoop = re.compile(r"^.*map\s*=\s*(\d+)%,\s*reduce\s*=\s*(\d+)%.*$")
    hadoop2 = re.compile(r"^.*map\s+\s*(\d+)%\s+reduce\s+\s*(\d+)%.*$")
    spark = re.compile(r"^.*finished task \S+ in stage \S+ \(tid \S+\) in.*on.*\((\d+)/(\d+)\)\s*$")
    def match(self, line):
        for p in [self.hadoop, self.hadoop2]:
            m = p.match(line)
            if m:
                return (float(m.groups()[0]) + float(m.groups()[1]))/2

        for p in [self.spark]:
            m = p.match(line)
            if m:
                return float(m.groups()[0]) / float(m.groups()[1]) * 100

matcher = _Matcher()

def show_with_progress_bar(line, progress, line_width):
    """
    Write `line` to stdout with a two-colour background acting as a bar.

    The text is padded with spaces up to `line_width`; the first
    `progress` percent of that width is drawn with the On_Yellow colour and
    the remainder with On_Blue, followed by Color_Off and the `ret`
    terminator taken from the module-level Color table.

    @line: text to show
    @progress: 0-100
    @line_width: width of screen
    """
    split_at = int(line_width * progress / 100)
    # ljust leaves text that is already line_width or longer untouched.
    padded = line.ljust(line_width)
    done_part, todo_part = padded[:split_at], padded[split_at:]
    rendered = "{On_Yellow}{line_seg1}{On_Blue}{line_seg2}{Color_Off}{ret}".format(
        line_seg1=done_part, line_seg2=todo_part, **Color)
    sys.stdout.write(rendered)

# Shell-style pattern for "Table ... not found" messages, compiled once here
# instead of once per log line.
_TABLE_NOT_FOUND_RE = re.compile(fnmatch.translate("*Table * not found*"))

# Literal fragments of 'error' lines that are known noise (Hive and
# KeyProviderCache messages) and must not be highlighted.
_BENIGN_ERROR_FRAGMENTS = (
    "Database default already exists",
    "Could not find uri with key [dfs.encryption.key.provider.uri]",
)


def _is_benign_error(line):
    """Return True if `line` (original case) is a known harmless error line."""
    if _TABLE_NOT_FOUND_RE.match(line):
        return True
    return any(fragment in line for fragment in _BENIGN_ERROR_FRAGMENTS)


def execute(workload_result_file, command_lines):
    """Run a command, log its output to a file and mirror it on the terminal.

    Every output line of the command (stdout and stderr merged) is written,
    right-stripped, to `workload_result_file`. On the terminal:

    * a line that contains 'error' (case-insensitive) and has no leading
      whitespace is printed in red on its own line, unless it is one of the
      known harmless messages, in which case nothing is printed for it;
    * any other line is truncated to the terminal width and written followed
      by Color['ret'], with a progress-bar background when `matcher`
      recognises a progress report in it.

    @workload_result_file: path of the log file (opened for writing)
    @command_lines: list of command words; joined with spaces and run by
                    the shell
    Returns the exit status of the command, or 1 if a KeyboardInterrupt
    arrives while waiting for it to finish.
    """
    # NOTE(review): the words are joined and interpreted by the shell, so
    # they must come from a trusted caller; arguments containing spaces or
    # shell metacharacters are not preserved as single arguments.
    proc = subprocess.Popen(" ".join(command_lines), shell=True, bufsize=1,
                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    count = 100      # start above the threshold so the first line queries the size
    last_time = 0
    with open(workload_result_file, 'w') as log_file:
        # see http://stackoverflow.com/a/4417735/1442961
        for line in iter(proc.stdout.readline, b""):
            count += 1
            # refresh terminal size every 100 lines or every second
            if count > 100 or time() - last_time > 1:
                count, last_time = 0, time()
                width = get_terminal_size()[0] - 1

            try:
                line = line.rstrip()
                log_file.write(line + "\n")
                log_file.flush()
            except KeyboardInterrupt:
                proc.terminate()
                break

            # 'replace' keeps one malformed byte in the command's output from
            # aborting the whole run with UnicodeDecodeError.
            line = replace_tab_to_space(line.decode('utf-8', 'replace'))
            lline = line.lower()

            if 'error' in lline and lline.lstrip() == lline:
                # Bypass hive 'error's and KeyProviderCache error. The check
                # uses the original-case line because the patterns are
                # mixed-case and can never match the lower-cased text.
                if not _is_benign_error(line):
                    sys.stdout.write(u"{Red}{line}{Color_Off}{ClearEnd}\n".format(line=line, **Color).encode('utf-8'))
            else:
                if len(line) >= width:
                    line = line[:width-4] + '...'
                progress = matcher.match(lline)
                if progress is not None:
                    show_with_progress_bar(line, progress, width)
                else:
                    sys.stdout.write(u"{line}{ClearEnd}{ret}".format(line=line, **Color).encode('utf-8'))
            sys.stdout.flush()

    # Move off the last status line, which may have ended with '\r'.
    sys.stdout.write("\n")
    try:
        proc.wait()
    except KeyboardInterrupt:
        proc.kill()
        return 1
    return proc.returncode

def test_progress_bar():
    """Visual check: sweep an 80-column progress bar from 0 to 100 percent."""
    percent = 0
    while percent <= 100:
        caption = "test progress : %d" % percent
        show_with_progress_bar(caption, percent, 80)
        sys.stdout.flush()

        # brief pause so the sweep is visible
        sleep(0.05)
        percent += 1

if __name__ == "__main__":
    # argv[1] is the log file path; all remaining arguments form the command.
    result_file, command = sys.argv[1], sys.argv[2:]
    exit_code = execute(workload_result_file=result_file,
                        command_lines=command)
    sys.exit(exit_code)
# For a manual visual check, call test_progress_bar() above instead.

HiBench成长笔记——(10) 分析源码execute_with_log.py的更多相关文章

  1. HiBench成长笔记——(9) 分析源码monitor.py

    monitor.py 是主监控程序,将监控数据写入日志,并统计监控数据生成HTML统计展示页面: #!/usr/bin/env python2 # Licensed to the Apache Sof ...

  2. HiBench成长笔记——(8) 分析源码workload_functions.sh

    workload_functions.sh 是测试程序的入口,粘连了监控程序 monitor.py 和 主运行程序: #!/bin/bash # Licensed to the Apache Soft ...

  3. HiBench成长笔记——(11) 分析源码run.sh

    #!/bin/bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor licen ...

  4. HiBench成长笔记——(5) HiBench-Spark-SQL-Scan源码分析

    run.sh #!/bin/bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributo ...

  5. Hadoop学习笔记(10) ——搭建源码学习环境

    Hadoop学习笔记(10) ——搭建源码学习环境 上一章中,我们对整个hadoop的目录及源码目录有了一个初步的了解,接下来计划深入学习一下这头神象作品了.但是看代码用什么,难不成gedit?,单步 ...

  6. CentOS 7运维管理笔记(10)----MySQL源码安装

    MySQL可以支持多种平台,如Windows,UNIX,FreeBSD或其他Linux系统.本篇随笔记录在CentOS 7 上使用源码安装MySQL的过程. 1.下载源码 选择使用北理工的镜像文件: ...

  7. memcached学习笔记——存储命令源码分析下篇

    上一篇回顾:《memcached学习笔记——存储命令源码分析上篇》通过分析memcached的存储命令源码的过程,了解了memcached如何解析文本命令和memcached的内存管理机制 ...

  8. memcached学习笔记——存储命令源码分析上篇

    原创文章,转载请标明,谢谢. 上一篇分析过memcached的连接模型,了解memcached是如何高效处理客户端连接,这一篇分析memcached源码中的process_update_command ...

  9. kernel 3.10内核源码分析--hung task机制

    kernel 3.10内核源码分析--hung task机制 一.相关知识: 长期以来,处于D状态(TASK_UNINTERRUPTIBLE状态)的进程 都是让人比较烦恼的问题,处于D状态的进程不能接 ...

随机推荐

  1. 「USACO5.5」矩形周长Picture

    题目描述 墙上贴着许多形状相同的海报.照片.它们的边都是水平和垂直的.每个矩形图片可能部分或全部的覆盖了其他图片.所有矩形合并后的边长称为周长. 编写一个程序计算周长. 如图1所示7个矩形. 如图2所 ...

  2. Spring Boot 2 实战:如何自定义 Servlet Filter

    1.前言 有些时候我们需要在 Spring Boot Servlet Web 应用中声明一些自定义的 Servlet Filter 来处理一些逻辑.比如简单的权限系统.请求头过滤.防止 XSS 攻击等 ...

  3. springboot搭建的web数据提交乱码

    修改:application.yml或者application.properties文件 将 url: jdbc:mysql://127.0.0.1:3306/shiro 修改为: url: jdbc ...

  4. centos 7中添加一个新用户并授权的步骤详解

    1.创建新用户: 创建一个用户名为:zhangbiao adduser zhangbiao 为这个用户初始化密码,linux会判断密码复杂度,不过可以强行忽略: passwd zhangbiao  更 ...

  5. MongoDB的分片数据库命令总结

    sh._adminCommand 在admin数据库运行database command ,就像db.runCommand() ,不过可以保证只在 mongos 上运行. sh._checkFullN ...

  6. luogu P2280 激光炸弹(二维前缀和)

    由题给的xi, yi范围,可以建立二维地图maze[i][j],记录i j范围上的所有目标的价值总和 即有maze[xi][yi] += wi 然后接下来的目标就是求出该二维数组的前缀和 可得到前缀和 ...

  7. 夯实Java基础系列目录

    自进入大学以来,学习的编程语言从最初的C语言、C++,到后来的Java、.NET.而在学习编程语言的同时也逐渐决定了以后自己要学习的是哪一门语言(Java).到现在为止,学习Java语言也有很长一段 ...

  8. Python学习第十三课——re(正则表达式)模块

    .的用法 import re s = 'fhsdjakaxdsancjh' # .代表一个元素,完成模糊匹配 res = re.findall("a..x", s) # 找到s中以 ...

  9. C++面试常见问题——04链表的逆序与合并

    链表的逆序与合并 链表的逆序 已知一个链表的头指针为head,将该链表逆序. #include<iostream> using namespace std; struct Node{ in ...

  10. Eclipse中java代码注释变成乱码的问题

    今天在查看曾经写过的代码时发生了一件很是让人头疼的事: 我写的所有注释全部都变成了乱码,曾经刚入门时也是经常遇到类似的问题,解决起来很快,每天可能都会在工作空间里看到,但是随着时间的推移,写代码的规 ...