#coding:utf-8

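'''

@author solq

by 2016-01-06

main(directory, filename pattern) -- the pattern is an fnmatch-style glob, not a regex

Prints the collected results after all tasks have finished

'''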

import json

import fnmatch

import os

import threading

from multiprocessing import cpu_count

from threadpool import *

def main(rootPath, pattern):
    """Queue one ``runTask`` job for every matching file under ``rootPath``.

    The directory tree is walked recursively with ``os.walk``.  ``pattern``
    is an ``fnmatch``-style glob (e.g. ``"*Currency*"``) applied to the bare
    file names, not to the full paths.

    Each matching path is wrapped in a work request and handed to the
    module-level ``pool``.  This function only enqueues the work and does not
    wait for it; call ``pool.wait()`` afterwards to block until all files
    have been processed.
    """
    for root, _dirs, files in os.walk(rootPath):
        for filename in fnmatch.filter(files, pattern):
            path = os.path.join(root, filename)
            # makeRequests() returns a list (one request per argument), so
            # submit with a plain loop rather than a throw-away list
            # comprehension.
            for req in makeRequests(runTask, [path], None):
                pool.putRequest(req)

def runTask(name):
    """Parse one log file line by line and pass each record to ``runCallBack``.

    ``name`` is the path of the file to read (converted with ``str``).  Every
    line is decoded as a standalone JSON document and the resulting object is
    handed to ``runCallBack``.

    Processing is best effort: a line that is not valid JSON, or a record the
    callback fails on, is skipped and the next line is tried.  Returns
    ``None``.
    """
    # ``with`` closes the handle even when reading fails part-way through.
    with open(str(name)) as log_file:
        # Iterating the file object streams lines lazily on Python 2 and 3;
        # ``xreadlines()`` only exists on Python 2.
        for line in log_file:
            try:
                record = json.loads(line)
                runCallBack(record)
            except Exception:
                # Skip the bad line but, unlike a bare ``except:``, let
                # KeyboardInterrupt / SystemExit propagate.
                continue

# Recharge (top-up) data
data1 = {}

# Consumption (spending) data
data2 = {}

# Non-recharge data
data3 = {}

# Recharge sources
chargeReasons = {"a": 0, "b": 0}

# Start the thread pool with one worker thread per CPU core
pool = ThreadPool(cpu_count())

# Create the lock that runCallBack holds while it runs
mutex = threading.Lock()

def runCallBack(obj):
    """Handle one decoded log record while holding the module-level ``mutex``.

    ``obj`` is the object produced by ``json.loads`` for a single log line.
    The lock serialises the worker threads so the business logic can update
    shared state safely.

    Errors raised by the business logic are swallowed (best effort), so one
    bad record never aborts the processing of a file.  Returns ``None``.
    """
    # ``with`` releases the lock exactly once and only if it was acquired.
    # The previous ``finally: mutex.release()`` also ran when ``acquire()``
    # itself had failed.
    with mutex:
        try:
            # Business logic goes here.  The original left this suite empty,
            # which is a syntax error; ``pass`` keeps the function valid
            # until the logic is filled in.
            pass
        except Exception:
            # Deliberately ignore failures for a single record.
            pass

# Queue one parsing job per file below the log directory whose name matches
# the glob.
main("C:/Python27/a/", "*Currency*")

# Block until every queued job has reported back.
pool.wait()

# Print the three result tables.
print(data1)
print(data2)
print(data3)

# ---- threadpool.py ----

# -*- coding: UTF-8 -*-

"""Easy to use object-oriented thread pool framework.

A thread pool is an object that maintains a pool of worker threads to perform

time consuming operations in parallel. It assigns jobs to the threads

by putting them in a work request queue, where they are picked up by the

next available thread. This then performs the requested operation in the

background and puts the results in another queue.

The thread pool object can then collect the results from all threads from

this queue as soon as they become available or after all threads have

finished their work. It's also possible to define callbacks to handle

each result as it comes in.

The basic concept and some code was taken from the book "Python in a Nutshell,

2nd edition" by Alex Martelli, O'Reilly 2006, ISBN 0-596-10046-9, from section

14.5 "Threaded Program Architecture". I wrapped the main program logic in the

ThreadPool class, added the WorkRequest class and the callback system and

tweaked the code here and there. Kudos also to Florent Aide for the exception

handling mechanism.

Basic usage::

    >>> pool = ThreadPool(poolsize)

    >>> requests = makeRequests(some_callable, list_of_args, callback)

    >>> [pool.putRequest(req) for req in requests]

    >>> pool.wait()

See the end of the module code for a brief, annotated usage example.

Website : http://chrisarndt.de/projects/threadpool/

"""

# Markup language used by the docstrings in this module.
__docformat__ = 'restructuredtext en'

# Module metadata.
__author__ = 'Christopher Arndt'
__version__ = "1.3.2"
__license__ = 'MIT license'

# Public names exported by ``from threadpool import *``.
__all__ = [
    "makeRequests",
    "NoResultsPending",
    "NoWorkersAvailable",
    "ThreadPool",
    "WorkRequest",
    "WorkerThread",
]

# standard library modules

import sys

import threading

import traceback

try:

    import Queue            # Python 2

except ImportError:

    import queue as Queue   # Python 3

# exceptions

class NoResultsPending(Exception):
    """Raised by ``ThreadPool.poll`` when no work request awaits a result."""

class NoWorkersAvailable(Exception):
    """Raised by a blocking poll when requests are pending but no workers remain."""

# internal module helper functions

def _handle_thread_exception(request, exc_info):
    """Print the traceback of an exception raised inside a work request.

    This is the default ``exc_callback``.  ``request`` is accepted only to
    satisfy the callback signature and is not used.  ``exc_info`` is the
    tuple returned by ``sys.exc_info()``; it is unpacked straight into
    ``traceback.print_exception``.

    """
    traceback.print_exception(*exc_info)

# utility functions

def makeRequests(callable_, args_list, callback=None,
        exc_callback=_handle_thread_exception):
    """Build one ``WorkRequest`` per entry of ``args_list`` for ``callable_``.

    Each entry of ``args_list`` describes the arguments of one invocation:

    * a tuple is read as ``(positional_args, keyword_args)``; only its first
      two items are used;
    * any other value is passed as the single positional argument.

    ``callback`` and ``exc_callback`` are forwarded unchanged to every
    ``WorkRequest`` (see its docstring for their signatures).

    Returns the list of created requests, in ``args_list`` order.

    """
    def _to_request(entry):
        # Normalise both accepted entry shapes to (positional, keywords).
        if isinstance(entry, tuple):
            positional, keywords = entry[0], entry[1]
        else:
            positional, keywords = [entry], None
        return WorkRequest(callable_, positional, keywords,
            callback=callback, exc_callback=exc_callback)

    return [_to_request(entry) for entry in args_list]

# classes

class WorkerThread(threading.Thread):
    """Background thread connected to the requests/results queues.

    A worker thread sits in the background and picks up work requests from
    one queue and puts the results in another until it is dismissed.

    """

    def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
        """Set up thread in daemonic mode and start it immediately.

        ``requests_queue`` and ``results_queue`` are instances of
        ``Queue.Queue`` passed by the ``ThreadPool`` class when it creates a
        new worker thread.

        ``poll_timeout`` is the number of seconds the thread blocks on an
        empty requests queue before re-checking whether it was dismissed.
        Any extra keyword arguments are passed on to ``threading.Thread``.

        """
        threading.Thread.__init__(self, **kwds)
        # Assign the attribute directly; ``setDaemon()`` is a deprecated
        # alias that emits a DeprecationWarning on Python 3.10+.
        self.daemon = True
        self._requests_queue = requests_queue
        self._results_queue = results_queue
        self._poll_timeout = poll_timeout
        self._dismissed = threading.Event()
        self.start()

    def run(self):
        """Repeatedly process the job queue until told to exit."""
        while True:
            # ``is_set()`` replaces the deprecated ``isSet()`` alias.
            if self._dismissed.is_set():
                # we are dismissed, break out of loop
                break
            # Get the next work request. If none arrives within
            # self._poll_timeout seconds, jump back to the start of the
            # loop to give the thread a chance to exit.
            try:
                request = self._requests_queue.get(True, self._poll_timeout)
            except Queue.Empty:
                continue
            else:
                if self._dismissed.is_set():
                    # we are dismissed, put back request in queue and exit
                    # loop so that another worker can pick it up
                    self._requests_queue.put(request)
                    break
                try:
                    result = request.callable(*request.args, **request.kwds)
                    self._results_queue.put((request, result))
                except:  # noqa: E722
                    # Deliberately catch everything: the failure must be
                    # reported through the results queue, otherwise the
                    # request would stay pending forever.
                    request.exception = True
                    self._results_queue.put((request, sys.exc_info()))

    def dismiss(self):
        """Sets a flag to tell the thread to exit when done with current job.
        """
        self._dismissed.set()

class WorkRequest:
    """A callable plus its arguments and callbacks, ready to be queued.

    For the common case of many requests for the same callable with
    different arguments, use the module function ``makeRequests``.

    """

    def __init__(self, callable_, args=None, kwds=None, requestID=None,
            callback=None, exc_callback=_handle_thread_exception):
        """Create a work request for a callable and attach callbacks.

        ``callable_`` is executed by a worker thread with the positional
        arguments ``args`` (a list) and the keyword arguments ``kwds``
        (a dictionary).  Falsy values for either are replaced by an empty
        list / dictionary.

        ``callback``, if given, is called when the result of the request is
        picked up from the result queue.  It receives two positional
        arguments: the ``WorkRequest`` object and the result of the
        callable, in that order.  To pass additional information to the
        callback, attach it to the request object.

        ``exc_callback`` is called instead when the callable raised.  It
        receives the ``WorkRequest`` and the tuple returned by
        ``sys.exc_info()``.  The default handler prints the exception via
        ``traceback.print_exception``; pass ``None`` to have no exception
        handler.

        ``requestID``, if given, must be hashable: its hash (not the value
        itself) is stored and used by ``ThreadPool`` as dictionary key for
        this request.  When omitted, ``id(self)`` is used.  A ``TypeError``
        is raised for an unhashable ``requestID``.

        """
        if requestID is not None:
            try:
                self.requestID = hash(requestID)
            except TypeError:
                raise TypeError("requestID must be hashable.")
        else:
            self.requestID = id(self)

        # Set to True by the worker thread if the callable raised.
        self.exception = False
        self.callback = callback
        self.exc_callback = exc_callback
        self.callable = callable_
        self.args = args if args else []
        self.kwds = kwds if kwds else {}

    def __str__(self):
        """Return a short description with id, arguments and exception flag."""
        fields = (self.requestID, self.args, self.kwds, self.exception)
        return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % fields

class ThreadPool:
    """A pool of worker threads that runs work requests and gathers results.

    Requests are submitted with ``putRequest``.  Results are collected, and
    the callbacks are run, by ``poll`` or ``wait`` in the calling thread.

    """

    def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
        """Create both queues and start ``num_workers`` worker threads.

        ``q_size > 0`` limits the size of the work *request queue*:
        ``putRequest`` then blocks while the queue is full, unless it is
        called with a positive ``timeout``.

        ``resq_size > 0`` limits the size of the *results queue*: worker
        threads then block while it is full and they try to add a result.

        .. warning:
            With both ``q_size`` and ``resq_size`` set to ``!= 0`` a
            deadlock is possible when the results queue is not pulled
            regularly and too many jobs are put in the request queue.
            To prevent this, always call ``ThreadPool.putRequest()`` with
            ``timeout > 0`` and catch ``Queue.Full`` exceptions.

        ``poll_timeout`` is handed to every worker thread created here.

        """
        self._requests_queue = Queue.Queue(q_size)
        self._results_queue = Queue.Queue(resq_size)
        self.workers = []
        self.dismissedWorkers = []
        # Pending requests, keyed by their requestID.
        self.workRequests = {}
        self.createWorkers(num_workers, poll_timeout)

    def createWorkers(self, num_workers, poll_timeout=5):
        """Add ``num_workers`` worker threads to the pool.

        ``poll_timeout`` is the interval in seconds (int or float) at which
        an idle thread checks whether it has been dismissed.

        """
        for _ in range(num_workers):
            thread = WorkerThread(self._requests_queue, self._results_queue,
                poll_timeout=poll_timeout)
            self.workers.append(thread)

    def dismissWorkers(self, num_workers, do_join=False):
        """Tell up to ``num_workers`` threads to quit after their current task.

        With ``do_join`` true the dismissed threads are joined right away.
        Otherwise they are remembered in ``dismissedWorkers`` so that
        ``joinAllDismissedWorkers`` can join them later.

        """
        retired = []
        for _ in range(min(num_workers, len(self.workers))):
            thread = self.workers.pop()
            thread.dismiss()
            retired.append(thread)

        if not do_join:
            self.dismissedWorkers.extend(retired)
            return
        for thread in retired:
            thread.join()

    def joinAllDismissedWorkers(self):
        """Join every dismissed-but-not-yet-joined worker thread."""
        for thread in self.dismissedWorkers:
            thread.join()
        self.dismissedWorkers = []

    def putRequest(self, request, block=True, timeout=None):
        """Queue ``request`` for execution and remember it as pending.

        ``block`` and ``timeout`` are passed on to ``Queue.put``.

        """
        assert isinstance(request, WorkRequest)
        # don't reuse old work requests
        assert not getattr(request, 'exception', None)
        self._requests_queue.put(request, block, timeout)
        self.workRequests[request.requestID] = request

    def poll(self, block=False):
        """Process the results that are available in the results queue.

        Raises ``NoResultsPending`` once no request is pending any more and
        ``NoWorkersAvailable`` when ``block`` is true but the pool has no
        workers left.  In non-blocking mode it returns as soon as the
        results queue is empty.

        """
        while True:
            if not self.workRequests:
                # nothing left to wait for
                raise NoResultsPending
            if block and not self.workers:
                # nobody left to process the remaining requests
                raise NoWorkersAvailable
            try:
                # fetch the next finished request
                request, result = self._results_queue.get(block=block)
                # a failed request goes to its exception handler, if any
                if request.exception and request.exc_callback:
                    request.exc_callback(request, result)
                # otherwise the result goes to the regular callback, if any
                if request.callback and (
                        not request.exception or not request.exc_callback):
                    request.callback(request, result)
                del self.workRequests[request.requestID]
            except Queue.Empty:
                break

    def wait(self):
        """Block until the results of all pending requests were processed."""
        while True:
            try:
                self.poll(True)
            except NoResultsPending:
                break

################

# USAGE EXAMPLE

################

if __name__ == '__main__':
    # Annotated demo: queue 40 jobs on a small pool and poll for the results
    # while the main thread keeps "working".
    import random
    import time

    # the work the threads will have to do (rather trivial in our example)
    def do_something(data):
        time.sleep(random.randint(1, 5))
        result = round(random.random() * data, 5)
        # just to show off, we throw an exception once in a while
        if result > 5:
            raise RuntimeError("Something extraordinary happened!")
        return result

    # this will be called each time a result is available
    def print_result(request, result):
        print("**** Result from request #%s: %r" % (request.requestID, result))

    # this will be called when an exception occurs within a thread
    # this example exception handler does little more than the default handler
    def handle_exception(request, exc_info):
        if not isinstance(exc_info, tuple):
            # Something is seriously wrong...
            print(request)
            print(exc_info)
            raise SystemExit
        print("**** Exception occured in request #%s: %s" % \
          (request.requestID, exc_info))

    # assemble the arguments for each job to a list...
    data = [random.randint(1, 10) for _ in range(20)]
    # ... and build a WorkRequest object for each item in data
    requests = makeRequests(do_something, data, print_result, handle_exception)
    # to use the default exception handler, uncomment next line and comment out
    # the preceding one.
    #requests = makeRequests(do_something, data, print_result)

    # or the other form of args_lists accepted by makeRequests: ((,), {})
    data = [((random.randint(1, 10),), {}) for _ in range(20)]
    requests.extend(
        makeRequests(do_something, data, print_result, handle_exception)
        #makeRequests(do_something, data, print_result)
        # to use the default exception handler, uncomment next line and comment
        # out the preceding one.
    )

    # we create a pool of 3 worker threads
    print("Creating thread pool with 3 worker threads.")
    main = ThreadPool(3)

    # then we put the work requests in the queue...
    for req in requests:
        main.putRequest(req)
        print("Work request #%s added." % req.requestID)
    # or shorter:
    # [main.putRequest(req) for req in requests]

    # ...and wait for the results to arrive in the result queue
    # by using ThreadPool.wait(). This would block until results for
    # all work requests have arrived:
    # main.wait()

    # instead we can poll for results while doing something else:
    i = 0
    while True:
        try:
            time.sleep(0.5)
            main.poll()
            print("Main thread working...")
            # active_count() replaces the deprecated activeCount() alias;
            # subtract one for the main thread itself.
            print("(active worker threads: %i)" % (threading.active_count()-1, ))
            if i == 10:
                print("**** Adding 3 more worker threads...")
                main.createWorkers(3)
            if i == 20:
                print("**** Dismissing 2 worker threads...")
                main.dismissWorkers(2)
            i += 1
        except KeyboardInterrupt:
            print("**** Interrupted!")
            break
        except NoResultsPending:
            print("**** No pending results.")
            break

    # dismissWorkers() above did not join the threads, so do it before exiting
    if main.dismissedWorkers:
        print("Joining all dismissed worker threads...")
        main.joinAllDismissedWorkers()

python 大数据分析的更多相关文章

金三银四科学找工作，用python大数据分析一线城市1000多份岗位招聘需求
文章每周持续更新,各位的「三连」是对我最大的肯定.可以微信搜索公众号「后端技术学堂」第一时间阅读(一般比博客早更新一到两篇) 每年的三四月份是招聘高峰,也常被大家称为金三银四黄金招聘期,这时候上一 ...
python大数据工作流程
本文作者:hhh5460 大数据分析,内存不够用怎么办? 当然,你可以升级你的电脑为超级电脑. 另外,你也可以采用硬盘操作. 本文示范了硬盘操作的一种可能的方式. 本文基于:win10(64) + p ...
2 python大数据挖掘系列之淘宝商城数据预处理实战
preface 在上一章节我们聊了python大数据分析的基本模块,下面就说说2个项目吧,第一个是进行淘宝商品数据的挖掘,第二个是进行文本相似度匹配.好了,废话不多说,赶紧上车. 淘宝商品数据挖掘数 ...
记2019年目标之一没有996的大数据分析BI实战历程
本文略长,阅读大约需要10分钟. 懵懵懂懂的学习了python,然后一发不可收拾的爱上了python大数据分析,慢慢的走进了大数据的学堂,学习如何大数据挖掘,大数据分析,到BI系统建设使用. 大数据的 ...
Python金融大数据分析PDF
Python金融大数据分析(高清版)PDF 百度网盘链接:https://pan.baidu.com/s/1CF2NhbgpMroLhW2sTm7IJQ 提取码:clmt 复制这段内容后打开百度网盘 ...
向大家介绍我的新书：《基于股票大数据分析的Python入门实战》
我在公司里做了一段时间Python数据分析和机器学习的工作后,就尝试着写一本Python数据分析方面的书.正好去年有段时间股票题材比较火,就在清华出版社夏老师指导下构思了这本书.在这段特殊时期内,夏老 ...
《Python金融大数据分析》高清PDF版|百度网盘免费下载|Python数据分析
《Python金融大数据分析》高清PDF版|百度网盘免费下载|Python数据分析提取码:mfku 内容简介唯一一本详细讲解使用Python分析处理金融大数据的专业图书:金融应用开发领 ...
python金融大数据分析PDF高清完整版免费下载|百度云盘|Python基础教程免费电子书
点击获取提取码:7k4b 内容简介唯一一本详细讲解使用Python分析处理金融大数据的专业图书:金融应用开发领域从业人员必读. Python凭借其简单.易读.可扩展性以及拥有巨大而活跃的科学计算社区 ...
基于股票大数据分析的Python入门实战（视频教学版）的精彩插图汇总
在我写的这本书,《基于股票大数据分析的Python入门实战(视频教学版)》里,用能吸引人的股票案例,带领大家入门Python的语法,数据分析和机器学习. 京东链接是这个:https://i ...

随机推荐

JQuery文本框水印插件的简单实现
采用JQuery实现文本框的水印效果非常容易,效果如下: 代码片段,定义要应用水印效果的文本框的样式: .watermark { color: #cccccc; } 将JavaScript代码封装成J ...
狗日的js的闭包
一.变量的作用域要懂得闭包,起首必须懂得Javascript特别的变量作用域. 变量的作用域无非就是两种:全局变量和局部变量. Javascript说话的特别之处,就在于函数内部可以直接读取全局变量 ...
Leetcode 13 Roman to Integer 字符串处理+STL
题意:将罗马数字1到3999转化成自然数字,这里用了STL库map将罗马字符映射到自然数字. I,V,X,L,C,D,M -> 1,5,10,50,100,500,1000 m[s[i]]< ...
详解eNSP下的PPP之MP、PAP/CHAP认证实验配置
一.PPP MP实验(用虚拟模板配置) 1.拓扑图
[原创]推荐一款强大的.NET程序内存分析工具.NET Memory Profiler
[原创]推荐一款强大的.NET程序内存分析工具.NET Memory Profiler 1 官方网站:http://memprofiler.com/2 下载地址:http://memprofiler. ...
阿里云产品介绍（三）：云数据库RDS
写完云服务器ECS,本来想先写负载均衡的. 因为发现很多客户,都是直接将单台云服务器应用对外提供访问,如果云服务器宕机,应用就会停止服务.云服务器标称有99.95%的可用率,一年下来宕机四个多小时也是 ...
Revit中如何自定义快捷键
最佳的绘图方式是左手键盘,右手鼠标,使用快捷键将大大提高绘图效率,Revit同样提供了自定义绘图工具快捷键的功能(Revit2011及以后版本),有两种方式调出自定义快捷键窗口,第一种是Revit窗口 ...
通过apt-get安装nvidia驱动
标签:NVIDIA Driver apt 早前安装的NVIDIA显卡驱动在启动X Server的时候提示版本太新了,要求必须使用340.96的,而新的驱动都到了367 https://wiki.deb ...
VMware Workstation安装RedHat Linux 9
RedHatLinux是目前世界上使用最多的Linux操作系统.因为它具备最好的图形界面无论是安装.配置还是使用都十分方便.下面我将介绍使用VMware Workstation安装RedHat Lin ...
Oracle Goldengate REPLICAT启动时报正在运行解决办法
stop replicate时报ERROR: opening port for REPLICAT MYREP (TCP/IP error: Connection refused). start rep ...

python 大数据分析

threadpool.py

python 大数据分析的更多相关文章

随机推荐

热门专题