python线程池（转）

ThreadPool:

#! /usr/bin/env python

# -*- coding: utf-8 -*-

import threadpool

import time

def sayhello(a, delay=2):
    """Print a greeting for *a*, then sleep to simulate a slow task.

    :param a: value to greet; any object is accepted and is rendered
        with ``%s`` formatting (strings print unchanged).
    :param delay: seconds to sleep after printing.  Defaults to 2, the
        value this demo originally hard-coded.
    """
    # The one-element tuple keeps tuple arguments from being unpacked by %.
    print("hello: %s" % (a,))

    time.sleep(delay)

def main():
    """Time ``sayhello`` over the same inputs with a thread pool and serially.

    Prints ``time: <seconds>`` for the pooled run and ``time1: <seconds>``
    for the plain sequential loop so the two can be compared.
    """
    seed = ["a", "b", "c"]

    # Pooled run: one work request per seed item, handed to a
    # threadpool.ThreadPool created with 5 as its size argument.
    start = time.time()
    task_pool = threadpool.ThreadPool(5)
    work_requests = threadpool.makeRequests(sayhello, seed)
    for req in work_requests:
        task_pool.putRequest(req)
    # wait() is called before the clock is read so that the pooled run is
    # timed to completion (presumably it blocks until every queued request
    # has finished -- confirm against the threadpool package docs).
    task_pool.wait()
    end = time.time()
    print("time: " + str(end - start))

    # Sequential baseline over the same inputs.
    start1 = time.time()
    for each in seed:
        sayhello(each)
    end1 = time.time()
    print("time1: " + str(end1 - start1))

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Futures:

#! /usr/bin/env python

# -*- coding: utf-8 -*-

from concurrent.futures import ThreadPoolExecutor

import time

def sayhello(a, delay=1):
    """Print a greeting for *a*, then sleep to simulate a slow task.

    :param a: value to greet; any object is accepted and is rendered
        with ``%s`` formatting (strings print unchanged).
    :param delay: seconds to sleep after printing.  Defaults to 1, the
        value this demo originally hard-coded.
    """
    # The one-element tuple keeps tuple arguments from being unpacked by %.
    print("hello: %s" % (a,))

    time.sleep(delay)

def main():
    """Time ``sayhello`` serially, via ``submit()`` and via ``map()``.

    Prints three timings:

    * ``time1`` -- plain sequential loop,
    * ``time2`` -- one ``ThreadPoolExecutor.submit`` call per item,
    * ``time3`` -- a single ``ThreadPoolExecutor.map`` call.

    :raises Exception: whatever a ``sayhello`` call raised inside a worker
        thread; the results are consumed explicitly so such errors surface
        here instead of being silently discarded.
    """
    seed = ["a", "b", "c", "d", "e", "f"]

    # Sequential baseline.
    start1 = time.time()
    for each in seed:
        sayhello(each)
    end1 = time.time()
    print("time1: " + str(end1 - start1))

    # One submit() per item.
    start2 = time.time()
    with ThreadPoolExecutor(6) as executor:
        futures = [executor.submit(sayhello, each) for each in seed]
        # A Future only re-raises its task's exception when result() is
        # called, so read every result rather than dropping the futures.
        for future in futures:
            future.result()
    end2 = time.time()
    print("time2: " + str(end2 - start2))

    # Single map() call.
    start3 = time.time()
    with ThreadPoolExecutor(6) as executor1:
        # map() returns a lazy iterator; draining it re-raises any
        # exception raised by a call.
        list(executor1.map(sayhello, seed))
    end3 = time.time()
    print("time3: " + str(end3 - start3))

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

重构 (在线程池运行时向里面添加新任务)

#! /usr/bin/env python

# -*- coding: utf-8 -*-

import hashlib
import logging
import Queue
import threading
import time

from utils.progress import PrintProgress
from utils.save import SaveToSqlite

class ThreadPool(object):
    """Thread pool for a crawler that accepts new tasks while it is running.

    The pool owns two queues: ``work_queue`` holds ``(func, url, deep)``
    download tasks and ``save_queue`` holds ``(url, html)`` pairs waiting
    to be saved.  It also keeps success/failure/running counters and a
    registry of every URL ever queued, used to skip duplicates.

    The counters and the registry are touched from several worker threads,
    so every read-modify-write on them is done under ``self._lock``.
    """

    def __init__(self, thread_num, args):
        """Create the queues, counters and threads (threads are not started).

        :param thread_num: number of ``WorkThread`` download workers.
        :param args: options object; this class reads ``args.dbfile`` and
            the workers read ``args.deep``.
        """
        self.args = args
        self.work_queue = Queue.Queue()
        self.save_queue = Queue.Queue()
        self.threads = []
        self.running = 0
        self.failure = 0
        self.success = 0
        # Maps md5(url) -> url for every URL that has ever been queued.
        self.tasks = {}
        # Protects `running`, `failure`, `success` and `tasks`: `+=` on an
        # attribute and check-then-insert on a dict are not atomic.
        self._lock = threading.Lock()
        # Name of the thread that created the pool; used in log messages.
        self.thread_name = threading.current_thread().name
        self.__init_thread_pool(thread_num)

    def __init_thread_pool(self, thread_num):
        """Instantiate all threads of the pool without starting them."""
        # Download workers.
        for _ in range(thread_num):
            self.threads.append(WorkThread(self))
        # Progress-printing thread.
        self.threads.append(PrintProgress(self))
        # Thread that saves results to the database file.
        self.threads.append(SaveToSqlite(self, self.args.dbfile))

    def add_task(self, func, url, deep):
        """Queue a download task unless *url* has been queued before.

        :param func: callable that the worker invokes for this URL.
        :param url: text URL; it is UTF-8 encoded for hashing and logging.
        :param deep: depth value stored with the task.
        """
        url_hash = hashlib.new('md5', url.encode("utf8")).hexdigest()
        # Test and insert under one lock so two threads that discover the
        # same URL at the same time cannot both enqueue it.
        with self._lock:
            if url_hash in self.tasks:
                return
            self.tasks[url_hash] = url
        self.work_queue.put((func, url, deep))
        logging.info("{0} add task {1}".format(self.thread_name, url.encode("utf8")))

    def get_task(self):
        """Return the next ``(func, url, deep)`` task without blocking.

        :raises Queue.Empty: immediately, when no task is queued.
        """
        return self.work_queue.get(block=False)

    def task_done(self):
        """Tell ``work_queue`` that one previously fetched task is finished."""
        self.work_queue.task_done()

    def start_task(self):
        """Start every thread of the pool."""
        for item in self.threads:
            item.start()
        logging.debug("Work start")

    def increase_success(self):
        """Count one successful task."""
        with self._lock:
            self.success += 1

    def increase_failure(self):
        """Count one failed task."""
        with self._lock:
            self.failure += 1

    def increase_running(self):
        """Count one more task as currently being processed."""
        with self._lock:
            self.running += 1

    def decrease_running(self):
        """Count one fewer task as currently being processed."""
        with self._lock:
            self.running -= 1

    def get_running(self):
        """Return the number of tasks currently being processed."""
        return self.running

    def get_progress_info(self):
        """Return a dict snapshot of queue sizes and counters."""
        return {
            'work_queue_number': self.work_queue.qsize(),
            'tasks_number': len(self.tasks),
            'save_queue_number': self.save_queue.qsize(),
            'success': self.success,
            'failure': self.failure,
        }

    def add_save_task(self, url, html):
        """Queue a ``(url, html)`` pair for saving."""
        self.save_queue.put((url, html))

    def get_save_task(self):
        """Return the next ``(url, html)`` pair without blocking.

        :raises Queue.Empty: immediately, when nothing is waiting.
        """
        return self.save_queue.get(block=False)

    def wait_all_complete(self):
        """Block the caller until every live thread of the pool has ended."""
        for item in self.threads:
            # is_alive() exists since Python 2.6; the camelCase isAlive()
            # alias was removed in Python 3.9.
            if item.is_alive():
                # join() blocks the calling thread until `item` terminates.
                item.join()

# WorkThread 继承自threading.Thread

class WorkThread(threading.Thread):
    """Download worker that keeps pulling tasks from its owning pool.

    Each task is a ``(func, url, deep)`` tuple obtained from
    ``thread_pool.get_task()``.  The worker calls ``func``, records success
    or failure, hands the page to the pool's save queue and feeds newly
    found links back into the pool as tasks one level deeper.
    """

    # Seconds to pause when the queue is empty while other workers are still
    # busy (they may enqueue new links); avoids spinning at full CPU.
    _idle_wait = 0.05

    def __init__(self, thread_pool):
        """
        :param thread_pool: the owning pool object; the worker uses its
            ``get_task``, ``add_task``, ``add_save_task``, ``get_running``,
            ``increase_*`` / ``decrease_running`` methods and ``args.deep``.
        """
        threading.Thread.__init__(self)
        self.thread_pool = thread_pool

    def run(self):
        """Process tasks until the queue is empty and no task is running.

        The thread also stops after the first task that raises; the error
        is logged with its traceback before the thread exits.
        """
        print(threading.current_thread().name)
        pool = self.thread_pool
        while True:
            try:
                # Task layout: (func, url, deep).
                do, url, deep = pool.get_task()
            except Queue.Empty:
                # Nothing queued: finish only once no task is in flight,
                # because a running task may still add new links.
                if pool.get_running() <= 0:
                    break
                time.sleep(self._idle_wait)
                continue

            pool.increase_running()
            try:
                # Stop collecting links once the configured depth is reached.
                flag_get_new_link = deep < pool.args.deep
                # `do` returns the page content and the links found on it.
                html, new_link = do(url, pool.args, flag_get_new_link)
                if html == '':
                    pool.increase_failure()
                else:
                    pool.increase_success()
                    # Hand the page to the save queue.
                    pool.add_save_task(url, html)
                # Queue the discovered links (the pool drops duplicates).
                if new_link:
                    for link in new_link:
                        pool.add_task(do, link, deep + 1)
            except Exception:
                # Log instead of swallowing silently; as before, an
                # unexpected error ends this worker.
                logging.exception("task failed for %s; worker thread exiting", url)
                break
            finally:
                # Always undo increase_running(), on success and on error.
                pool.decrease_running()

python线程池（转）的更多相关文章

自定义高级版python线程池
基于简单版创建类对象过多,现自定义高级版python线程池,代码如下 #高级线程池 import queue import threading import time StopEvent = obje ...
对Python线程池
本文对Python线程池进行详细说明介绍,IDE选择及编码的解决方案进行了一番详细的描述,实为Python初学者必读的Python学习经验心得. AD: 干货来了,不要等!WOT2015 北京站演讲P ...
Python 线程池(小节)
Python 线程池(小节) from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor import os,time, ...
python线程池ThreadPoolExecutor（上）（38）
在前面的文章中我们已经介绍了很多关于python线程相关的知识点,比如线程互斥锁Lock / 线程事件Event / 线程条件变量Condition 等等,而今天给大家讲解的是线程池ThreadP ...
python线程池及其原理和使用
python线程池及其原理和使用 2019-05-29 17:05:20 whatday 阅读数 576 系统启动一个新线程的成本是比较高的,因为它涉及与操作系统的交互.在这种情形下,使用线程池可以很 ...
python线程池示例
使用with方式创建线程池,任务执行完毕之后,会自动关闭资源 , 否则就需要手动关闭线程池资源 import threading, time from concurrent.futures impo ...
Python线程池与进程池
Python线程池与进程池前言前面我们已经将线程并发编程与进程并行编程全部摸了个透,其实我第一次学习他们的时候感觉非常困难甚至是吃力.因为概念实在是太多了,各种锁,数据共享同步,各种方法等等让人十 ...
python线程池实现
python 的线程池主要有threadpool,不过它并不是内置的库,每次使用都需要安装,而且使用起来也不是那么好用,所以自己写了一个线程池实现,每次需要使用直接import即可.其中还可以根据传入 ...
《转》python线程池
线程池的概念是什么? 在IBM文档库中这样的一段描写:“在面向对象编程中,创建和销毁对象是很费时间的,因为创建一个对象要获取内存资源或者其它更多资源.在Java中更是如此,虚拟机将试图跟踪每一个对象 ...
一个简单的python线程池框架
初学python,实现了一个简单的线程池框架,线程池中除Workers(工作线程)外,还单独创建了一个日志线程,用于日志的输出.线程间采用Queue方式进行通信. 代码如下:(不足之处,还请高手指正) ...

随机推荐

vue 关于vuex
1.引入vue和vuex import Vue from 'vue'import Vuex from 'vuex'Vue.use(Vu ...
.NET Core WebAPI IIS 部署问题
虽然建了 .NET Core 的项目,基本的一些功能也实现了,运行什么的也没有问题,但是一直没有直接发布. 今天就进行了发布测试,结果问题还是来了,只是你不去做自然就不会出现. 一.基本发布 1.先是 ...
go常量的定义和枚举类型
const a,b int = 1,2 const a,b = 1,2 const ( a = "hello" b,c =3,4 ) 常量数值可作为各种类型使用枚举类型的 ...
微信小程序导入Vant报错
作者:如也_d1c0链接:https://www.jianshu.com/p/0d2332984f8c来源:简书简书著作权归作者所有,任何形式的转载都请联系作者获得授权并注明出处. 先放出来Vant ...
2019-2020-1 20199312《Linux内核原理与分析》第三周作业
计算机的三大法宝:程序存储计算机.函数调用.中断堆栈的作用:记录函数调用框架.传递函数参数.保存返回值地址.提供函数内部局部变量的存储空间. 堆栈相关的寄存器 ESP:堆栈指针,指向堆栈栈顶 EBP ...
codeforces#571Div2 D---Vus the Cossack and Numbers【贪心】
题目:http://codeforces.com/contest/1186/problem/D 题意:给定一个大小为$n$的浮点序列,这$n$个数的和为0. 现在对这个序列中的每个数,进行向上取整或向 ...
根据参数显示类别（三级联动，需要JSON数据）
根据参数显示类别(三级联动,需要JSON数据) Scripts/Category.js 调用方法: $(function () { BindCategory(); //默认绑定文本框中的值 BindC ...
jquery统计输入文字的个数并对其进行判断
<textarea placeholder="该产品满足你的期待吗?说说你的使用心得,分享给同样看中的他们吧"></textarea> <span ...
Vue 定义全局变量
main.js 中定义 import Ws from './lib/ws' import ElementUI from 'element-ui'; import GlobalFunc from './ ...
Airtest真机链接（一）
确认ADB是否能够正常连接到手机 windows系统下: 用USB线连好手机后,进入AirtestIDE文件夹,在 AirtestIDE_2019-05-09_py3_win64/airtest/co ...

python线程池（转）

python线程池（转）的更多相关文章

随机推荐

热门专题