python3-----多进程、多线程、多协程

目前计算机程序一般会遇到两类I/O：硬盘I/O和网络I/O。本文针对网络I/O的场景，分析并对比python3下进程、线程、协程的效率。进程采用multiprocessing.Pool进程池，线程采用自己封装的线程池，协程采用gevent库。同时用python3自带的urllib.request和开源的requests做对比。代码如下：

import urllib.request

import requests

import time

import multiprocessing

import threading

import queue

def startTimer():
    """Return the current wall-clock time in seconds since the epoch.

    The returned value is intended to be passed to ``ticT`` later on to
    obtain the elapsed time.
    """
    now = time.time()
    return now

def ticT(startTime):
    """Return the seconds elapsed since ``startTime``, rounded to 3 decimals.

    ``startTime`` is a ``time.time()`` timestamp such as the one returned by
    ``startTimer``.
    """
    return round(time.time() - startTime, 3)

#def tic(startTime, name):

#    useTime = time.time() - startTime

#    print('[%s] use time: %1.3f' % (name, useTime))

def download_urllib(url):
    """Fetch ``url`` with ``urllib.request`` and return ``(status, text)``.

    The request is sent with a ``Mozilla/5.0`` user-agent header.  The body
    is decoded as GBK first; if that raises ``UnicodeDecodeError`` it is
    decoded as UTF-8 with undecodable bytes dropped.

    Errors raised by ``urllib.request.urlopen`` are not caught here.
    """
    req = urllib.request.Request(url, headers={'user-agent': 'Mozilla/5.0'})

    # Close the response deterministically: this function is called many
    # times per benchmark round, so leaked connections would pile up.
    with urllib.request.urlopen(req) as res:
        status = res.status
        raw = res.read()

    try:
        text = raw.decode('gbk')
    except UnicodeDecodeError:
        text = raw.decode('utf8', 'ignore')

    return status, text

def download_requests(url):
    """Fetch ``url`` with ``requests`` and return ``(status_code, text)``.

    The request is sent with a ``Mozilla/5.0`` user-agent header.
    """
    browser_headers = {'user-agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=browser_headers)
    return response.status_code, response.text

class threadPoolManager:
    """Run one ``download_requests`` job per URL on a set of ``work`` threads.

    All jobs are queued first, then ``threadNum`` worker threads are created
    on the shared queue.  Call ``waitAllComplete`` to block until every
    worker has finished.
    """

    def __init__(self, urls, workNum=10000, threadNum=20):
        """Queue a download job for each URL and create the worker threads.

        urls: iterable of URLs, one job is queued per element.
        workNum: unused; kept so existing callers keep working.
        threadNum: number of ``work`` threads to create.
        """
        self.workQueue = queue.Queue()
        self.threadPool = []
        # Fill the queue before creating workers: a worker exits as soon as
        # it finds the queue empty.
        self.__initWorkQueue(urls)
        self.__initThreadPool(threadNum)

    def __initWorkQueue(self, urls):
        """Put one ``(download_requests, url)`` job on the queue per URL."""
        for url in urls:
            self.workQueue.put((download_requests, url))

    def __initThreadPool(self, threadNum):
        """Create ``threadNum`` workers that all consume the shared queue."""
        for _ in range(threadNum):
            self.threadPool.append(work(self.workQueue))

    def waitAllComplete(self):
        """Block until every worker thread in the pool has terminated."""
        for worker in self.threadPool:
            # Thread.isAlive() was removed in Python 3.9; is_alive() is the
            # supported spelling on every Python 3 version.
            if worker.is_alive():
                worker.join()

class work(threading.Thread):
    """Worker thread that drains ``(callable, argument)`` jobs from a queue.

    The thread starts itself on construction and exits as soon as it finds
    the queue empty.
    """

    def __init__(self, workQueue):
        """Store the shared job queue and start the thread immediately."""
        threading.Thread.__init__(self)
        self.workQueue = workQueue
        self.start()

    def run(self):
        """Execute queued jobs until the queue is empty."""
        while True:
            # Checking qsize() and then calling get() is racy: another
            # worker can take the last job in between, and the resulting
            # queue.Empty would kill this thread with a traceback.  Ask for
            # a job directly and treat Empty as the normal exit signal.
            try:
                do, args = self.workQueue.get_nowait()
            except queue.Empty:
                break

            try:
                do(args)
            finally:
                # Keep the queue's unfinished-task count accurate even when
                # the job raises.
                self.workQueue.task_done()

urls = ['http://www.ustchacker.com'] * 10

# Elapsed seconds per round for each strategy (one entry per round).
urllibL = []
requestsL = []
multiPool = []
threadPool = []

N = 20          # number of benchmark rounds
PoolNum = 100   # worker count for both the process pool and the thread pool


def main():
    """Time four download strategies for ``N`` rounds and plot the results.

    Each round downloads every URL in ``urls`` sequentially with
    ``urllib.request``, sequentially with ``requests``, through a
    ``multiprocessing.Pool`` and through ``threadPoolManager``.
    """
    for i in range(N):
        print('start %d try' % i)

        # 1) Sequential downloads with urllib.request.
        urllibT = startTimer()
        for url in urls:
            download_urllib(url)
        urllibL.append(ticT(urllibT))
        print('')

        # 2) Sequential downloads with requests.
        requestsT = startTimer()
        for url in urls:
            download_requests(url)
        requestsL.append(ticT(requestsT))
        print('')

        # 3) Process pool.  Pool creation and teardown are inside the timed
        #    region on purpose: the cost of creating workers is part of
        #    what is being measured.
        requestsT = startTimer()
        pool = multiprocessing.Pool(PoolNum)
        try:
            pool.map(download_requests, urls)
        finally:
            # Always release the worker processes, even if a download fails.
            pool.close()
            pool.join()
        multiPool.append(ticT(requestsT))
        print('')

        # 4) Hand-rolled thread pool (creation is timed as well).
        requestsT = startTimer()
        manager = threadPoolManager(urls, threadNum=PoolNum)
        manager.waitAllComplete()
        threadPool.append(ticT(requestsT))
        print('')

    import matplotlib.pyplot as plt

    x = list(range(1, N + 1))
    plt.plot(x, urllibL, label='urllib')
    plt.plot(x, requestsL, label='requests')
    plt.plot(x, multiPool, label='requests MultiPool')
    plt.plot(x, threadPool, label='requests threadPool')
    plt.xlabel('test number')
    plt.ylabel('time(s)')
    plt.legend()
    plt.show()


# With the spawn/forkserver start methods every pool worker re-imports this
# module; without this guard each worker would try to run the benchmark
# itself instead of serving jobs.
if __name__ == '__main__':
    main()

运行结果如下：

从上图可以看出，python3自带的urllib.request效率还是不如开源的requests，multiprocessing进程池效率明显提升，但还低于自己封装的线程池，有一部分原因是创建、调度进程的开销比创建线程高（测试程序中我把创建的代价也包括在里面）。

在Windows上要想使用进程模块，就必须把有关进程的代码写在当前.py文件的if __name__ == '__main__':语句的下面，才能正常使用Windows下的进程模块。在以fork方式创建子进程的Unix/Linux下则不需要（macOS自Python 3.8起默认使用spawn方式，同样需要这样写）。

下面是gevent的测试代码：

# gevent's documentation recommends monkey-patching as early as possible,
# before other modules are imported, so that modules which use sockets/ssl
# pick up the cooperative versions.  Patching after ``requests`` has been
# imported can trigger gevent's MonkeyPatchWarning about ssl.
import gevent.monkey

gevent.monkey.patch_all()

import urllib.request

import requests

import time

import gevent.pool

def startTimer():
    """Capture a start timestamp (``time.time()``) for a later ``ticT`` call."""
    started_at = time.time()
    return started_at

def ticT(startTime):
    """Return how many seconds have passed since ``startTime``.

    ``startTime`` is a ``time.time()`` timestamp; the result is rounded to
    three decimal places.
    """
    finished_at = time.time()
    return round(finished_at - startTime, 3)

#def tic(startTime, name):

#    useTime = time.time() - startTime

#    print('[%s] use time: %1.3f' % (name, useTime))

def download_urllib(url):
    """Fetch ``url`` with ``urllib.request`` and return ``(status, text)``.

    The request is sent with a ``Mozilla/5.0`` user-agent header.  The body
    is decoded as GBK first; if that raises ``UnicodeDecodeError`` it is
    decoded as UTF-8 with undecodable bytes dropped.

    Errors raised by ``urllib.request.urlopen`` are not caught here.
    """
    req = urllib.request.Request(url, headers={'user-agent': 'Mozilla/5.0'})

    # Close the response deterministically: this function is called many
    # times per benchmark round, so leaked connections would pile up.
    with urllib.request.urlopen(req) as res:
        status = res.status
        raw = res.read()

    try:
        text = raw.decode('gbk')
    except UnicodeDecodeError:
        text = raw.decode('utf8', 'ignore')

    return status, text

def download_requests(url):
    """Download ``url`` via ``requests.get`` and return ``(status_code, text)``.

    A ``Mozilla/5.0`` user-agent header is sent with the request.
    """
    reply = requests.get(url, headers={'user-agent': 'Mozilla/5.0'})
    status = reply.status_code
    body = reply.text
    return status, body

urls = ['http://www.ustchacker.com'] * 10

# Elapsed seconds per round for each strategy (one entry per round).
urllibL, requestsL, reqPool, reqSpawn = [], [], [], []

N = 20
PoolNum = 100

for attempt in range(N):
    print('start %d try' % attempt)

    # Sequential downloads with urllib.request.
    began = startTimer()
    for target in urls:
        download_urllib(target)
    urllibL.append(ticT(began))
    print('')

    # Sequential downloads with requests.
    began = startTimer()
    for target in urls:
        download_requests(target)
    requestsL.append(ticT(began))
    print('')

    # requests driven through a gevent pool; pool creation is timed too.
    began = startTimer()
    gevent.pool.Pool(PoolNum).map(download_requests, urls)
    reqPool.append(ticT(began))
    print('')

    # requests driven through one spawned greenlet per URL.
    began = startTimer()
    gevent.joinall([gevent.spawn(download_requests, target) for target in urls])
    reqSpawn.append(ticT(began))
    print('')

import matplotlib.pyplot as plt

rounds = list(range(1, N + 1))
for timings, legend in (
    (urllibL, 'urllib'),
    (requestsL, 'requests'),
    (reqPool, 'requests geventPool'),
    (reqSpawn, 'requests Spawn'),
):
    plt.plot(rounds, timings, label=legend)

plt.xlabel('test number')
plt.ylabel('time(s)')
plt.legend()
plt.show()

运行结果如下：

从上图可以看到，对于I/O密集型任务，gevent还是能对性能做很大提升的。由于协程的创建、调度开销都比线程小得多，所以可以看到不论使用gevent的Spawn模式还是Pool模式，性能差距都不大。

gevent需要使用monkey补丁才能充分发挥性能，但monkey补丁会影响multiprocessing的运行。如果要同时使用两者，需要如下代码：

gevent.monkey.patch_all(thread=False, socket=False, select=False)

可是这样就不能充分发挥gevent的优势，所以不能把multiprocessing Pool、threading Pool、gevent Pool放在一个程序中对比。不过比较两图可以得出结论：线程池和gevent的性能是最优的，其次是进程池。附带得出个结论，requests库比urllib.request库性能要好一些哈:-)

转载请注明：转自http://blog.csdn.net/littlethunder/article/details/40983031

python3-----多进程、多线程、多协程的更多相关文章

也说性能测试，顺便说python的多进程+多线程、协程
最近需要一个web系统进行接口性能测试,这里顺便说一下性能测试的步骤吧,大概如下一.分析接口频率根据系统的复杂程度,接口的数量有多有少,应该优先对那些频率高,数据库操作频繁的接口进行性能测试,所以 ...
python3多进程进程池协程并发
一.进程我们电脑的应用程序,都是进程,进程是资源分配的单位.进程切换需要的资源最大,效率低. 进程之间相互独立 cpu密集的时候适合用多进程 #多 ...
python基础整理5——多进程多线程和协程
进程与线程 1.进程我们电脑的应用程序,都是进程,假设我们用的电脑是单核的,cpu同时只能执行一个进程.当程序处于I/O阻塞的时候,CPU如果和程序一起等待,那就太浪费了,cpu会去执行其他的程序, ...
python 多进程，多线程，协程
在我们实际编码中,会遇到一些并行的任务,因为单个任务无法最大限度的使用计算机资源.使用并行任务,可以提高代码效率,最大限度的发挥计算机的性能.python实现并行任务可以有多进程,多线程,协程等方式. ...
深入浅析python中的多进程、多线程、协程
深入浅析python中的多进程.多线程.协程我们都知道计算机是由硬件和软件组成的.硬件中的CPU是计算机的核心,它承担计算机的所有任务. 操作系统是运行在硬件之上的软件,是计算机的管理者,它负责资源 ...
python3 - 多线程和协程速率测试对比
多线程和协程都属于IO密集型,我通过以下用例测试多线程和协程的实际速率对比. 实例:通过socket客户端以多线程并发模式请求不同服务器端(这里服务器端分2种写法:第一种服务器通过协程实现,第二种服务 ...
Python并发编程——多线程与协程
Pythpn并发编程--多线程与协程目录 Pythpn并发编程--多线程与协程 1. 进程与线程 1.1 概念上 1.2 多进程与多线程--同时执行多个任务 2. 并发和并行 3. Python多线 ...
python单线程，多线程和协程速度对比
在某些应用场景下,想要提高python的并发能力,可以使用多线程,或者协程.比如网络爬虫,数据库操作等一些IO密集型的操作.下面对比python单线程,多线程和协程在网络爬虫场景下的速度. 一,单线程 ...
Python多进程、多线程和协程简介
一.进程和线程进程是一个执行中的程序.每个进程都拥有自己的地址空间.内存.数据栈以及其他用于跟踪执行的辅助数据.在单核CPU系统中的多进程,内存中可以有许多程序,但在给定一个时刻只有一个程序在运行: ...
Python多进程、多线程、协程
转载:https://www.cnblogs.com/huangguifeng/p/7632799.html 首先我们来了解下python中的进程,线程以及协程! 从计算机硬件角度: 计算机的核心是C ...

随机推荐

Yum自动下载RPM包及其所有依赖的包
前几天我尝试去创建一个仅包含我们经常在 CentOS 7 下使用的软件的本地仓库.当然,我们可以使用 curl 或者 wget 下载任何软件包,然而这些命令并不能下载要求的依赖软件包.你必须去花一些时 ...
k倍区间前缀和【蓝桥杯2017 C/C++ B组】
标题: k倍区间给定一个长度为N的数列,A1, A2, ... AN,如果其中一段连续的子序列Ai, Ai+1, ... Aj(i <= j)之和是K的倍数,我们就称这个区间[i, j]是K倍 ...
在fedora23中安装virtualbox, 然后实现虚拟机irtualbox 或者 vmware 下的xp操作系统
参考: http://blog.csdn.net/statdm/article/details/7756788 参考: http://www.cnblogs.com/fengbohello/p/488 ...
POJ-1038 Bugs Integrated, Inc. （状压+滚动数组+深搜的动态规划）
本题的题眼很明显,N (1 <= N <= 150), M (1 <= M <= 10),摆明了是想让你用状态压缩dp. 整个思路如下:由于要填2*3或者3*2的芯片,那么就要 ...
(zhuan) Attention in Long Short-Term Memory Recurrent Neural Networks
Attention in Long Short-Term Memory Recurrent Neural Networks by Jason Brownlee on June 30, 2017 in ...
PredNet --- Deep Predictive coding networks for video prediction and unsupervised learning --- 论文笔记
PredNet --- Deep Predictive coding networks for video prediction and unsupervised learning ICLR 20 ...
.net core 问题：413 Request Entity Too Large nginx
https://stackoverflow.com/questions/38698350/increase-upload-file-size-in-asp-net-core The other ans ...
QQ帐户的申请与登陆-（字符串操作）
题目: 实现QQ新帐户申请和老帐户登陆的简化版功能.最大挑战是:据说现在的QQ号码已经有10位数了. 输入格式: 输入首先给出一个正整数N(≤10^5),随后给出N行指令.每行指令的格式为:“命令符( ...
浅谈 equals 和 == 的区别
在初学Java时,可能会经常碰到下面的代码: 1 String str1 = new String("hello"); 2 String str2 = new String(&qu ...
因样式冲突引起的div消失问题
工作需要,搭建一个网站的模型,简单分成三个部分,标题栏,导航栏,主界面,效果如图: 但是点击界面的任意地方,中间的div块消失了,如图所示: 调试,发现在点击界面其他地方的时候display属性有变化 ...

python3-----多进程、多线程、多协程

python3-----多进程、多线程、多协程的更多相关文章

随机推荐

热门专题