第十章：Python高级编程-多线程、多进程和线程池编程

第十章：Python高级编程-多线程、多进程和线程池编程

10.1 Python中的GIL

"""

gil global interpreter lock (cpython)

Python中一个线程对应于C语言中的一个线程

gil使得同一时刻只有一个线程在一个cpu上执行字节码

"""

# GIL会根据执行的字节码行数以及时间片释放， GIL遇到IO操作的时候会主动释放

import dis

def add(a):
    """Return ``a`` plus one.

    Deliberately written as an assignment followed by a return so that the
    disassembly below shows the separate load / add / store bytecodes.
    """
    a = a + 1

    return a


# dis.dis() writes the disassembly to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit an extra "None" line).
dis.dis(add)

# ================ demo start =====================

# Counter shared by both worker functions below.
total = 0


def add():
    """Increment the global ``total`` one million times.

    The update is a read-modify-write on a global, so when it runs
    concurrently with ``desc`` without a lock the final value may differ
    from 0.
    """
    global total

    for i in range(1000000):
        total += 1


def desc():
    """Decrement the global ``total`` one million times.

    Uses the same iteration count as ``add`` so that a race-free run ends
    with ``total == 0``.
    """
    global total

    for i in range(1000000):
        total -= 1

import threading

# Run the incrementing and the decrementing worker side by side.
thread1 = threading.Thread(target=add)
thread2 = threading.Thread(target=desc)

for worker in (thread1, thread2):
    worker.start()

# Wait for both workers before reading the shared counter.
for worker in (thread1, thread2):
    worker.join()

print(total)  # the GIL does get released while the threads run

10.2 多线程编程-threading

# 对于IO操作来说，多线程和多进程性能差别不大

# 1.通过Thread类实例化

import time

import threading

def get_detail_html(url):
    """Simulate fetching an article detail page with a two second pause.

    ``url`` is accepted but never read by the body.
    """
    started, finished = "get detail html started", "get detail html end"
    print(started)
    time.sleep(2)
    print(finished)

def get_detail_url(url):
    """Simulate fetching an article list page with a four second pause.

    ``url`` is accepted but never read by the body.
    """
    started, finished = "get detail url started", "get detail url end"
    print(started)
    time.sleep(4)
    print(finished)

#2. 通过继承Thread来实现多线程

class GetDetailHtml(threading.Thread):
    """Thread subclass whose ``run`` simulates a two second detail-page fetch."""

    def __init__(self, name):
        # Only the thread name is forwarded to the base class.
        threading.Thread.__init__(self, name=name)

    def run(self):
        """Print a start marker, pause for two seconds, print an end marker."""
        started, finished = "get detail html started", "get detail html end"
        print(started)
        time.sleep(2)
        print(finished)

class GetDetailUrl(threading.Thread):
    """Thread subclass whose ``run`` simulates a four second list-page fetch."""

    def __init__(self, name):
        # Only the thread name is forwarded to the base class.
        threading.Thread.__init__(self, name=name)

    def run(self):
        """Print a start marker, pause for four seconds, print an end marker."""
        started, finished = "get detail url started", "get detail url end"
        print(started)
        time.sleep(4)
        print(finished)

if __name__ == "__main__":
    thread1 = GetDetailHtml("get_detail_html")
    thread2 = GetDetailUrl("get_detail_url")
    workers = (thread1, thread2)

    start_time = time.time()
    for worker in workers:
        worker.start()

    # Daemon mode (left disabled): a daemon thread is stopped as soon as the
    # main thread exits. NOTE: the flag has to be set before start().
    # thread1.setDaemon(True)
    # thread2.setDaemon(True)

    # Block until both workers are done so the elapsed time covers the slower one.
    for worker in workers:
        worker.join()

    elapsed = time.time() - start_time
    print("last time: {}".format(elapsed))

10.3 线程间通信-共享变量和Queue

"""

1. 线程通信方式-共享变量

"""

import threading

# URLs produced by get_detail_url() and consumed by get_detail_html().
DETAIL_URL_LIST = []


def get_detail_html():
    """Crawl one article detail page taken from the shared ``DETAIL_URL_LIST``.

    Pops a single URL from the end of the list and simulates the download
    with a two second pause. When the list is empty there is nothing to
    crawl, so the function returns right away instead of raising.
    """
    print("get detail html started")

    try:
        # EAFP: check-then-pop would race with other consumer threads.
        url = DETAIL_URL_LIST.pop()  # a real crawler would fetch `url` here
    except IndexError:
        print("get detail html end")
        return

    time.sleep(2)

    print("get detail html end")

def get_detail_url():
    """Crawl the article list page and publish 20 detail URLs.

    After a four second pause (the simulated download) the URLs are appended
    to the shared ``DETAIL_URL_LIST`` in ascending id order.
    """
    print("get detail url started")
    time.sleep(4)

    found = ["http://projectsedu.com/{id}".format(id=page_id) for page_id in range(20)]
    for link in found:
        DETAIL_URL_LIST.append(link)

    print("get detail url end")

if __name__ == "__main__":
    # Producer: fills the shared list. It was previously created but never
    # started, so the consumers had nothing to pop.
    thread_detail_url = threading.Thread(target=get_detail_url)
    thread_detail_url.start()

    # The consumers pop exactly once and do not wait for data, so the list
    # has to be populated before they are launched.
    thread_detail_url.join()

    html_threads = []
    for i in range(10):
        html_thread = threading.Thread(target=get_detail_html)
        html_thread.start()
        html_threads.append(html_thread)

    # Wait for every consumer before the script ends.
    for html_thread in html_threads:
        html_thread.join()

# =====================================================================

# 通过queue的方式进行线程间同步

from queue import Queue

import time

import threading

def get_detail_html(queue):
    """Consume URLs from ``queue`` forever, simulating a crawl for each one.

    Args:
        queue: a ``queue.Queue``-like object providing a blocking ``get()``
            and ``task_done()``.

    The loop never terminates on its own.
    """
    while True:
        url = queue.get()  # thread-safe; blocks until an item is available

        try:
            print("get detail html started")

            time.sleep(2)

            print("get detail html end")
        finally:
            # Every get() must be matched by task_done(), otherwise a
            # queue.join() in another thread can never return.
            queue.task_done()

def get_detail_url(queue):
    """Produce detail-page URLs into ``queue`` forever.

    Each round pauses four seconds (the simulated list-page download) and
    then enqueues 20 URLs in ascending id order. ``queue.put`` is called
    without a timeout.
    """
    while True:
        print("get detail url started")
        time.sleep(4)

        for page_id in range(20):
            link = "http://projectsedu.com/{id}".format(id=page_id)
            queue.put(link)

        print("get detail url end")

# 2. 线程通信方式-Queue

if __name__ == "__main__":
    detail_url_queue = Queue(maxsize=1000)

    thread_detail_url = threading.Thread(target=get_detail_url, args=(detail_url_queue,))

    # Ten consumers, all blocking on the same queue.
    for i in range(10):
        html_thread = threading.Thread(target=get_detail_html, args=(detail_url_queue,))
        html_thread.start()

    start_time = time.time()

    # The producer was created but never started, leaving every consumer
    # blocked in queue.get() forever.
    thread_detail_url.start()

    # task_done() must only be called by a consumer after a matching get();
    # calling it here, with nothing fetched, raises ValueError.
    # join() blocks until every item put so far has been marked done, and
    # returns immediately if nothing has been queued yet.
    detail_url_queue.join()

    # NOTE: the worker threads are non-daemon and loop forever, so the
    # process keeps running after this line.
    print("last time: {}".format(time.time() - start_time))

10.4 线程同步-Lock、Rlock

from threading import Lock, RLock, Condition  # 可重入的锁

# 在同一个线程里面，可以连续调用多次acquire， 一定要注意acquire的次数要和release的次数相等

# Counter shared by add() and desc(); every update is guarded by `lock`.
total = 0

# Re-entrant lock: the owning thread may acquire it again without blocking.
lock = RLock()


def add():
    """Increment the global ``total`` one million times under ``lock``.

    The lock is deliberately taken twice per iteration to show that an RLock
    can be re-acquired by the thread that already holds it. ``with`` keeps
    the acquire and release counts equal on every exit path.
    """
    # Placeholder steps from the original outline:
    # 1. do something
    # 2. I/O operation
    # 3. do something else
    global total

    for i in range(1000000):
        with lock:
            with lock:
                total += 1

def desc():
    """Decrement the global ``total`` one million times under ``lock``.

    ``with`` releases the lock on every exit path, so the acquire and
    release counts can never drift apart.
    """
    global total

    for i in range(1000000):
        with lock:
            total -= 1

import threading

# Run the locked increment and decrement workers side by side.
thread1 = threading.Thread(target=add)
thread2 = threading.Thread(target=desc)

for worker in (thread1, thread2):
    worker.start()

# Wait for both workers before reading the shared counter.
for worker in (thread1, thread2):
    worker.join()

print(total)

# 1. 用锁会影响性能

# 2. 锁会引起死锁

# 死锁的情况 A（a，b）

"""

A(a、b)

acquire (a)

acquire (b)

B(a、b)

acquire (b)

acquire (a)

"""

10.5 线程同步-condition使用以及源码分析

import threading

#条件变量， 用于复杂的线程间同步

# class XiaoAi(threading.Thread):

#     def __init__(self, lock):

#         super().__init__(name="小爱")

#         self.lock = lock

#

#     def run(self):

#         self.lock.acquire()

#         print("{} : 在 ".format(self.name))

#         self.lock.release()

#

#         self.lock.acquire()

#         print("{} : 好啊 ".format(self.name))

#         self.lock.release()

#

# class TianMao(threading.Thread):

#     def __init__(self, lock):

#         super().__init__(name="天猫精灵")

#         self.lock = lock

#

#     def run(self):

#

#         self.lock.acquire()

#         print("{} : 小爱同学 ".format(self.name))

#         self.lock.release()

#

#         self.lock.acquire()

#         print("{} : 我们来对古诗吧 ".format(self.name))

#         self.lock.release()

#通过condition完成协同读诗

class XiaoAi(threading.Thread):
    """Second speaker of the dialogue: waits for a cue, replies, then cues back."""

    def __init__(self, cond):
        super().__init__(name="小爱")
        # Condition shared with the other speaker.
        self.cond = cond

    def run(self):
        """Hold the condition and answer six times, waiting before each line."""
        replies = (
            "{} : 在 ",
            "{} : 好啊 ",
            "{} : 君住长江尾 ",
            "{} : 共饮长江水 ",
            "{} : 此恨何时已 ",
            "{} : 定不负相思意 ",
        )
        with self.cond:
            for template in replies:
                # Wait for the partner's notify, speak, then hand the turn back.
                self.cond.wait()
                print(template.format(self.name))
                self.cond.notify()

class TianMao(threading.Thread):
    """First speaker of the dialogue: speaks, cues the partner, then waits."""

    def __init__(self, cond):
        super().__init__(name="天猫精灵")
        # Condition shared with the other speaker.
        self.cond = cond

    def run(self):
        """Hold the condition and speak six times, waiting after each line."""
        lines = (
            "{} : 小爱同学 ",
            "{} : 我们来对古诗吧 ",
            "{} : 我住长江头 ",
            "{} : 日日思君不见君 ",
            "{} : 此水几时休 ",
            "{} : 只愿君心似我心 ",
        )
        with self.cond:
            for template in lines:
                # Speak, wake the partner, then wait for the reply.
                print(template.format(self.name))
                self.cond.notify()
                self.cond.wait()

if __name__ == "__main__":
    cond = threading.Condition()

    xiaoai = XiaoAi(cond)

    tianmao = TianMao(cond)

    # Start order matters: XiaoAi must already be waiting when TianMao sends
    # the first notify, otherwise that notification is lost.
    # wait() and notify() may only be called after entering `with cond`.
    # A Condition works with two levels of locks: the underlying lock is
    # released when a thread calls wait(); a second lock is allocated on
    # every wait() call and put into the condition's waiter queue, where it
    # stays until notify() wakes it up.
    xiaoai.start()

    tianmao.start()

10.6 线程同步-Semaphore使用及源码分析

# Semaphore 是用于控制进入数量的锁

# 文件， 读、写， 写一般只是用于一个线程写，读可以允许有多个

# 做爬虫

import threading

import time

class HtmlSpider(threading.Thread):
    """Worker thread that simulates downloading one page.

    The creator is expected to have acquired ``sem`` before starting the
    thread; the worker gives the slot back when it finishes.
    """

    def __init__(self, url, sem):
        super().__init__()

        self.url = url

        self.sem = sem

    def run(self):
        """Simulate a two second download, then release the semaphore slot."""
        try:
            time.sleep(2)

            print("got html text success")
        finally:
            # Release even if the download step fails, so the slot is not
            # lost and later acquire() calls cannot block forever.
            self.sem.release()

class UrlProducer(threading.Thread):
    """Thread that launches 20 ``HtmlSpider`` workers, throttled by a semaphore."""

    def __init__(self, sem):
        threading.Thread.__init__(self)
        self.sem = sem

    def run(self):
        """Acquire one semaphore slot per URL, then start a spider for it."""
        for index in range(20):
            # Blocks while the semaphore has no free slot.
            self.sem.acquire()
            target = "https://baidu.com/{}".format(index)
            HtmlSpider(target, self.sem).start()

if __name__ == "__main__":
    # Semaphore with an initial counter of three, shared by producer and spiders.
    sem = threading.Semaphore(value=3)
    url_producer = UrlProducer(sem)
    url_producer.start()

10.7 ThreadPoolExecutor线程池



from concurrent.futures import ThreadPoolExecutor, as_completed, wait, FIRST_COMPLETED

from concurrent.futures import Future

from multiprocessing import Pool

#未来对象，task的返回容器

#线程池， 为什么要线程池

#主线程中可以获取某一个线程的状态或者某一个任务的状态，以及返回值

#当一个线程完成的时候我们主线程能立即知道

#futures可以让多线程和多进程编码接口一致

import time

def get_html(times):
    """Sleep for ``times`` seconds, report success, and return ``times``."""
    time.sleep(times)
    message = "get page {} success".format(times)
    print(message)
    return times

# Pool with two worker threads shared by every task submitted below.
executor = ThreadPoolExecutor(max_workers=2)

# submit() schedules the callable on the pool and returns a Future immediately.
# task1 = executor.submit(get_html, (3))
# task2 = executor.submit(get_html, (2))

# One task per entry; each value is passed to get_html as its sleep time.
urls = [3, 2, 4]
all_task = list(map(lambda seconds: executor.submit(get_html, seconds), urls))

# Return as soon as the first of the submitted tasks has finished.
wait(all_task, return_when=FIRST_COMPLETED)
print("main")

# for future in as_completed(all_task):

#     data = future.result()

#     print("get {} page".format(data))

#通过executor的map获取已经完成的task的值

# for data in executor.map(get_html, urls):

#     print("get {} page".format(data))

# #done方法用于判定某个任务是否完成

# print(task1.done())

# print(task2.cancel())

# time.sleep(3)

# print(task1.done())

#

# #result方法可以获取task的执行结果

# print(task1.result())

10.8 多进程和多线程对比

import time

from concurrent.futures import ThreadPoolExecutor, as_completed

from concurrent.futures import ProcessPoolExecutor

#多进程编程

#耗cpu的操作，用多进程编程， 对于io操作来说， 使用多线程编程，进程切换代价要高于线程

#1. 对于耗费cpu的操作，多进程优于多线程

# def fib(n):

#     if n<=2:

#         return 1

#     return fib(n-1)+fib(n-2)

#

# if __name__ == "__main__":

#     with ThreadPoolExecutor(3) as executor:

#         all_task = [executor.submit(fib, (num)) for num in range(25,40)]

#         start_time = time.time()

#         for future in as_completed(all_task):

#             data = future.result()

#             print("exe result: {}".format(data))

#

#         print("last time is: {}".format(time.time()-start_time))

#2. 对于io操作来说，多线程优于多进程

def random_sleep(n):
    """Block for ``n`` seconds and hand ``n`` back to the caller."""
    seconds = n
    time.sleep(seconds)
    return seconds

if __name__ == "__main__":
    with ProcessPoolExecutor(3) as executor:
        # Start the clock before submitting: submit() schedules work right
        # away, so timing from after the submissions under-reports the run.
        start_time = time.time()

        # Thirty tasks, each sleeping two seconds, on three worker processes.
        all_task = [executor.submit(random_sleep, num) for num in [2]*30]

        # as_completed yields each future as soon as it has finished.
        for future in as_completed(all_task):
            data = future.result()

            print("exe result: {}".format(data))

        print("last time is: {}".format(time.time()-start_time))

10.9 multiprocessing多进程编程

# import os

# #fork只能用于linux/unix中

# pid = os.fork()

# print("bobby")

# if pid == 0:

#   print('子进程 {} ，父进程是： {}.' .format(os.getpid(), os.getppid()))

# else:

#   print('我是父进程：{}.'.format(pid))

import multiprocessing

#多进程编程

import time

def get_html(n):
    """Sleep for ``n`` seconds, print a success marker, and return ``n``."""
    delay = n
    time.sleep(delay)
    print("sub_progress success")
    return delay

if __name__ == "__main__":
    # Single-process variant, kept for reference:
    # progress = multiprocessing.Process(target=get_html, args=(2,))
    # print(progress.pid)
    # progress.start()
    # print(progress.pid)
    # progress.join()
    # print("main progress end")

    # Use a process pool (not a thread pool) with one worker per CPU. The
    # context manager shuts the workers down when the block exits, which is
    # safe here because every result has been consumed by then.
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        # apply_async variant:
        # result = pool.apply_async(get_html, args=(3,))
        # # wait for all tasks to finish
        # pool.close()
        # pool.join()
        # print(result.get())

        # imap variant (results come back in input order):
        # for result in pool.imap(get_html, [1,5,3]):
        #     print("{} sleep success".format(result))

        # imap_unordered yields each result as soon as its task completes.
        for result in pool.imap_unordered(get_html, [1,5,3]):
            print("{} sleep success".format(result))

10.10 进程间通信-Queue、Pipe、Manager

import time

from multiprocessing import Process, Queue, Pool, Manager, Pipe

# def producer(queue):

#     queue.put("a")

#     time.sleep(2)

#

# def consumer(queue):

#     time.sleep(2)

#     data = queue.get()

#     print(data)

#

# if __name__ == "__main__":

#     queue = Queue(10)

#     my_producer = Process(target=producer, args=(queue,))

#     my_consumer = Process(target=consumer, args=(queue,))

#     my_producer.start()

#     my_consumer.start()

#     my_producer.join()

#     my_consumer.join()

#共享全局变量通信

#共享全局变量不能适用于多进程编程，可以适用于多线程

# def producer(a):

#     a += 100

#     time.sleep(2)

#

# def consumer(a):

#     time.sleep(2)

#     print(a)

#

# if __name__ == "__main__":

#     a = 1

#     my_producer = Process(target=producer, args=(a,))

#     my_consumer = Process(target=consumer, args=(a,))

#     my_producer.start()

#     my_consumer.start()

#     my_producer.join()

#     my_consumer.join()

#multiprocessing中的queue不能用于pool进程池

#pool中的进程间通信需要使用manager中的queue

# def producer(queue):

#     queue.put("a")

#     time.sleep(2)

#

# def consumer(queue):

#     time.sleep(2)

#     data = queue.get()

#     print(data)

#

# if __name__ == "__main__":

#     queue = Manager().Queue(10)

#     pool = Pool(2)

#

#     pool.apply_async(producer, args=(queue,))

#     pool.apply_async(consumer, args=(queue,))

#

#     pool.close()

#     pool.join()

#通过pipe实现进程间通信

#pipe的性能高于queue

# def producer(pipe):

#     pipe.send("bobby")

#

# def consumer(pipe):

#     print(pipe.recv())

#

# if __name__ == "__main__":

#     recevie_pipe, send_pipe = Pipe()

#     #pipe只能适用于两个进程

#     my_producer= Process(target=producer, args=(send_pipe, ))

#     my_consumer = Process(target=consumer, args=(recevie_pipe,))

#

#     my_producer.start()

#     my_consumer.start()

#     my_producer.join()

#     my_consumer.join()

def add_data(p_dict, key, value):
    """Store ``value`` under ``key`` in ``p_dict``.

    ``p_dict`` is mutated in place and nothing is returned. Any object that
    supports item assignment works.
    """
    p_dict[key] = value

if __name__ == "__main__":
    # Keep the manager in a `with` block so its server process is shut down
    # once the shared dict is no longer needed.
    with Manager() as manager:
        # Dict proxy handed to both child processes.
        progress_dict = manager.dict()

        first_progress = Process(target=add_data, args=(progress_dict, "bobby1", 22))

        second_progress = Process(target=add_data, args=(progress_dict, "bobby2", 23))

        first_progress.start()

        second_progress.start()

        # Wait for both writers before reading the dict back.
        first_progress.join()

        second_progress.join()

        print(progress_dict)

第十章：Python高级编程-多线程、多进程和线程池编程的更多相关文章

gj11 多线程、多进程和线程池编程
11.1 python中的GIL # coding=utf-8 # gil global interpreter lock (cpython) # python中一个线程对应于c语言中的一个线程 # ...
Python进阶：多线程、多进程和线程池编程/协程和异步io/asyncio并发编程
gil: gil使得同一个时刻只有一个线程在一个CPU上执行字节码,无法将多个线程映射到多个CPU上执行 gil会根据执行的字节码行数以及时间片释放gil,gil在遇到io的操作时候主动释放 thre ...
python高级之多线程
python高级之多线程本节内容线程与进程定义及区别 python全局解释器锁线程的定义及使用互斥锁线程死锁和递归锁条件变量同步(Condition) 同步条件(Event) 信号量队列 ...
linux C 多线程/线程池编程同步实例
在多线程.线程池编程中经常会遇到同步的问题. 1.创建线程函数原型:int pthread_create(pthread_t *thread, const pthread_attr_t *attr, ...
[Java并发编程（一）] 线程池 FixedThreadPool vs CachedThreadPool ...
[Java并发编程(一)] 线程池 FixedThreadPool vs CachedThreadPool ... 摘要介绍 Java 并发包里的几个主要 ExecutorService . 正文 ...
Java多线程系列--“JUC线程池”06之 Callable和Future
概要本章介绍线程池中的Callable和Future.Callable 和 Future 简介示例和源码分析(基于JDK1.7.0_40) 转载请注明出处:http://www.cnblogs.co ...
Java多线程系列--“JUC线程池”02之线程池原理(一)
概要在上一章"Java多线程系列--“JUC线程池”01之线程池架构"中,我们了解了线程池的架构.线程池的实现类是ThreadPoolExecutor类.本章,我们通过分析Th ...
Java多线程系列--“JUC线程池”03之线程池原理(二)
概要在前面一章"Java多线程系列--“JUC线程池”02之线程池原理(一)"中介绍了线程池的数据结构,本章会通过分析线程池的源码,对线程池进行说明.内容包括:线程池示例参考代 ...
Java多线程系列--“JUC线程池”04之线程池原理(三)
转载请注明出处:http://www.cnblogs.com/skywang12345/p/3509960.html 本章介绍线程池的生命周期.在"Java多线程系列--“基础篇”01之基 ...

随机推荐

Python发送http请求时遇到问题总结
1.报错信息为“ERROR 'str' object has no attribute 'endwith'”,排查发现endswith方法名写错了,少了s,写成了 'endwith' if inter ...
实现一个简单的基于动态代理的 AOP
实现一个简单的基于动态代理的 AOP Intro 上次看基于动态代理的 AOP 框架实现,立了一个 Flag, 自己写一个简单的 AOP 实现示例,今天过来填坑了目前的实现是基于 Emit 来做的, ...
华为鲲鹏服务器安装 k3s+rancher
华为鲲鹏服务器安装 k3s+rancher 华为鲲鹏服务器华为鲲鹏服务器采用华为自研cpu ARMv8架构,提供 Windows 和多个Linux 系统,作为服务器使用我一直使用Centos系统(不 ...
负载均衡服务之HAProxy基础配置（三）
前文我们聊到了haproxy的代理配置段中比较常用的配置指令的用法以及说明,回顾请参考https://www.cnblogs.com/qiuhom-1874/p/12770930.html:今天我们来 ...
不使用tomcat,仅适用javaSE手写服务器--模拟登陆
1.搭建框架我们只是简单模拟,框架简单分三个模块 a,服务器端server包 b,servlet,根据不同的请求url,利用反射生产对应的servlet c,IO工具包,用来关闭IO流 d,编写we ...
线程Event
版本一: from threading import Event,current_thread,Thread import time event=Event() #造一个对象,内部维护一个全局变量,状 ...
在Thinkphp中微信公众号JsApi支付
由于网站使用的微信Native扫码支付,现在公众号需要接入功能,怎么办呢,看这官方文档,参考着demo进行写吧.直接进入正题进入公众号(服务号)设置--->功能设置--->网页授权域名配 ...
Junit借助Groboutils Core进行并发测试
本文参考:http://www.voidcn.com/article/p-ybnvuffh-ke.html:转载请注明出处 junit是无法进行并发测试,但是又有需要并发测试的场景怎么办呢?此时可以借 ...
利用jsDeliver+github实现免费CDN
title: 利用jsDeliver+github实现免费CDN jsDeliver jsDelivr 是一个免费开源的 CDN 解决方案,用于帮助开发者和站长.包含 JavaScript 库.jQu ...
zabbix管理，添加监控主机
一:添加本机为监控主机二.监控其他Linux主机agent端 1.环境部署 [root@localhost ~]# hostname agent.zabbix.com[root@localhost ...

第十章：Python高级编程-多线程、多进程和线程池编程

第十章：Python高级编程-多线程、多进程和线程池编程

10.1 Python中的GIL

10.2 多线程编程-threading

10.3 线程间通信-共享变量和Queue

10.4 线程同步-Lock、Rlock

10.5 线程同步-condition使用以及源码分析

10.6 线程同步-Semaphore使用及源码分析

10.7 ThreadPoolExecutor线程池

10.8 多进程和多线程对比

10.9 multiprocessing多进程编程

10.10 进程间通信-Queue、Pipe、Manager

第十章：Python高级编程-多线程、多进程和线程池编程的更多相关文章

随机推荐

热门专题