python 多进程与多线程配合拷贝文件目录

版本一:使用shutil进行拷贝

 # -*- coding: utf-8 -*-

 # @author: Tele

 # @Time  : 2019/04/02 下午 3:09

 # 待改进:

 # 1.拷贝逻辑使用原生的io

 # 2.针对大文件在进程内部实现多线程方式进行拷贝

 import time

 import re

 import os

 import shutil

 import multiprocessing

 # 遍历文件夹

 def walk_file(file):
     """Return the names of every file found anywhere below ``file``.

     The tree is traversed with ``os.walk``. Only bare file names are
     collected (no directory part), in traversal order; directories
     themselves are not listed.
     """
     return [name for _root, _dirs, names in os.walk(file) for name in names]

 # 计算文件数量

 def get_file_count(dir):
     """Return how many entries ``walk_file`` reports for ``dir``."""
     entries = walk_file(dir)
     return len(entries)

 def copy(src, target, queue):
     """Copy one file or directory tree, then report how many files it held.

     A regular file is copied with ``shutil.copyfile`` and counts as 1.
     A directory is copied with ``shutil.copytree`` and counts as the number
     of files ``get_file_count`` finds in it. The count is put on ``queue``
     once the copy has finished.
     """
     if not os.path.isdir(src):
         finished = 1
         shutil.copyfile(src, target)
     else:
         finished = get_file_count(src)
         shutil.copytree(src, target)

     # Tell the consumer of the queue how many files this call completed.
     queue.put(finished)

 def copy_dir(src, desc):
     """Copy every entry of directory ``src`` into ``desc`` with a process pool.

     Any existing ``desc`` is deleted first. Each top-level entry of ``src``
     becomes one ``copy`` task in a pool of 3 worker processes. Workers report
     the number of files they finished through a manager queue, which drives
     the progress line printed here; the elapsed time is printed at the end.

     Args:
         src: Source directory, "/"-separated; a trailing "/" is accepted.
         desc: Destination directory; removed if present, then recreated.

     Raises:
         Exception: Whatever a worker raised while copying, re-raised once
             all workers have finished.
     """
     # Function-scope import: only the Empty exception of the stdlib queue
     # module is needed here.
     from queue import Empty

     total_number = get_file_count(src)

     # Strip a trailing separator so the joins below do not produce "//".
     src = check_separator(src)
     desc = check_separator(desc)

     # Snapshot the source entries before the destination is wiped.
     names = os.listdir(src)
     if os.path.exists(desc):
         shutil.rmtree(desc)
     # Create the destination once, up front; an empty source then still
     # yields an (empty) destination directory.
     os.makedirs(desc)

     with multiprocessing.Manager() as manager, multiprocessing.Pool(3) as pool:
         progress = manager.Queue()
         start = time.time()

         # One task per top-level file/directory. The target is built from the
         # entry name itself rather than from a fixed "/" position in the
         # source path, so sources of any depth map directly under ``desc``.
         results = [
             pool.apply_async(copy, args=(src + "/" + name, desc + "/" + name, progress))
             for name in names
         ]
         pool.close()

         count = 0
         while count < total_number:
             # Sample completion BEFORE waiting: if every task had already
             # finished and the queue is still empty afterwards, no further
             # progress can arrive, so stop instead of blocking forever.
             all_done = all(r.ready() for r in results)
             try:
                 count += progress.get(timeout=0.5)
             except Empty:
                 if all_done:
                     break
                 continue
             # "%%" prints a literal "%"; "\r" with end="" rewrites the same line.
             print("\r拷贝进度为 %.2f %%" % (count * 100 / total_number), end="")

         pool.join()
         end = time.time()

     print()
     print("耗时-----", (end - start), "s")

     # apply_async stores worker exceptions in the result object; fetch each
     # result so a failed copy is reported instead of silently ignored.
     for r in results:
         r.get()

 # 查找指定字符出现的全部索引位置

 def index_list(c, s):
     """Return the start index of every occurrence of ``c`` in ``s``.

     ``c`` is matched literally: it is escaped before being handed to the
     regex engine, so characters such as "." or "+" are not treated as
     pattern syntax.

     Args:
         c: The character (or substring) to look for.
         s: The string to search.

     Returns:
         A list of 0-based indices, in ascending order; empty if ``c`` does
         not occur.
     """
     return [m.start() for m in re.finditer(re.escape(c), s)]

 # 检测目录结尾是否有 "/"

 def check_separator(path):
     """Return ``path`` without a single trailing "/".

     Paths that do not end in "/" (including ones that contain no "/" at all,
     and the empty string) are returned unchanged. A lone "/" is also kept,
     so the filesystem root is not turned into an empty path.

     Args:
         path: A "/"-separated path string.

     Returns:
         The path with at most one trailing "/" removed.
     """
     if len(path) > 1 and path.endswith("/"):
         return path[:-1]
     return path

 def main(src="f:/ftp_mypc/", desc="e:/ftp_mypc/"):
     """Copy ``src`` to ``desc`` with ``copy_dir``.

     Args:
         src: Source directory; defaults to the path this script was written with.
         desc: Destination directory; defaults to the path this script was written with.
     """
     copy_dir(src, desc)

 # Start the copy only when this file is executed as a script. The guard also
 # keeps main() from running again if a multiprocessing worker imports this module.
 if __name__ == '__main__':

     main()

这样做仍然有些小问题：对于大文件，可以在进程内部采用多线程的方式分段拷贝；但使用 shutil 进行拷贝时无法按字节范围对文件进行切分，于是有了下面的版本二。

版本二:

 # -*- coding: utf-8 -*-

 # @author: Tele

 # @Time  : 2019/04/02 下午 3:09

 # 使用多进程拷贝文件夹,对于大文件进程内部又使用了多线程进行拷贝

 # 使用进程池实现多进程时,使用的消息队列要使用multiprocessing.Manager().Queue()创建

 import time

 import re

 import os

 import shutil

 import multiprocessing

 import math

 from concurrent.futures import ThreadPoolExecutor, wait

 # Size limit for the direct copy path: 209715200 bytes (200 MiB). Files larger
 # than this are copied in chunks by copy_single_file().

 MAX_SINGLE_FILE_SIZE = 209715200

 # Lock that copy_file_thread() holds around its seek/read/write sequence.
 mutex = multiprocessing.Lock()

 # Thread pool (3 workers) that copy_single_file() submits chunk tasks to.
 executor = ThreadPoolExecutor(max_workers=3)

 # 遍历文件夹

 def walk_file(file):
     """Return the names of all files and all empty directories below ``file``.

     The tree is traversed with ``os.walk``. For each visited directory its
     file names are collected first, followed by the names of those
     sub-directories that contain no entries at all. Only bare names are
     returned, without their directory part.
     """
     entries = []
     for root, dirs, files in os.walk(file):
         entries.extend(files)
         # A sub-directory counts as an item only when it is empty.
         entries.extend(d for d in dirs if not os.listdir(os.path.join(root, d)))
     return entries

 # 计算文件数量

 def get_file_count(dir):
     """Return how many entries ``walk_file`` reports for ``dir``."""
     entries = walk_file(dir)
     return len(entries)

 def copy(src, target, queue):
     """Copy one file or directory tree to ``target`` and report the item count.

     A regular file is copied with ``copy_single_file`` and counts as 1.
     A directory is recreated under ``target`` with the same relative layout,
     including empty sub-directories, and counts as ``get_file_count(src)``
     items. The count is put on ``queue`` once the copy has finished.

     Args:
         src: Path of the file or directory to copy.
         target: Destination path for that same file or directory.
         queue: Object with a ``put`` method that receives the item count.
     """
     buffer = 1024
     target_number = 1

     if os.path.isdir(src):
         # walk_file() lists an empty directory as one item of its parent but
         # finds nothing inside it, so an empty ``src`` still counts as 1.
         target_number = get_file_count(src) or 1
         os.makedirs(target, exist_ok=True)

         for root, dirs, files in os.walk(src):
             # Mirror the position of ``root`` relative to ``src`` below
             # ``target``. Destinations are derived from ``target`` only, so
             # they can never coincide with the files being read.
             rel_root = os.path.relpath(root, src)
             dest_root = target if rel_root == os.curdir else os.path.join(target, rel_root)

             # Create sub-directories explicitly so empty ones are kept.
             for d in dirs:
                 os.makedirs(os.path.join(dest_root, d), exist_ok=True)

             for f in files:
                 copy_single_file(buffer, os.path.join(root, f), os.path.join(dest_root, f))
     else:
         copy_single_file(buffer, src, target)

     # Report the number of completed items to the consumer of the queue.
     queue.put(target_number)

 # 拷贝单文件

 def copy_single_file(buffer, src, target):
     """Copy the file ``src`` to ``target``, creating parent directories.

     Files of up to ``MAX_SINGLE_FILE_SIZE`` bytes are streamed in
     ``buffer``-sized reads. Larger files are split into contiguous 50 MiB
     byte ranges, each submitted to the module-level ``executor`` as a
     ``copy_file_thread`` task.

     Args:
         buffer: Read size in bytes for the direct (small file) path.
         src: Path of the file to read.
         target: Path of the file to create or overwrite.

     Raises:
         OSError: If a file cannot be opened, read or written.
         Exception: Any error raised inside a chunk task.
     """
     file_size = os.path.getsize(src)

     # Make sure the destination directory exists. exist_ok avoids a failure
     # when something else creates it between a check and the call; a bare
     # file name has no parent to create.
     parent_path = os.path.split(target)[0]
     if parent_path:
         os.makedirs(parent_path, exist_ok=True)

     # ``with`` closes both files on every exit path, including errors.
     with open(src, "rb") as rs, open(target, "wb") as ws:
         # Small file: plain sequential read/write.
         if file_size <= MAX_SINGLE_FILE_SIZE:
             while True:
                 content = rs.read(buffer)
                 if not content:
                     break
                 ws.write(content)
             return

         # Bytes handled by each chunk task: 50 MiB.
         PER_THREAD_SIZE = 52428800

         task_list = []
         # Chunks start at exact multiples of the chunk size so that they are
         # contiguous; the last one takes whatever remains.
         for start in range(0, file_size, PER_THREAD_SIZE):
             byte_size = min(PER_THREAD_SIZE, file_size - start)
             # All tasks share rs/ws; copy_file_thread positions them itself.
             task_list.append(executor.submit(copy_file_thread, start, byte_size, rs, ws))

         # Keep the files open until every chunk is done, then re-raise any
         # error a task hit instead of leaving it unnoticed in its future.
         wait(task_list)
         for task in task_list:
             task.result()

 # 多线程拷贝

 def copy_file_thread(start, byte_size, rs, ws):
     """Copy ``byte_size`` bytes from ``rs`` to ``ws`` at offset ``start``.

     Both file objects are positioned at ``start`` and the range is copied in
     reads of at most 1024 bytes. The whole operation runs while holding the
     module-level ``mutex``.

     Args:
         start: Byte offset, used for both the read and the write position.
         byte_size: Number of bytes to copy.
         rs: Source file object opened for binary reading.
         ws: Destination file object opened for binary writing.
     """
     buffer = 1024
     remaining = byte_size

     # ``with`` releases the lock even if a read or write raises.
     with mutex:
         rs.seek(start)
         ws.seek(start)
         while remaining > 0:
             # Ask only for what is still missing, so a short read never
             # makes this task run past the end of its range.
             content = rs.read(min(buffer, remaining))
             if not content:
                 # Source ended early; stop rather than loop forever.
                 break
             ws.write(content)
             remaining -= len(content)
         ws.flush()

 def write(content, ws):
     """Write ``content`` to ``ws`` and flush it straight away."""
     stream = ws
     stream.write(content)
     stream.flush()

 def copy_dir(src, desc):
     """Copy every entry of directory ``src`` into ``desc`` with a process pool.

     Any existing ``desc`` is deleted first. Each top-level entry of ``src``
     becomes one ``copy`` task in a pool of 3 worker processes. Workers report
     the number of items they finished (files and empty directories) through
     a manager queue, which drives the progress line printed here; the
     elapsed time is printed at the end.

     Args:
         src: Source directory, "/"-separated; a trailing "/" is accepted.
         desc: Destination directory; removed if present, then recreated.

     Raises:
         Exception: Whatever a worker raised while copying, re-raised once
             all workers have finished.
     """
     # Function-scope import: only the Empty exception of the stdlib queue
     # module is needed here.
     from queue import Empty

     # Total number of items to copy (files plus empty directories).
     total_number = get_file_count(src)

     # Strip a trailing separator so the joins below do not produce "//".
     src = check_separator(src)
     desc = check_separator(desc)

     # Snapshot the source entries before the destination is wiped.
     names = os.listdir(src)
     if os.path.exists(desc):
         shutil.rmtree(desc)
     # Create the destination once, up front; an empty source then still
     # yields an (empty) destination directory.
     os.makedirs(desc)

     # A pool's workers need a queue created through a Manager.
     with multiprocessing.Manager() as manager, multiprocessing.Pool(3) as pool:
         progress = manager.Queue()
         start = time.time()

         # One task per top-level file/directory. Each target lives directly
         # under ``desc``, whatever path ``src`` has.
         results = [
             pool.apply_async(copy, args=(src + "/" + name, desc + "/" + name, progress))
             for name in names
         ]
         pool.close()

         count = 0
         while count < total_number:
             # Sample completion BEFORE waiting: if every task had already
             # finished and the queue is still empty afterwards, no further
             # progress can arrive, so stop instead of blocking forever.
             all_done = all(r.ready() for r in results)
             try:
                 count += progress.get(timeout=0.5)
             except Empty:
                 if all_done:
                     break
                 continue
             # "%%" prints a literal "%"; "\r" with end="" rewrites the same line.
             print("\r当前进度为 %.2f %%" % (count * 100 / total_number), end="")

         pool.join()

     # Release this process's copy of the module-level thread pool.
     executor.shutdown()
     end = time.time()

     print()
     print("耗时-----", (end - start), "s")

     # apply_async stores worker exceptions in the result object; fetch each
     # result so a failed copy is reported instead of silently ignored.
     for r in results:
         r.get()

 # 查找指定字符出现的全部索引位置

 def index_list(c, s):
     """Return the start index of every occurrence of ``c`` in ``s``.

     ``c`` is matched literally: it is escaped before being handed to the
     regex engine, so characters such as "." or "+" are not treated as
     pattern syntax.

     Args:
         c: The character (or substring) to look for.
         s: The string to search.

     Returns:
         A list of 0-based indices, in ascending order; empty if ``c`` does
         not occur.
     """
     literal = re.compile(re.escape(c))
     return [m.start() for m in literal.finditer(s)]

 # 检测目录结尾是否有 "/"

 def check_separator(path):
     """Return ``path`` without a single trailing "/".

     Paths that do not end in "/" (including ones that contain no "/" at all,
     and the empty string) are returned unchanged. A lone "/" is also kept,
     so the filesystem root is not turned into an empty path.

     Args:
         path: A "/"-separated path string.

     Returns:
         The path with at most one trailing "/" removed.
     """
     if len(path) > 1 and path.endswith("/"):
         return path[:-1]
     return path

 def main(src="f:/ftp_mypc/", desc="e:/ftp_mypc/"):
     """Copy ``src`` to ``desc`` with ``copy_dir``.

     Args:
         src: Source directory; defaults to the path this script was written with.
         desc: Destination directory; defaults to the path this script was written with.
     """
     copy_dir(src, desc)

 # Start the copy only when this file is executed as a script. The guard also
 # keeps main() from running again if a multiprocessing worker imports this module.
 if __name__ == '__main__':

     main()

python 多进程与多线程配合拷贝文件目录的更多相关文章

python多进程与多线程编程
进程(process)和线程(thread)是非常抽象的概念.多线程与多进程编程对于代码的并发执行,提升代码运行效率和缩短运行时间至关重要.下面介绍一下python的multiprocess和thre ...
Python多进程与多线程编程及GIL详解
介绍如何使用python的multiprocess和threading模块进行多线程和多进程编程. Python的多进程编程与multiprocess模块 python的多进程编程主要依靠multip ...
Python多进程和多线程是鸡肋嘛？【转】
GIL是什么 Python的代码执行由 Python虚拟机(也叫解释器主循环,CPython版本)来控制,Python在设计之初就考虑到在解释器的主循环中,同时只有一个线程在运行.即每个CPU在任意时 ...
python 多进程和多线程的区别
了解线程和进程进程程序:磁盘上的可执行二进制文件,并无运行状态. 进程:就是一个正在运行的任务实例(存活在内存里). 获取当前电脑的CPU核心数: pip install psutil >& ...
python多进程和多线程
多任务才有多进程和线程: 线程是最小的执行单元,而进程由至少一个线程组成.如何调度进程和线程,完全由操作系统决定,程序自己不能决定什么时候执行,执行多长时间. 多进程和多线程的程序涉及到同步.数据共享 ...
Python多进程vs多线程
多任务的两种方式:多进程和多线程. 如果用多进程实现Master-Worker,主进程就是Master,其他进程就是Worker. 如果用多线程实现Master-Worker,主线程就是Master, ...
【转】【Python】Python多进程与多线程
1.1 multiprocessing multiprocessing是多进程模块,多进程提供了任务并发性,能充分利用多核处理器.避免了GIL(全局解释锁)对资源的影响. 有以下常用类: 类描述 P ...
Python 多进程、多线程效率比较
Python 界有条不成文的准则: 计算密集型任务适合多进程,IO 密集型任务适合多线程.本篇来作个比较. 通常来说多线程相对于多进程有优势,因为创建一个进程开销比较大,然而因为在 python 中有 ...
python 多进程，多线程，协程
在我们实际编码中,会遇到一些并行的任务,因为单个任务无法最大限度的使用计算机资源.使用并行任务,可以提高代码效率,最大限度的发挥计算机的性能.python实现并行任务可以有多进程,多线程,协程等方式. ...

随机推荐

HTML的SEO（搜索引擎优化）标准
HTML的SEO(搜索引擎优化)标准一.总结这个做seo的时候要多看,做网站优化的时候 1. SEO(搜索引擎优化):通过总结搜索引擎的排名规律,对网站进行合理优化,使你的网站在百度和Google ...
21.Spring Boot 使用Java代码创建Bean并注册到Spring中
转自:https://blog.csdn.net/catoop/article/details/50558333
1.8 Python基础知识 - 数值类型
一.int类型(任意精度整数) 整型类型(int)是表示整数的数据类型.与其他计算机语言有精度限制不同,Python的整数位数可以为任意长度位数(只受限制于计算机内存) 数字字符串即整型常量. pyt ...
【习题 6-8 UVA - 806】Spatial Structures
[链接] 我是链接,点我呀:) [题意] 在这里输入题意 [题解] 写两个dfs模拟就好. 注意每12个数字输出一个换行.. [代码] /* 1.Shoud it use long long ? 2. ...
PatentTips - Apparatus and method for a generic, extensible and efficient data manager for virtual peripheral component interconnect devices (VPCIDs)
BACKGROUND A single physical platform may be segregated into a plurality of virtual networks. Here, ...
iOS开发之CocoaPods（objective-c第三方库管理工具）
介绍: iOS开发中,大多数情况下,我们都须要集成一些第三方依赖库.对于一个稍大的项目,用到的第三方依赖库的数量也很可观.CocoaPods是objective-c第三方库管理工具,方便第三方库的管理 ...
[寒江孤叶丶的Cocos2d-x之旅_36]用LUA实现UTF8的字符串基本操作 UTF8字符串长度，UTF8字符串剪裁等
原创文章,欢迎转载,转载请注明:文章来自[寒江孤叶丶的Cocos2d-x之旅系列] 博客地址:http://blog.csdn.net/qq446569365 一个用于UTF8字符串操作的类.功能比較 ...
amazeui学习笔记--js插件（UI增强）--警告框Alert
amazeui学习笔记--js插件(UI增强)--警告框Alert 一.总结 1.警告框基本样式:用am-alert声明div容器, <div class="am-alert" ...
HDU 1166 敌兵布阵树状数组||线段树
http://acm.hdu.edu.cn/showproblem.php?pid=1166 题目大意: 给定n个数的区间N<=50000,还有Q个询问(Q<=40000)求区间和. 每个 ...
Java ThreadLocal Example（java中的ThreadLocal例子）
Java ThreadLocal is used to create thread local variables. We know that all threads of an Object sha ...

python 多进程与多线程配合拷贝文件目录

python 多进程与多线程配合拷贝文件目录的更多相关文章

随机推荐

热门专题