1. split file into several files

 """
this is aa customizable version of the standard unix split command-line
utility;because it is written in python,it also works on windows and can be
easily modifyed;because it export a function,its logic can also be imported
and resued in other applications
"""
import sys,os
kilobytes =1024
megabytes = kilobytes*1000
chunksize = int(1.4* megabytes) #default roughtly a floppy def split(fromfile,todir,chunksize=chunksize):
if not os.path.exists(todir): # caller handlers errors
os.mkdir(todir) #make dir,read/write parts
else:
for fname in os.listdir(todir):
os.remove(os.path.join(todir,fname)) #delete any exiting files
partnum =0
input = open(fromfile,'rb')
while True:
chunk = input.read(chunksize)
if not chunk:break
partnum +=1
filename = os.path.join(todir,('part%04d' % partnum))
fileobj = open(filename,'wb')
fileobj.write(chunk)
fileobj.close()
input.close()
assert partnum<=9999
return partnum if __name__ =='__main__':
if len(sys.argv) == 2 and sys.argv[1]== '-help':
print('use:split.py [file to split target-dir [chunksize]]')
else:
if len(sys.argv) <3:
interactive =True
fromfile =input('File to be split?')
todir = input('directory to store part files?')
else:
interactive = False
fromfile,todir = sys.argv[1:3]
if len(sys.argv) == 4:chunksize =int(sys.argv[3])
absfrom,absto = map(os.path.abspath,[fromfile,todir])
print('splitting',absfrom,'to',absto,'by',chunksize)
try:
parts = split(fromfile,todir,chunksize)
except:
print('error during split:')
print(sys.exc_info()[0],sys.exc_info()[1])
else:
print('split finished:',parts,'parts are in ',absto)
if interactive:
input('press enter key') #pause if clicked

split to 200k

2.CopyAllFiles:

 """
Usage: 'python cpall.py dirFrom dirTo'
recursive copy of a directory tree. works like a 'cp -r dirFrom/* dirTo'
unix command,and assumes that dirFrom and dirTo are both directory.
was written to get around fatal error messages under windows drag-and-drop
copies(the first bad file ends the entire copy operation immediately).
but also allows for coding more customized copy operations in python
""" import os,sys
maxfileload =100000
blksize =1024*500 def copyfile(pathFrom,pathTo,maxfileload =maxfileload):
"""
copy one file pathFrom to pathTo,byte for byte;
use binary file mode to supress unicde decode and endline transform
"""
if os.path.getsize(pathFrom) <=maxfileload:
bytesFrom = open(pathFrom,'rb').read()
open(pathTo,'wb').write(bytesFrom)
else:
fileFrom = open(pathFrom,'rb')
fileTo = open(pathTo,'wb')
while True:
bytesFrom = fileFrom.read(blksize)
if not bytesFrom:
break
fileTo.write(bytesFrom) def copytree(dirFrom,dirTo,verbose=0):
"""
copy contents of dirFrom and below to dirTo ,return(files,dirs) counts;
may need to use bytes for dirnames if undecodable on other platforms;
may need to do more file type checking on unix:skip links,fifos,etc;
"""
fcount = dcount =0
for filename in os.listdir(dirFrom):
pathFrom = os.path.join(dirFrom,filename)
pathTo = os.path.join(dirTo,filename)
if not os.path.isdir(pathFrom):
try:
if verbose >1:
print('copying',pathFrom,'to',pathTo)
copyfile(pathFrom,pathTo)
fcount +=1
except:
print('error copying',pathFrom,'to',pathTo,'--skipped')
print(sys.exc_info()[0],sys.exc_info()[1])
else:
if verbose:
print('copying dir',pathFrom,'to',pathTo)
try:
os.mkdir(pathTo)
below = copytree(pathFrom,pathTo)
fcount += below[0]
dcount += below[1]
dcount+=1
except:
print('error creating',pathTo,'--skipped')
print(sys.exc_info()[0],sys.exc_info()[1])
return (fcount,dcount) def getargs():
"""
get and verify directory name arguments ,return default none on errors
"""
try:
dirFrom,dirTo = sys.argv[1:]
except:
print('usage error:cpall.py dirFrom dirTo')
else:
if not os.path.isdir(dirFrom):
print('error:dirFrom is not a dir')
elif not os.path.exists(dirTo):
os.mkdir(dirTo)
print('note:dirTo was created')
return (dirfrom,dirTo)
else:
print('warning:dirto already xists')
if hasattr(os.path,'samefile'):
same = os.path.samefile(dirFrom,dirTo)
else:
same = os.path.abspath(dirFrom) == os.path.abspath(dirTo)
if same:
print('error :dirfrom same as dirTo')
else:
return (dirFrom,dirTo) if __name__ =='__main__':
import time
distuple =getargs()
if distuple:
print('copying...')
start = time.clock()
fcount,dcount = copytree(*distuple)
print('copied',fcount,'files,',dcount,'directories')
print('in ',time.clock()-start,' seconds')

3. compare directory and list all files if not same

 """
############################################################################
usage :python dirdiff.py dir1-path dir2-path
compare two directories to find files that exist in one but not the other
this version uses the os.listdir function and list difference. note that
this script checks only filenames,not file contents --see diffall.py for an
extension that does the latter by comparing .read() results
#############################################################################
""" import os,sys def reportdiffs(unique1,unique2,dir1,dir2):
"""
generate diffs report for one dir: part of comparedirs output
"""
if not (unique1 or unique2):
print('directory lists are identical')
else:
if unique1:
print('files unique to',dir1)
for file in unique1:
print('......',file)
if unique2:
print('files unique to',dir2)
for file in unique2:
print('......',file) def difference(seq1,seq2):
"""
return all items in seq1 only
a set(seq1) - set(seq2) would work too,but sets are randomly
ordered,so any platform-depent directory order would be lost
"""
return [item for item in seq1 if item not in seq2] def comparedirs(dir1,dir2,files1=None,files2=None):
"""
compare directory contents,but not actual files;
may need bytes listdir arg for undecodable filenames on some platforms
"""
print('comparing',dir1,'to',dir2)
files1 = os.listdir(dir1) if files1 is None else files1
files2 = os.listdir(dir2) if files2 is None else files2
unique1 = difference(files1,files2)
unique2 = difference(files2,files1)
reportdiffs(unique1,unique2,dir1,dir2)
return not (unique1,unique2) def getargs():
"args for command line mode"
try:
dir1,dir2 = sys.argv[1:]
except:
print('usage:dirdiff.py dir1 dir2')
sys.exit(1)
else:
return dir1,dir2 if __name__=='__main__':
dir1,dir2 = getargs()
comparedirs(dir1,dir2)

splitFile2SmallFile的更多相关文章

随机推荐

  1. Mac下各种网络命令的使用

    Mac下各种网络命令的使用(http://blog.51yip.com/linux/745.html) pingwww.baidu.com 会一直ping下去,和windows不一样, windows ...

  2. 初玩Linux部署项目

    1,先安装虚拟机2,安装centOS3,安装mysql 安装mysql客户端: yum install mysql 安装mysql 服务器端: yum install mysql-server yum ...

  3. Pow(x, n)

    Implement pow(x, n). public class Solution { public double pow(double x, int n) { //判断x是不是0 if(Math. ...

  4. (学)解决VMware Taking ownership of this virtual machine failed

    原文:http://blog.csdn.net/fisher_jiang/article/details/6992588背景: 一次crash可能会造成虚拟机锁死的情况发生现象:点击take owne ...

  5. pict(Pairwise Independent Combinatorial Testing)工具使用

    PICT工具就是在微软公司内部使用的一款成对组合的命令行生成工具,现在已经对外提供,可以在互联网上下载到. 要把输入类型和其对应的参数输入到一个CSV格式(CSV: 以逗号分割的纯文本文件,不带有任何 ...

  6. js中==和===的区别

    ==用来判断值是否相等: ===用来判断值和类型是否相等

  7. PHP 接入芝麻信用 注意 事项

    芝麻官方下载的SDK,跑不起来,百度搜索一番也没有发现太多的文章 ,只有一个CSDN博客写的一篇文章,比较有参考价值 详细查阅文档+几天测试整理以下几点注意事项: 接入芝麻API接口,应该分2步: 第 ...

  8. php进程占用大量cpu优化

    使用TOP 命令发现php进程占用大量的cpu,达到100%,需要优化. 1 ll /proc/6264/fd 查看进程正在使用的资源 2 strace -p 6264 追踪进程正在做的事情 引用 h ...

  9. HTML5 头部标签定义

    <!DOCTYPE html> <!-- 使用 HTML5 doctype,不区分大小写 --> <html lang="zh-cmn-Hans"&g ...

  10. 奥迪--A3

    -型号:A3 -价格:18-28W -动力:1.4T/1.8T -变速箱:7挡双离合 -长宽高:4.32,1.79,1.43(Limousine:4.46,1.80,1.42) -油箱:50L -发动 ...