splitFile2SmallFile
1. split file into several files
"""
This is a customizable version of the standard Unix split command-line
utility; because it is written in Python, it also works on Windows and can be
easily modified; because it exports a function, its logic can also be imported
and reused in other applications.
"""
import sys,os
kilobytes = 1024
megabytes = kilobytes * 1000
chunksize = int(1.4 * megabytes)  # default part size: roughly a floppy


def split(fromfile, todir, chunksize=chunksize):
    """Split one file into numbered part files inside a directory.

    The source file is read in binary mode, ``chunksize`` bytes at a time,
    and each chunk is written to ``todir`` as ``part0001``, ``part0002``, ...

    Args:
        fromfile: path of the file to split.
        todir: directory that receives the part files.  It is created when
            missing; otherwise every entry already in it is removed first.
        chunksize: maximum number of bytes per part file.

    Returns:
        The number of part files written (0 for an empty source file).

    Raises:
        OSError: from directory creation, cleanup, or file I/O; errors are
            left for the caller to handle.  ``os.remove`` is applied to every
            entry of an existing ``todir``, so a subdirectory there fails.
        AssertionError: if more than 9999 parts were produced.
    """
    if not os.path.exists(todir):                  # caller handles errors
        os.mkdir(todir)                            # make dir for the parts
    else:
        for fname in os.listdir(todir):            # delete any existing files
            os.remove(os.path.join(todir, fname))

    partnum = 0
    # Context managers close both handles even when a read or write fails.
    with open(fromfile, 'rb') as source:
        while True:
            chunk = source.read(chunksize)
            if not chunk:                          # empty read: end of file
                break
            partnum += 1
            filename = os.path.join(todir, ('part%04d' % partnum))
            with open(filename, 'wb') as part:
                part.write(chunk)

    # Part names are formatted with four digits; beyond 9999 they grow to
    # five.  The check runs after the parts are written and is skipped
    # under ``python -O``.
    assert partnum <= 9999
    return partnum


if __name__ == '__main__':
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('use:split.py [file to split target-dir [chunksize]]')
    else:
        if len(sys.argv) < 3:
            # Too few arguments: prompt for the inputs instead.
            interactive = True
            fromfile = input('File to be split?')
            todir = input('directory to store part files?')
        else:
            interactive = False
            fromfile, todir = sys.argv[1:3]
            if len(sys.argv) == 4:
                chunksize = int(sys.argv[3])
        absfrom, absto = map(os.path.abspath, [fromfile, todir])
        print('splitting', absfrom, 'to', absto, 'by', chunksize)
        try:
            parts = split(fromfile, todir, chunksize)
        except Exception:
            # Report any failure, but let Ctrl-C and sys.exit pass through.
            print('error during split:')
            print(sys.exc_info()[0], sys.exc_info()[1])
        else:
            print('split finished:', parts, 'parts are in ', absto)
        if interactive:
            input('press enter key')               # pause if clicked



split to 200k


# 2. CopyAllFiles:
"""
Usage: 'python cpall.py dirFrom dirTo'

Recursive copy of a directory tree.  Works like a 'cp -r dirFrom/* dirTo'
Unix command, and assumes that dirFrom and dirTo are both directories.
Was written to get around fatal error messages under Windows drag-and-drop
copies (the first bad file ends the entire copy operation immediately),
but also allows for coding more customized copy operations in Python.
"""
import os
import sys

maxfileload = 100000       # files up to this many bytes are copied in one read
blksize = 1024 * 500       # bytes per read when copying larger files


def copyfile(pathFrom, pathTo, maxfileload=maxfileload):
    """Copy one file from pathFrom to pathTo, byte for byte.

    Binary file mode is used to suppress Unicode decoding and end-of-line
    translation.

    Args:
        pathFrom: path of the existing source file.
        pathTo: path of the destination file; it is created or overwritten.
        maxfileload: size threshold in bytes.  Files no larger than this are
            read whole; larger files are copied in ``blksize``-byte blocks.

    Raises:
        OSError: if the source cannot be sized or read, or the destination
            cannot be written.
    """
    if os.path.getsize(pathFrom) <= maxfileload:
        # Small file: read everything first, then open the destination.
        with open(pathFrom, 'rb') as fileFrom:
            bytesFrom = fileFrom.read()
        with open(pathTo, 'wb') as fileTo:
            fileTo.write(bytesFrom)
    else:
        # Large file: copy block by block to bound memory use.
        with open(pathFrom, 'rb') as fileFrom, open(pathTo, 'wb') as fileTo:
            while True:
                bytesFrom = fileFrom.read(blksize)
                if not bytesFrom:                  # empty read: end of file
                    break
                fileTo.write(bytesFrom)


def copytree(dirFrom, dirTo, verbose=0):
    """Copy the contents of dirFrom and below into dirTo.

    A failure on one file or subdirectory is reported and skipped, so one
    bad entry does not end the whole copy.  Files counted before a failure
    inside a subdirectory are not added to the totals.

    May need to use bytes for directory names if they are undecodable on
    other platforms, and more file-type checking on Unix (skip links,
    fifos, etc.).

    Args:
        dirFrom: existing source directory.
        dirTo: existing destination directory.
        verbose: 0 prints errors only, 1 also prints each directory copied,
            2 or more also prints each file copied.

    Returns:
        A ``(fcount, dcount)`` tuple: files and directories copied.
    """
    fcount = dcount = 0
    for filename in os.listdir(dirFrom):
        pathFrom = os.path.join(dirFrom, filename)
        pathTo = os.path.join(dirTo, filename)
        if not os.path.isdir(pathFrom):
            try:
                if verbose > 1:
                    print('copying', pathFrom, 'to', pathTo)
                copyfile(pathFrom, pathTo)
                fcount += 1
            except Exception:
                print('error copying', pathFrom, 'to', pathTo, '--skipped')
                print(sys.exc_info()[0], sys.exc_info()[1])
        else:
            if verbose:
                print('copying dir', pathFrom, 'to', pathTo)
            try:
                os.mkdir(pathTo)
                # Pass verbose down so nested levels report as requested.
                below = copytree(pathFrom, pathTo, verbose)
                fcount += below[0]
                dcount += below[1]
                dcount += 1
            except Exception:
                print('error creating', pathTo, '--skipped')
                print(sys.exc_info()[0], sys.exc_info()[1])
    return (fcount, dcount)


def getargs():
    """Get and verify the directory-name arguments from ``sys.argv``.

    dirTo is created when it does not exist.

    Returns:
        ``(dirFrom, dirTo)`` on success.  ``None`` after printing a message
        when the argument count is wrong, dirFrom is not a directory, or
        both names refer to the same directory.
    """
    try:
        dirFrom, dirTo = sys.argv[1:]
    except ValueError:                             # not exactly two arguments
        print('usage error:cpall.py dirFrom dirTo')
        return None

    if not os.path.isdir(dirFrom):
        print('error:dirFrom is not a dir')
        return None

    if not os.path.exists(dirTo):
        os.mkdir(dirTo)
        print('note:dirTo was created')
        return (dirFrom, dirTo)

    print('warning:dirto already xists')
    if hasattr(os.path, 'samefile'):
        same = os.path.samefile(dirFrom, dirTo)
    else:
        # Fallback where samefile is unavailable: compare absolute paths.
        same = os.path.abspath(dirFrom) == os.path.abspath(dirTo)
    if same:
        print('error :dirfrom same as dirTo')
        return None
    return (dirFrom, dirTo)


if __name__ == '__main__':
    import time
    distuple = getargs()
    if distuple:
        print('copying...')
        start = time.perf_counter()                # time.clock() is gone in 3.8+
        fcount, dcount = copytree(*distuple)
        print('copied', fcount, 'files,', dcount, 'directories')
        print('in ', time.perf_counter() - start, ' seconds')

# 3. Compare two directories and list the files that are not in both
"""
Usage: python dirdiff.py dir1-path dir2-path

Compare two directories to find files that exist in one but not the other.
This version uses the os.listdir function and list difference.  Note that
this script checks only filenames, not file contents -- see diffall.py for
an extension that does the latter by comparing .read() results.
"""
import os
import sys


def reportdiffs(unique1, unique2, dir1, dir2):
    """Print the difference report for one pair of directories.

    Args:
        unique1: names found only in dir1.
        unique2: names found only in dir2.
        dir1: label printed above the unique1 entries.
        dir2: label printed above the unique2 entries.
    """
    if not (unique1 or unique2):
        print('directory lists are identical')
        return
    if unique1:
        print('files unique to', dir1)
        for file in unique1:
            print('......', file)
    if unique2:
        print('files unique to', dir2)
        for file in unique2:
            print('......', file)


def difference(seq1, seq2):
    """Return the items of seq1 that are not in seq2, in seq1's order.

    A plain ``set(seq1) - set(seq2)`` would work too, but sets are
    unordered, so any platform-dependent directory order would be lost.
    A set is used only for the membership test, which keeps the order and
    avoids rescanning seq2 for every item.

    Args:
        seq1: sequence whose items are filtered.
        seq2: sequence of items to exclude.

    Returns:
        A new list of the items that appear only in seq1.
    """
    try:
        excluded = set(seq2)
        return [item for item in seq1 if item not in excluded]
    except TypeError:
        # Unhashable items: fall back to scanning seq2 directly.
        return [item for item in seq1 if item not in seq2]


def comparedirs(dir1, dir2, files1=None, files2=None):
    """Compare directory contents by name, but not the actual files.

    May need a bytes listdir argument for undecodable filenames on some
    platforms.

    Args:
        dir1: first directory path.
        dir2: second directory path.
        files1: names to use for dir1; ``os.listdir(dir1)`` when None.
        files2: names to use for dir2; ``os.listdir(dir2)`` when None.

    Returns:
        True when neither listing has a name missing from the other,
        False otherwise.
    """
    print('comparing', dir1, 'to', dir2)
    files1 = os.listdir(dir1) if files1 is None else files1
    files2 = os.listdir(dir2) if files2 is None else files2
    unique1 = difference(files1, files2)
    unique2 = difference(files2, files1)
    reportdiffs(unique1, unique2, dir1, dir2)
    # Negating the tuple (unique1, unique2) would always give False,
    # because a two-item tuple is truthy; test the lists themselves.
    return not (unique1 or unique2)


def getargs():
    """Return ``(dir1, dir2)`` from the command line.

    Prints a usage message and exits with status 1 unless exactly two
    arguments were given.
    """
    try:
        dir1, dir2 = sys.argv[1:]
    except ValueError:                             # not exactly two arguments
        print('usage:dirdiff.py dir1 dir2')
        sys.exit(1)
    else:
        return dir1, dir2


if __name__ == '__main__':
    dir1, dir2 = getargs()
    comparedirs(dir1, dir2)

splitFile2SmallFile的更多相关文章
随机推荐
- ACM: SCU 4438 Censor - KMP
SCU 4438 Censor Time Limit:0MS Memory Limit:0KB 64bit IO Format:%lld & %llu Practice D ...
- BZOJ1068: [SCOI2007]压缩
... 1068: [SCOI2007]压缩 Time Limit: 1 Sec Memory Limit: 128 MBSubmit: 909 Solved: 566[Submit][Statu ...
- 【系统篇】从C/C++语言到进程启动背后的故事
我们需要运行一个程序或者软件,双击之即可完成.不过从你双击到程序的窗口产生的这“短暂”的时间内,Windows为你做了很多的工作. 首先,系统有一个进程监测到了你的双击操作,这个进程就是系统shell ...
- lua中的中文乱码
最近在用lua, 发现一个有点意思的槽点啊-____-! 那就是lua貌似会使用系统所用的字符集. 具体点说, 就是在windows上, 它会使用cp936来表示代码中的中文. 来个例子: print ...
- Unity: Passing Constructor Parameters to Resolve
In this tutorial we will go through of couple different ways of using custom constructor parameters ...
- c#实现简单金山打字小游戏(源码)
using GameDemo.Utils;using System;using System.Collections.Generic;using System.Linq;using System.Te ...
- Android课程---关于下拉列表与状态栏提示的学习
activity_ui7.xml <?xml version="1.0" encoding="utf-8"?> <LinearLayout x ...
- Java学习记录-Jdk包简单介绍
java.applet Java语言编写的一些小应用程序 java.awt AWT 是Abstract Window ToolKit (抽象窗口工具包)的缩写,这个工具包提供了一套与本地图形界面进行交 ...
- soui中,列表控件动态高度的使用注意
1.listview的模板template中,需要增加defHeight属性,即默认高度,同时,不能出现itemHeight属性,否则动态高度会失效 2.数据适配器中,重写getViewDesired ...
- js中列表控件排序箭头,在wke中不支持的解决办法
列表中箭头,实际使用的在线css样式,wke不支持排序箭头(实际是字体)的在线css样式,可以客户端安装对应的字体,之后显示就正常了.