splitFile2SmallFile
1. split file into several files
"""
this is a customizable version of the standard unix split command-line
utility; because it is written in python, it also works on windows and can be
easily modified; because it exports a function, its logic can also be imported
and reused in other applications
"""
import sys,os
kilobytes = 1024
# NOTE(review): one "megabyte" here is 1000 KiB, not 1024 KiB; kept as-is
# because it determines the default chunk size that callers receive.
megabytes = kilobytes * 1000
chunksize = int(1.4 * megabytes)          # default: roughly one floppy disk


def split(fromfile, todir, chunksize=chunksize):
    """Split ``fromfile`` into numbered part files stored in ``todir``.

    The parts are named ``part0001``, ``part0002``, ... and each holds at
    most ``chunksize`` bytes, so concatenating them in name order rebuilds
    the original file.

    ``todir`` is created when it does not exist; otherwise every entry
    already inside it is removed first, so stale parts from an earlier run
    cannot be mixed with the new ones.

    Args:
        fromfile: path of the file to split (read in binary mode).
        todir: directory that receives the part files.
        chunksize: maximum size of each part in bytes; must be positive.

    Returns:
        The number of part files written (0 for an empty input file).

    Raises:
        ValueError: if ``chunksize`` is not positive.
        AssertionError: if more than 9999 parts were produced, which the
            four-digit part names cannot represent in sortable order.
        OSError: on any file-system failure; left to the caller to handle.
    """
    # Validate before touching todir: a non-positive size would otherwise
    # wipe the directory and then write nothing useful.
    if chunksize <= 0:
        raise ValueError('chunksize must be a positive number of bytes')

    if not os.path.exists(todir):
        os.mkdir(todir)                               # make dir for the parts
    else:
        for fname in os.listdir(todir):
            os.remove(os.path.join(todir, fname))     # delete existing files

    partnum = 0
    with open(fromfile, 'rb') as source:
        while True:
            chunk = source.read(chunksize)
            if not chunk:                             # empty read: end of file
                break
            partnum += 1
            filename = os.path.join(todir, 'part%04d' % partnum)
            with open(filename, 'wb') as fileobj:
                fileobj.write(chunk)

    # Raised explicitly so the check still runs under ``python -O``.
    if partnum > 9999:
        raise AssertionError('too many parts for 4-digit part names')
    return partnum


if __name__ == '__main__':
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('use:split.py [file to split target-dir [chunksize]]')
    else:
        if len(sys.argv) < 3:
            # Too few arguments: ask for the paths interactively.
            interactive = True
            fromfile = input('File to be split?')
            todir = input('directory to store part files?')
        else:
            interactive = False
            fromfile, todir = sys.argv[1:3]
            if len(sys.argv) == 4:
                chunksize = int(sys.argv[3])
        absfrom, absto = map(os.path.abspath, [fromfile, todir])
        print('splitting', absfrom, 'to', absto, 'by', chunksize)
        try:
            parts = split(fromfile, todir, chunksize)
        except Exception as exc:
            print('error during split:')
            print(type(exc), exc)
        else:
            print('split finished:', parts, 'parts are in ', absto)
        if interactive:
            input('press enter key')                  # pause if clicked



split to 200k


2.CopyAllFiles:
"""
Usage: 'python cpall.py dirFrom dirTo'
recursive copy of a directory tree. works like a 'cp -r dirFrom/* dirTo'
unix command, and assumes that dirFrom and dirTo are both directories.
was written to get around fatal error messages under windows drag-and-drop
copies (the first bad file ends the entire copy operation immediately),
but also allows for coding more customized copy operations in python
"""
import os
import sys

maxfileload = 100000          # files up to this many bytes are read in one go
blksize = 1024 * 500          # read size used for files larger than that


def copyfile(pathFrom, pathTo, maxfileload=maxfileload):
    """Copy one file from ``pathFrom`` to ``pathTo``, byte for byte.

    Both files are opened in binary mode, so no text decoding or end-of-line
    translation takes place. Files of at most ``maxfileload`` bytes are read
    in a single call; larger ones are copied in ``blksize`` pieces.

    Raises:
        OSError: if either file cannot be opened, read or written.
    """
    size = os.path.getsize(pathFrom)
    # The context managers close (and flush) both files even when an error
    # interrupts the copy.
    with open(pathFrom, 'rb') as fileFrom, open(pathTo, 'wb') as fileTo:
        if size <= maxfileload:
            fileTo.write(fileFrom.read())
        else:
            while True:
                bytesFrom = fileFrom.read(blksize)
                if not bytesFrom:
                    break
                fileTo.write(bytesFrom)


def copytree(dirFrom, dirTo, verbose=0):
    """Copy the contents of ``dirFrom`` and everything below it to ``dirTo``.

    ``dirTo`` must already exist. A file or subdirectory that fails to copy
    is reported on stdout and skipped, so one bad entry does not end the
    whole operation.

    Args:
        dirFrom: directory whose contents are copied.
        dirTo: existing directory that receives the copies.
        verbose: 0 is quiet, 1 reports each directory, 2 or more also
            reports each file.

    Returns:
        A ``(files, dirs)`` tuple with the number of files and directories
        copied successfully.

    May need bytes for directory names if they are undecodable on other
    platforms; may need more file-type checks on unix (links, fifos, etc.).
    """
    fcount = dcount = 0
    for filename in os.listdir(dirFrom):
        pathFrom = os.path.join(dirFrom, filename)
        pathTo = os.path.join(dirTo, filename)
        if not os.path.isdir(pathFrom):
            try:
                if verbose > 1:
                    print('copying', pathFrom, 'to', pathTo)
                copyfile(pathFrom, pathTo)
                fcount += 1
            except Exception as exc:
                print('error copying', pathFrom, 'to', pathTo, '--skipped')
                print(type(exc), exc)
        else:
            if verbose:
                print('copying dir', pathFrom, 'to', pathTo)
            try:
                os.mkdir(pathTo)
                # Pass verbose on so nested levels report like the top one.
                below = copytree(pathFrom, pathTo, verbose)
                fcount += below[0]
                dcount += below[1]
                dcount += 1
            except Exception as exc:
                print('error creating', pathTo, '--skipped')
                print(type(exc), exc)
    return (fcount, dcount)


def getargs():
    """Read and verify the two directory arguments from ``sys.argv``.

    Creates ``dirTo`` when it does not exist yet.

    Returns:
        ``(dirFrom, dirTo)`` when the arguments are usable, otherwise
        ``None`` after printing the reason: wrong argument count,
        ``dirFrom`` not a directory, or both names referring to the same
        directory.
    """
    try:
        dirFrom, dirTo = sys.argv[1:]
    except ValueError:                       # not exactly two arguments
        print('usage error:cpall.py dirFrom dirTo')
        return None

    if not os.path.isdir(dirFrom):
        print('error:dirFrom is not a dir')
        return None

    if not os.path.exists(dirTo):
        os.mkdir(dirTo)
        print('note:dirTo was created')
        return (dirFrom, dirTo)

    print('warning:dirto already xists')
    if hasattr(os.path, 'samefile'):
        same = os.path.samefile(dirFrom, dirTo)
    else:
        same = os.path.abspath(dirFrom) == os.path.abspath(dirTo)
    if same:
        print('error :dirfrom same as dirTo')
        return None
    return (dirFrom, dirTo)


if __name__ == '__main__':
    import time
    distuple = getargs()
    if distuple:
        print('copying...')
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()
        fcount, dcount = copytree(*distuple)
        print('copied', fcount, 'files,', dcount, 'directories')
        print('in ', time.perf_counter() - start, ' seconds')

3. compare directory and list all files if not same
"""
############################################################################
usage: python dirdiff.py dir1-path dir2-path
compare two directories to find files that exist in one but not the other.
this version uses the os.listdir function and list difference. note that
this script checks only filenames, not file contents -- see diffall.py for
an extension that does the latter by comparing .read() results
#############################################################################
"""
import os
import sys


def reportdiffs(unique1, unique2, dir1, dir2):
    """Print the names found in only one of two directory listings.

    Args:
        unique1: names present only in ``dir1``.
        unique2: names present only in ``dir2``.
        dir1, dir2: directory labels used in the printed headings.
    """
    if not (unique1 or unique2):
        print('directory lists are identical')
        return
    for label, names in ((dir1, unique1), (dir2, unique2)):
        if names:
            print('files unique to', label)
            for name in names:
                print('......', name)


def difference(seq1, seq2):
    """Return the items of ``seq1`` that are not in ``seq2``, in order.

    ``set(seq1) - set(seq2)`` would also work, but sets are unordered, so
    any platform-dependent directory order would be lost. A set is used
    only for the membership test; the result keeps the order (and any
    duplicates) of ``seq1``.
    """
    try:
        exclude = set(seq2)          # O(1) lookups when items are hashable
    except TypeError:
        exclude = seq2               # unhashable items: fall back to a scan
    return [item for item in seq1 if item not in exclude]


def comparedirs(dir1, dir2, files1=None, files2=None):
    """Compare the names in two directories and print the differences.

    Only names are compared, never file contents. May need a bytes listdir
    argument for undecodable filenames on some platforms.

    Args:
        dir1, dir2: directories to compare.
        files1, files2: optional pre-fetched listings; when ``None`` the
            listing is read with ``os.listdir``.

    Returns:
        True when both listings hold the same names, otherwise False.
    """
    print('comparing', dir1, 'to', dir2)
    files1 = os.listdir(dir1) if files1 is None else files1
    files2 = os.listdir(dir2) if files2 is None else files2
    unique1 = difference(files1, files2)
    unique2 = difference(files2, files1)
    reportdiffs(unique1, unique2, dir1, dir2)
    # ``not (unique1, unique2)`` tested a 2-tuple, which is always truthy,
    # so the result was always False; test the two lists themselves.
    return not (unique1 or unique2)


def getargs():
    """Return ``(dir1, dir2)`` from the command line.

    Prints a usage message and exits with status 1 unless exactly two
    arguments were given.
    """
    try:
        dir1, dir2 = sys.argv[1:]
    except ValueError:
        print('usage:dirdiff.py dir1 dir2')
        sys.exit(1)
    else:
        return dir1, dir2


if __name__ == '__main__':
    dir1, dir2 = getargs()
    comparedirs(dir1, dir2)

splitFile2SmallFile的更多相关文章
随机推荐
- js汉字与拼音互转终极方案,附简单的JS拼音输入法【转】
github项目地址:https://github.com/liuxianan/pinyinjs 完整demo演示:http://demo.liuxianan.com/pinyinjs/ 汉字转拼音: ...
- 数论 - Vanya and Computer Game
Vanya and his friend Vova play a computer game where they need to destroy n monsters to pass a level ...
- grunt 检测js配置
module.exports = function(grunt) { // 项目配置 grunt.initConfig({ pkg: grunt.file.readJSON('package.json ...
- ibatis #于 $区别
系统框架用ibatis,开发中ibatis配置文件中执行order by #orderByClause# ,怎么搞都没有效果, 后面改成 order by $orderByClause$,OK,问题解 ...
- Screen Orientation for Windows Phone
http://msdn.microsoft.com/en-us/library/windows/apps/jj207002(v=vs.105).aspx
- 利用JS实现自定义滚动条
一般默认的滚动条会比较丑,我们可以用简单的js实现自定义滚动条的功能: 代码如下: <!doctype html> <html> <head> <meta c ...
- (四)G1 garbage collector
g1专为大内存,多内核机型设计.可以兼顾高吞吐量和低暂停时间. g1将堆分为多个相同大小内存块,并发的标记线程,使得g1掌握了各个内存块的活对象数量, 内存回收阶段,g1根据用户指定的暂停时间,选择部 ...
- swift-重写方法和属性、禁止重写
/*子类可以为继承来的实例方法,类方法,实例属性,或下标提供自己定制的实现.我们把这种行为叫重写. 如果要重写某个特性,你需要在重写定义的前面加上 关键字.这么做,你就表明了你是想提供一个重写 版本, ...
- JavaScript Array 常用函数整理
按字母顺序整理 索引 Array.prototype.concat() Array.prototype.filter() Array.prototype.indexOf() Array.prototy ...
- rabbitmq之消息重入队列
说起消息重入队列还得从队列注册消费者说起,客户端在向队列注册消费者之后,创建的channel也会被主队列进程monitor,当channel挂掉后,主队列进程(rabbit_amqqueue_proc ...