trcd_extract_EDCD_new
# -*- coding:utf-8 -*-
import re

# Adapted to the new version of the EDCD directory layout.

year = '17A'  # user-defined directory release; part of every file name
ss = './data/'  # root directory for the input and all output files
filename = ss + 'EDCD%s.txt' % year  # input file name

# --- patterns used by trcd_nonote() ---------------------------------------
# Header line: captures the composite code (C + three digits).
_NONOTE_CODE = re.compile(r"^\s{4}(?:X|\W)\s{2}(C\d\d\d)\s.+\n")
# Same header line: captures the text that follows the code.
_NONOTE_NAME = re.compile(r"^\s{4}(?:X|\W)\s{2}C\d\d\d\s(.+)\n")
# "Desc:" line whose text ends with a full stop (description is complete).
_DESC_COMPLETE = re.compile(r"^\s{7}Desc:\s(.+\.)\n")
# "Desc:" line whose text does not end with a full stop (continues below).
_DESC_OPEN = re.compile(r"^\s{7}Desc:\s(.+[^\.])\n")
# 13-space continuation line that does not end with a full stop.
_CONT_OPEN = re.compile(r"^\s{13}(.+[^\.])\n")
# 13-space continuation line that ends with a full stop.
_CONT_LAST = re.compile(r"^\s{13}(.+\.)\n")

# --- patterns used by trcd_note() -----------------------------------------
# NOTE(review): this header pattern is not identical to _NONOTE_CODE (any
# leading letter, different change-marker column).  join() pairs the two
# outputs line by line, so both must match the same header lines -- confirm
# against real data.
_NOTE_HEADER = re.compile(
    r"^(?:\s{7}|X\s{6}|\W\s{6})([A-Z][0-9]{3})\s[A-Z].+$")
_NOTE_START = re.compile(r"^\s{7}Note:\s\n")  # the "Note:" marker line
_NOTE_TEXT = re.compile(r"^\s{13}([^ ].+)\n")  # a line of note content
_SEPARATOR = re.compile(r"^(?:-|컴)+\n")  # rule line between entries


def trcd_nonote(src=None, dst=None):
    """Extract code, name and description of each composite entry.

    Every header line starts a new output line of the form
    ``CODE,NAME,"description"``; a description that spans several input
    lines is joined with single spaces.  Each record is preceded by a
    newline, so the output starts with an empty line and has no trailing
    newline.  The output file is opened in append mode.

    Args:
        src: path of the directory text file; defaults to ``filename``.
        dst: path of the output file; defaults to
            ``ss + 'trcd_nonote%s.txt' % year``.
    """
    if src is None:
        src = filename
    if dst is None:
        dst = ss + 'trcd_nonote%s.txt' % year

    # State machine: 0 idle, 1 code written, 2 name written,
    # 3 one-line description written, 4 multi-line description opened,
    # 5 inside a multi-line description, 6 multi-line description closed.
    flag = 0
    with open(src) as fr, open(dst, 'a') as w2:  # 'a' appends, 'w' rewrites
        for line in fr:
            code = _NONOTE_CODE.findall(line)
            name = _NONOTE_NAME.findall(line)
            desc_complete = _DESC_COMPLETE.findall(line)
            desc_open = _DESC_OPEN.findall(line)
            cont_open = _CONT_OPEN.findall(line)
            cont_last = _CONT_LAST.findall(line)

            # The checks are deliberately sequential (not elif): a header
            # line matches both the code and the name pattern.
            if code:
                flag = 1
                w2.write("\n" + ''.join(code))
            if name and flag == 1:
                flag = 2
                w2.write("," + ''.join(name))
            if desc_complete and flag == 2:
                flag = 3
                w2.write(",\"" + ''.join(desc_complete) + "\"")
            if desc_open:
                w2.write(",\"" + ''.join(desc_open))
                flag = 4
            # Accept state 5 as well, otherwise the second and later
            # continuation lines of a long description are dropped.
            if cont_open and flag in (4, 5):
                flag = 5
                w2.write(" " + ''.join(cont_open))
            # Was ``flag == 4 or 5`` (always true), which appended any
            # 13-space line ending in "." -- e.g. note text -- to a record
            # whose description had already been closed.
            if cont_last and flag in (4, 5):
                flag = 6
                w2.write(" " + ''.join(cont_last) + "\"")


def trcd_note(src=None, dst=None):
    """Extract the "Note:" text of each entry, one output line per entry.

    A newline is written for every header line, then each note content
    line of that entry is appended as ``" " + text``.  Entries without a
    note therefore yield an empty line, which keeps the file line-aligned
    with the output of trcd_nonote().  The output file is opened in
    append mode.

    Args:
        src: path of the directory text file; defaults to ``filename``.
        dst: path of the output file; defaults to
            ``ss + 'trcd_note%s.txt' % year``.
    """
    if src is None:
        src = filename
    if dst is None:
        dst = ss + 'trcd_note%s.txt' % year

    # State machine: 0 idle, 1 header seen, 2 "Note:" seen, 3 note text
    # being collected.
    flag = 0
    with open(src) as fr, open(dst, 'a') as w2:  # 'a' appends, 'w' rewrites
        for line in fr:
            header = _NOTE_HEADER.findall(line)
            note_start = _NOTE_START.findall(line)
            note_text = _NOTE_TEXT.findall(line)
            separator = _SEPARATOR.findall(line)

            if header:
                flag = 1
                w2.write("\n")
            if note_start and flag == 1:
                flag = 2
            # Was ``flag == 2 or 3`` (always true) guarded by a second
            # flag; testing the state directly covers both.
            if note_text and flag in (2, 3):
                flag = 3
                w2.write(" " + ''.join(note_text))
            if separator and flag == 3:
                # Only reset the state.  Writing a newline here as well
                # produced two line breaks per noted entry and shifted all
                # later notes by one line relative to trcd_nonote().
                flag = 0


def join(note_path=None, nonote_path=None, out_path=None):
    """Merge the two intermediate files line by line into a CSV file.

    Line *i* of the nonote file is written as ``<line>,"<note>"`` where
    ``<note>`` is line *i* of the note file (whitespace-only lines count as
    empty).  The leading empty line of the nonote file is merged like any
    other line, so the first CSV row is ``,""``.  The output file is
    opened in append mode.

    Args:
        note_path: output of trcd_note(); defaults to
            ``ss + 'trcd_note%s.txt' % year``.
        nonote_path: output of trcd_nonote(); defaults to
            ``ss + 'trcd_nonote%s.txt' % year``.
        out_path: CSV file to append to; defaults to
            ``ss + 'trcd%s.csv' % year``.
    """
    if note_path is None:
        note_path = ss + 'trcd_note%s.txt' % year
    if nonote_path is None:
        nonote_path = ss + 'trcd_nonote%s.txt' % year
    if out_path is None:
        out_path = ss + 'trcd%s.csv' % year

    with open(note_path) as f1:
        list_note = ['' if line1.isspace() else line1 for line1 in f1]

    with open(nonote_path) as f2, open(out_path, 'a') as f2_w:
        for i, line2 in enumerate(f2):
            # The note file is shorter whenever the trailing entries have
            # no note (a final "\n" does not start a new line); treat the
            # missing lines as empty notes instead of raising IndexError.
            note = list_note[i] if i < len(list_note) else ''
            f2_w.write(
                "%s,\"%s\"\n" % (line2.strip('\n'), note.strip('\n')))
if __name__ == '__main__':
    # Run the stages in order: join() reads the files that the two
    # extraction steps write.
    trcd_nonote()
    trcd_note()
    join()
trcd_extract_EDCD_new的更多相关文章
随机推荐
- Egret - timer
			
相关:http://edn.egret.com/cn/index.php/article/index/id/154 1.Timer 的使用方法非常简单,我们只需要关心两个属性,三个方法和两个事件即可. ...
 - block元素和inline元素的特点
			
一.block元素的特点 1.处于常规流中时,如果width没有设置,会自动填充满父容器 2.可以设置height/width及margin/padding 3.处于常规流中时,布局在前后元素位置之间 ...
 - 数据恢复软件extundelete介绍
			
linux下文件系统一般由文件名.Inode.Block三部分组成.当一个用户在Linux系统中试图访问一个文件时,系统会先根据文件名去查找它的inode,看该用户是否具有访问这个文件的权限.如果有, ...
 - redis数据迁移
			
redis的备份和还原,借助了第三方的工具---redis-dump, redis中使用redis-dump导出.导入.还原数据实例 1.安装redis-dump # yum install rub ...
 - oracle启动过程
			
Oracle 的启动需要经历四个状态,SHUTDOWN .NOMOUNT .MOUNT .OPEN. SHUTDOWN状态 ...
 - 迷你MVVM框架 avalonjs 1.3.6发布
			
本版本是一次重要的升级,考虑要介绍许多东西,也有许多东西对大家有用,也发到首页上来了. 本来是没有1.36的,先把基于静态收集依赖的1.4设计出来后,发现改动太多,为了平缓升级起见,才减少了一部分新特 ...
 - JavaScript测试调研
			
JavaScript测试调研 前言 与其他语言相似,JavaScript的测试也会包括代码审查.单元测试等内容.本文就JavaScript的测试调研了一些测试工具和测试框架. 相对于其他很多高级语言语 ...
 - 部分真验货客户未取进FP      IN_SALES_ORDER表有数据,前台规划页面没显示
			
描述:部分真验货客户未取进FP,检查发现IN_SALES_ORDER表有数据630\600\610行项目数据,但前台只显示630数据,600和610前台没有显示 1.查看IN_SALES_ORDER表 ...
 - Ubuntu dns
			
在Ubuntu系统网络设备启动的流程中,会依赖/etc/network/interface的配置文件初始化网络接口,所以直接在/etc/network/interface之中配置好对应的dns服务器会 ...
 - python的多线程和守护线程
			
1.创建一个多线程 import threading import time ''' def threading_func(num): print("running on number:%s ...