# -*- coding:utf-8 -*-
"""Extract records from an ``EDCD<year>.txt`` text file into a CSV.

Adapted to the new version of the input file.

Three steps, run in this order:

1. ``trcd_nonote`` writes one ``code,name,"description"`` row per record.
2. ``trcd_note`` writes one line of note text per record.
3. ``join`` pairs the two files line by line into the final CSV.

NOTE(review): the input looks like a UN/EDIFACT composite data element
directory listing -- confirm against the actual data source.
"""
import re

year = '17A'  # user-defined release tag, used in every file name
ss = './data/'  # root directory for the input and output files
filename = ss + 'EDCD%s.txt' % year  # input file name

# --- patterns used by trcd_nonote -------------------------------------------
# Record header line; captures the code ("C" followed by three digits).
_CODE_RE = re.compile(r"^\s{4}(?:X|\W)\s{2}(C\d\d\d)\s.+\n")
# Same header line; captures the name that follows the code.
_NAME_RE = re.compile(r"^\s{4}(?:X|\W)\s{2}C\d\d\d\s(.+)\n")
# "Desc:" line that already ends with a period (complete description).
_DESC_FULL_RE = re.compile(r"^\s{7}Desc:\s(.+\.)\n")
# "Desc:" line that does not end with a period (description continues).
_DESC_OPEN_RE = re.compile(r"^\s{7}Desc:\s(.+[^\.])\n")
# 13-column continuation line that does not end with a period.
_CONT_RE = re.compile(r"^\s{13}(.+[^\.])\n")
# 13-column continuation line that ends with a period (last line).
_CONT_END_RE = re.compile(r"^\s{13}(.+\.)\n")

# --- patterns used by trcd_note ---------------------------------------------
# Record header: one letter followed by three digits.  (The original comment
# said "matches 1001", which this pattern does not actually match.)
_NOTE_HEAD_RE = re.compile(
    r"^(?:\s{7}|X\s{6}|\W\s{6})([A-Z][0-9]{3})\s[A-Z].+$")
# The "Note:" label line.
_NOTE_LABEL_RE = re.compile(r"^\s{7}Note:\s\n")
# One line of note content.
_NOTE_TEXT_RE = re.compile(r"^\s{13}([^ ].+)\n")
# Separator line between records.
_RULE_RE = re.compile(r"^(?:-|컴)+\n")

# Parser states of trcd_nonote.
_IDLE, _CODE, _NAME, _DESC_DONE, _DESC_OPEN = range(5)


def trcd_nonote(src=None, dst=None):
    """Write ``code,name,"description"`` rows for every record in *src*.

    Each row is preceded (not followed) by a newline, so the output starts
    with an empty line and has no trailing newline.

    Args:
        src: input path; defaults to the module-level ``filename``.
        dst: output path; defaults to ``ss + 'trcd_nonote<year>.txt'``.
            The file is opened in append mode, so an existing file grows.

    Fixes relative to the original code:
        * ``flag == 4 or 5`` was always true, so any 13-column line ending
          in a period (for example note text) was appended to the row.
        * middle lines of a description longer than three lines were
          dropped because continuation was only accepted right after the
          ``Desc:`` line.
        * the output file was reopened for every input line and the input
          file was never closed.
    """
    if src is None:
        src = filename
    if dst is None:
        dst = ss + 'trcd_nonote%s.txt' % year

    state = _IDLE
    # 'a' appends to an existing file, 'w' would overwrite it.
    with open(src) as fr, open(dst, 'a') as w2:
        for line in fr:
            # The checks are deliberately independent "if"s: a header line
            # satisfies both the code and the name pattern.
            code = _CODE_RE.match(line)
            if code:
                state = _CODE
                w2.write("\n" + code.group(1))

            name = _NAME_RE.match(line)
            if name and state == _CODE:
                state = _NAME
                w2.write("," + name.group(1))

            whole = _DESC_FULL_RE.match(line)
            if whole and state == _NAME:
                state = _DESC_DONE
                w2.write(",\"" + whole.group(1) + "\"")

            opened = _DESC_OPEN_RE.match(line)
            if opened:
                # As in the original, this branch has no state guard.
                state = _DESC_OPEN
                w2.write(",\"" + opened.group(1))

            if state == _DESC_OPEN:
                middle = _CONT_RE.match(line)
                if middle:
                    w2.write(" " + middle.group(1))
                last = _CONT_END_RE.match(line)
                if last:
                    # The period-terminated line closes the quoted field.
                    state = _DESC_DONE
                    w2.write(" " + last.group(1) + "\"")


def trcd_note(src=None, dst=None):
    """Write the note text of every record in *src*, one line per record.

    A newline is written when a record header is seen and once more after
    the last input line, so the output has an empty first line followed by
    exactly one line per header.  Note lines are each prefixed with a
    single space and concatenated.

    Args:
        src: input path; defaults to the module-level ``filename``.
        dst: output path; defaults to ``ss + 'trcd_note<year>.txt'``.
            The file is opened in append mode.

    Fixes relative to the original code:
        * ``flag == 2 or 3`` was always true; together with a ``flag1``
          that was not reset after an empty note, 13-column description
          lines of the following record could be written as note text.
        * both files are now closed even if an error occurs.

    NOTE(review): the original source had lost its indentation, so the
    position of the final ``write("\\n")`` is a reconstruction.  It is
    placed after the loop because ``join`` pairs this file with the
    ``trcd_nonote`` output by line index -- confirm against the upstream
    script.
    """
    if src is None:
        src = filename
    if dst is None:
        dst = ss + 'trcd_note%s.txt' % year

    # 0: idle, 1: header seen, 2: "Note:" seen, 3: note text seen
    state = 0
    # 'a' appends to an existing file, 'w' would overwrite it.
    with open(src) as fr, open(dst, 'a') as w2:
        for line in fr:
            if _NOTE_HEAD_RE.match(line):
                state = 1
                w2.write("\n")

            if state == 1 and _NOTE_LABEL_RE.match(line):
                state = 2

            text = _NOTE_TEXT_RE.match(line)
            if text and state in (2, 3):
                state = 3
                w2.write(" " + text.group(1))

            if state == 3 and _RULE_RE.match(line):
                # Separator after a note: the record is finished.
                state = 0

        # Terminate the last record's line so it exists even when empty.
        w2.write("\n")


def join(note_file=None, nonote_file=None, out_file=None):
    """Append each note as a final quoted column to the matching row.

    Line *i* of *nonote_file* is combined with line *i* of *note_file* as
    ``<row>,"<note>"`` and appended to *out_file*.

    Args:
        note_file: defaults to ``ss + 'trcd_note<year>.txt'``.
        nonote_file: defaults to ``ss + 'trcd_nonote<year>.txt'``.
        out_file: defaults to ``ss + 'trcd<year>.csv'`` (append mode).

    Fixes relative to the original code: a note file shorter than the row
    file no longer raises ``IndexError`` (missing notes become empty), and
    all files are closed via ``with``.
    """
    if note_file is None:
        note_file = ss + 'trcd_note%s.txt' % year
    if nonote_file is None:
        nonote_file = ss + 'trcd_nonote%s.txt' % year
    if out_file is None:
        out_file = ss + 'trcd%s.csv' % year

    with open(note_file) as f1:
        # Whitespace-only lines mean "no note for this record".
        list_note = ['' if row.isspace() else row.strip('\n') for row in f1]

    with open(nonote_file) as f2, open(out_file, 'a') as f2_w:
        for i, line2 in enumerate(f2):
            note = list_note[i] if i < len(list_note) else ''
            f2_w.write("%s,\"%s\"\n" % (line2.strip('\n'), note))
if __name__ == '__main__':
    # Run the extraction steps in order: join() reads the two text files
    # that trcd_nonote() and trcd_note() write.
    trcd_nonote()
    trcd_note()
    join()

trcd_extract_EDCD_new的更多相关文章

随机推荐

  1. 如何在idea中导入本地所需要的jar包

    今天遇到一个问题,在idea创建普通java工程时不知道如何导入jar包,上网查了一下,也算是一个整理.

  2. springMVC学习记录3-拦截器和文件上传

    拦截器和文件上传算是springmvc中比较高级一点的内容了吧,让我们一起看一下. 下面先说说拦截器.拦截器和过滤器有点像,都可以在请求被处理之前和请求被处理之后做一些额外的操作. 1. 实现Hand ...

  3. NTP时间服务器搭建

    系统时区设置::: 查看当前时区# date -R修改系统时区# timeconfig  或# cp  /usr/share/zoneinfo/Asia/Shanghai  /etc/localtim ...

  4. 使用css技术代替传统的frame技术

    http://www.dynamicdrive.com/style/layouts/item/css-left-frame-layout/ <!--Force IE6 into quirks m ...

  5. 上传 第三方jar包 nexus

    手动上传第三方jar包到nexus的步骤: 1)按下图方式进行 2)按下图完成上传 3)点击保存完成

  6. 元素的定位tag_name,link_text,class_name

    tag_name 就是根据HTML的标签的名称来定位的: 案例:打开我要自学网,会有用户名和密码的输入框 例如:拿51zxw.net为例 from time import sleep #加载浏览器驱动 ...

  7. Petya and Graph(最小割,最大权闭合子图)

    Petya and Graph http://codeforces.com/contest/1082/problem/G time limit per test 2 seconds memory li ...

  8. spring配置数据库连接池druid

    连接池原理 连接池基本的思想是在系统初始化的时候,将数据库连接作为对象存储在内存中,当用户需要访问数据库时,并非建立一个新的连接,而是从连接池中取出一个已建立的空闲连接对象.使用完毕后,用户也并非将连 ...

  9. 遍历XML文件

    #encoding=utf-8 from xml.etree import ElementTree as ET #要找出所有人的年龄 per=ET.parse('d:\\1.xml') p=per.f ...

  10. python创建独立虚拟工作环境方法

    前言: python的组件非常之多,有时这个项目依赖m个组件,有时那个项目依赖n个组件,时间一长很容易导致系统python环境的臃肿不堪,由此便有了virtualenv.virtualenvwrapp ...