# -*- coding:utf-8 -*-
import re

# Original author's note: adapted to the newer version (presumably of the
# EDCD directory layout -- TODO confirm).

year = '17A'  # user-defined release tag, used in every file name below
ss = './data/'  # root directory for the input and output files
filename = ss + 'EDCD%s.txt' % year  # input file name


def trcd_nonote():
    """Extract code, name and description of every C### entry.

    Reads ``filename`` line by line and appends to
    ``ss + 'trcd_nonote<year>.txt'``.  Each entry header writes a newline
    followed by ``CODE,NAME``; the description that follows is written as
    ``,"text"``, with the lines of a multi-line description joined by
    single spaces.

    ``flag`` tracks the parser state:
        1 code written, 2 name written, 3 one-line description written,
        4 multi-line description opened, 5 middle line appended,
        6 multi-line description closed.

    The output file is opened in append mode, so running the function
    twice appends the same records again.
    """
    code_re = re.compile(r"^\s{4}(?:X|\W)\s{2}(C\d\d\d)\s.+\n")
    name_re = re.compile(r"^\s{4}(?:X|\W)\s{2}C\d\d\d\s(.+)\n")
    # "Desc:" line that already ends with a period (complete description).
    desc_full_re = re.compile(r"^\s{7}Desc:\s(.+\.)\n")
    # "Desc:" line that does not end with a period (continues below).
    desc_open_re = re.compile(r"^\s{7}Desc:\s(.+[^\.])\n")
    # Continuation lines: 13 leading blanks, without / with a final period.
    cont_mid_re = re.compile(r"^\s{13}(.+[^\.])\n")
    cont_end_re = re.compile(r"^\s{13}(.+\.)\n")

    flag = 0
    # Open the output once; 'a' appends, 'w' would overwrite.
    with open(filename) as fr, \
            open(ss + 'trcd_nonote%s.txt' % year, 'a') as w2:
        for line in fr:
            codes = code_re.findall(line)
            names = name_re.findall(line)
            desc_full = desc_full_re.findall(line)
            desc_open = desc_open_re.findall(line)
            cont_mid = cont_mid_re.findall(line)
            cont_end = cont_end_re.findall(line)

            if codes:
                flag = 1
                w2.write("\n" + "".join(codes))
            if names and flag == 1:
                flag = 2
                w2.write("," + "".join(names))
            if desc_full and flag == 2:
                flag = 3
                w2.write(",\"" + "".join(desc_full) + "\"")
            if desc_open:
                # Not gated on the state, exactly as in the original.
                w2.write(",\"" + "".join(desc_open))
                flag = 4
            # Both continuation kinds are accepted only while a multi-line
            # description is open.  The original tested `flag == 4 or 5`
            # (always true) for the closing line and `flag == 4` for middle
            # lines, which appended unrelated text and dropped every middle
            # line after the first.
            if cont_mid and flag in (4, 5):
                flag = 5
                w2.write(" " + "".join(cont_mid))
            if cont_end and flag in (4, 5):
                flag = 6
                w2.write(" " + "".join(cont_end) + "\"")


def trcd_note():
    """Extract the "Note:" text of every entry, one output line per entry.

    Reads ``filename`` and appends to ``ss + 'trcd_note<year>.txt'``.  Each
    entry header (a letter followed by three digits) writes a newline, and
    every note content line is then appended to that line preceded by one
    space.  A final newline is written after the last input line.

    ``flag`` tracks the parser state: 1 header seen, 2 "Note:" seen,
    3 note content written, 0 idle.  ``in_note`` stays true from a "Note:"
    line until a separator line is seen while ``flag`` is 3.
    """
    header_re = re.compile(
        r"^(?:\s{7}|X\s{6}|\W\s{6})([A-Z][0-9]{3})\s[A-Z].+$")
    note_re = re.compile(r"^\s{7}Note:\s\n")
    # Note content: 13 leading blanks followed by a non-blank character.
    body_re = re.compile(r"^\s{13}([^ ].+)\n")
    # Separator line made of '-' or the character the source file uses.
    rule_re = re.compile(r"^(?:-|컴)+\n")

    flag = 0
    in_note = False
    # 'a' appends, 'w' would overwrite.
    with open(filename) as fr, \
            open(ss + 'trcd_note%s.txt' % year, 'a') as w2:
        for line in fr:
            if header_re.findall(line):
                flag = 1
                w2.write("\n")
            if note_re.findall(line) and flag == 1:
                flag = 2
                in_note = True
            if in_note:
                body = body_re.findall(line)
                # The original tested `flag == 2 or 3`, which is always
                # true; membership is what was meant.
                if body and flag in (2, 3):
                    flag = 3
                    w2.write(" " + "".join(body))
                if rule_re.findall(line) and flag == 3:
                    flag = 0
                    in_note = False
        # Terminate the last entry's line so the line count matches the
        # file written by trcd_nonote().
        w2.write("\n")


def join():
    """Merge the description file and the note file into one CSV.

    Line ``i`` of ``trcd_nonote<year>.txt`` is combined with line ``i`` of
    ``trcd_note<year>.txt`` as ``<description line>,"<note>"`` and appended
    to ``ss + 'trcd<year>.csv'``.  Whitespace-only note lines become an
    empty note.  If the note file has fewer lines than the description
    file, the missing notes are treated as empty instead of raising
    ``IndexError``.
    """
    with open(ss + 'trcd_note%s.txt' % year) as f1:
        list_note = ['' if line1.isspace() else line1 for line1 in f1]

    with open(ss + 'trcd_nonote%s.txt' % year) as f2, \
            open(ss + 'trcd%s.csv' % year, 'a') as f2_w:
        for i, line2 in enumerate(f2):
            note = list_note[i] if i < len(list_note) else ''
            f2_w.write(
                "%s,\"%s\"\n" % (line2.strip('\n'), note.strip('\n')))


if __name__ == '__main__':
    trcd_nonote()
    trcd_note()
    join()

trcd_extract_EDCD_new的更多相关文章

随机推荐

  1. C# 事件 event 【转】

    C#事件(event)解析   事件(event),这个词儿对于初学者来说,往往总是显得有些神秘,不易弄懂.而这些东西却往往又是编程中常用且非常重要的东西.大家都知道windows消息处理机制的重要, ...

  2. SQL数据库简单的建立与操作

    数据类型 符号标志 整数型 bigint,int,smallint,mediumint,tinyint 精确数值型 decimal,numeric 浮点型 float,real,double 位型 b ...

  3. OpenOffice Word文档转换成Html格式

    为什么会想起来将上传的word文档转换成html格式呢?设想,如果一个系统需要发布在页面的文章都是来自word文档,一般会执行下面的流程:使用word打开文档,Ctrl+A,进入发布文章页面,Ctrl ...

  4. hibernate经常报的几个不起眼的错误, 都是因为不细心或者手滑了输入有误造成了

    最近会经常用到hibernate了, 也经常报错, 看着屏幕上一根大红条是非常不爽的, 这几天集合了一下经常报的错误, 整合一下放到这里, 以后再出现这个错误直接去解决就好了 1, org.hiber ...

  5. LibreOJ 6277 数列分块入门 1(分块)

    题解:感谢hzwer学长和loj让本蒟蒻能够找到如此合适的入门题做. 这是一道非常标准的分块模板题,本来用打标记的线段树不知道要写多少行,但是分块只有这么几行,极其高妙. 代码如下: #include ...

  6. C#.net随机数函数

    (1)Random rnd = new Random(); int rndNum = rnd.Next();           //int 取值范围内的随机数 int rndNum = rnd.Ne ...

  7. [leetcode]113. Path Sum II路径和(返回路径)

    Given a binary tree and a sum, find all root-to-leaf paths where each path's sum equals the given su ...

  8. [leetcode]403. Frog Jump青蛙过河

    A frog is crossing a river. The river is divided into x units and at each unit there may or may not ...

  9. RedisUtil工具类

    转载:http://blog.csdn.net/liuxiao723846/article/details/50401406 1.使用了jedis客户端,对redis进行了封装,包括: 1)使用了re ...

  10. yii使用gii创建后台模块与widget使用

    yii使用gii创建后台模块与widget使用 1.在protected/config/main.php中打开gii的配置属性. 'gii'=>array( 'class'=>'syste ...