trcd_extract_EDCD_new
# -*- coding:utf-8 -*-
import re '''
适应新版本
''' year='17A'#用户自定义
ss='./data/'#根目录
filename = ss+'EDCD%s.txt'%year#输入文件名 def trcd_nonote(): p1 = r"^\s{4}(?:X|\W)\s{2}(C\d\d\d)\s.+\n"
p2 = r"^\s{4}(?:X|\W)\s{2}C\d\d\d\s(.+)\n"
p3 = r"^\s{7}Desc:\s(.+\.)\n"
p4 = r"^\s{7}Desc:\s(.+[^\.])\n"
p5 = r"^\s{13}(.+[^\.])\n"
p6 = r"^\s{13}(.+\.)\n"
pattern1 = re.compile(p1)
pattern2 = re.compile(p2)
pattern3 = re.compile(p3)
pattern4 = re.compile(p4)
pattern5 = re.compile(p5)
pattern6 = re.compile(p6)
fr = open(filename)
temp = ();
flag = 0
for line in fr.readlines():
matcher1 = re.findall(pattern1,line)
matcher2 = re.findall(pattern2,line)
matcher3 = re.findall(pattern3,line)
matcher4 = re.findall(pattern4,line)
matcher5 = re.findall(pattern5,line)
matcher6 = re.findall(pattern6,line)
#print matcher
w2 = open(ss+'trcd_nonote%s.txt'%year,'a')#a代表追加 w代表重写
if matcher1:
flag = 1
w2.write("\n")
for j in matcher1:
for k in j:
w2.write(k)
#for k in g:
#w2.write(k)
#continue;
if ((matcher2!=[])and(flag ==1)):
flag = 2
#print type(tup1)
#print tup1
#flag = 2
w2.write(",")
for j in matcher2:
for k in j:
w2.write(k)
if ((matcher3!=[])and(flag ==2)):
flag = 3
w2.write(",\"")
for j in matcher3:
for k in j:
w2.write(k)
w2.write("\"")
if (matcher4!=[]):
w2.write(",\"")
for j in matcher4:
for k in j:
w2.write(k)
flag = 4
if ((matcher5!=[])and(flag ==4)):
flag = 5
w2.write(" ")
for j in matcher5:
for k in j:
w2.write(k)
if ((matcher6!=[])and(flag ==4 or 5)):
flag = 6
w2.write(" ")
for j in matcher6:
for k in j:
w2.write(k)
w2.write("\"")
w2.close( ) def trcd_note(): p1 = r"^(?:\s{7}|X\s{6}|\W\s{6})([A-Z][0-9]{3})\s[A-Z].+$"#匹配1001
p2 = r"^\s{7}Note:\s\n"#Note
p3= r"^\s{13}([^ ].+)\n"#Note内容
p4= r"^(?:-|컴)+\n"
pattern1 = re.compile(p1)
pattern2 = re.compile(p2)
pattern3 = re.compile(p3)
pattern4 = re.compile(p4) fr = open(filename)
w2 = open(ss+'trcd_note%s.txt'%year,'a')#a代表追加 w代表重写
# temp = ();
flag = 0
flag1=0
for line in fr.readlines():
matcher1 = re.findall(pattern1,line)
matcher2 = re.findall(pattern2,line)
matcher3 = re.findall(pattern3,line)
matcher4 = re.findall(pattern4,line) #print matcher if matcher1!=[]:
flag = 1
w2.write("\n")
# for j in matcher1: # w2.write(j) if ((matcher2!=[])and(flag == 1)):
flag = 2
flag1=1
# w2.write(",")
if flag1==1:
if ((matcher3!=[])and(flag ==2 or 3)):
flag = 3
w2.write(" ")
for j in matcher3: w2.write(j)
# w2.write(")
if ((matcher4!=[])and(flag == 3)):
flag=0
flag1=0
w2.write("\n")
w2.close( )
fr.close() def join(): f1= open(ss+'trcd_note%s.txt'%year)
f2 =open(ss+'trcd_nonote%s.txt'%year) list_note=[]
for line1 in f1:
# print(line1)
if line1.isspace():
list_note.append('')
else:
list_note.append(line1) f1.close() # print(list_note)
f2_w= open(ss+'trcd%s.csv'%year,'a')
# for i in range(len(list_note)):
i=0
# f2_r = open(ss+'/new/%s_w.txt'%list_tag[i])
for line2 in f2: str11="%s,\"%s\"\n"%(line2.strip('\n'),list_note[i].strip('\n'))
i=i+1
# print(i)
# print(str11)
f2_w.write(str11) f2_w.close()
f2.close()
if __name__ == '__main__':
trcd_nonote()
trcd_note()
join()
trcd_extract_EDCD_new的更多相关文章
随机推荐
- 迷你MVVM框架 avalonjs 1.3.9发布
本次升级,avalon改进了许多内部方法,大大提升性能,并且带来异步刷新视图的新功能. ms-html内部不再使用异步 head元素中的avalon元素加入ms-skip指令 重构计算属性,现在超级轻 ...
- Hbase—学习笔记(一)
此文的目的: 1.重点理解Hbase的整体工作机制 2.熟悉编程api,能够用来写程序 1. 什么是HBASE 1.1. 概念特性 HBASE是一个数据库----可以提供数据的实时随机读写 HB ...
- SpringBoot yml properties文件
一.在SpringBoot实现属性注入: 1).添加pom依赖jar包: 1 <!-- 支持 @ConfigurationProperties 注解 --> 2 <!-- https ...
- Easyui-datagrid显示时间的格式化代码
{field: 'Time', title: '时间', formatter: function (value, row, index) { var date = new Date(value); v ...
- 第六章 图(c)广度优先搜索
- 如何学习mybatis
最近几天学习了mybatis框架,我是mybatis视频学习的.看这篇文章,我建议首先要会熟练使用MVC架构,再学习这个框架. 在我们写传统的MVC模式写Bean,Dao,Servlet时,我们每次调 ...
- phpStudy3——往数据库中添加数据
前言: 前边介绍了查询数据库的方法,这里介绍下往数据库中添加数据的方法. 项目需求: 用户在前端页面输入的用户名和手机号码,点击提交后后端判断手机号码是否已经存在.如果不存在,那么插入数据库到数据库, ...
- OOP的几个不常用的方法
from OOP_多态 import cat c = cat("cat") print(c.__doc__) print(cat.__doc__) # # 打印类的描述信息,也就是 ...
- Linux跑火车,提升趣味性
實現跑火車[可陶冶情操,愉悦心情]##下载yum源[root@localhost ~]# wget http://mirror.centos.org/centos/7/extras/x86_64/P ...
- ScrollView listView gridView 之间的冲突问题
在ScrollView中的listView gridView添加适配器之后添加//设置gridView整体的高度 public void setListViewHeightBasedOnChildre ...