tred_extract_EDED_new
# -*- coding:utf-8 -*-
import re

# Adapted to the new version.

year = '17a'  # user-defined
ss = './data/'  # root directory
filename = ss + 'EDED%s.txt' % year  # input file name

# Entry header line.  All three header patterns accept the same line shape
# (a 5-character prefix, a four-digit code, two whitespace characters, a name
# starting with an upper-case letter, then a closing "]") and differ only in
# which part they capture.
_CODE_RE = re.compile(r"^(?:\s{5}|X\s{4}|\W\s{4})(\d\d\d\d)\s\s[A-Z].+\]$")  # captures the code, e.g. 1001
_NAME_RE = re.compile(r"^(?:\s{5}|X\s{4}|\W\s{4})\d\d\d\d\s\s([A-Z].+)\s+\[[A-Z]\]$")  # captures the name
_CLASS_RE = re.compile(r"^(?:\s{5}|X\s{4}|\W\s{4})\d\d\d\d\s\s[A-Z].+\s+\[([A-Z])\]$")  # captures the bracketed letter
_DESC_CLOSED_RE = re.compile(r"^\s{5}Desc:\s(.+\w\w\.)\n")  # Desc that already ends with "."
_DESC_OPEN_RE = re.compile(r"^\s{5}Desc:\s(.+[^\.]|.+\.g\.)\n")  # Desc not ending with "." (or ending ".g.")
_DESC_TAIL_RE = re.compile(r"^\s{11}(.+\.)\n")  # second line of a Desc that did not end with "."
_REPR_RE = re.compile(r"^\s{5}Repr:\s(.+)\n")  # Repr
_NOTE_HEAD_RE = re.compile(r"^\s{5}Note:\s\n")  # Note
_NOTE_BODY_RE = re.compile(r"^\s{11}([^ ].+)\n")  # Note content
# Separator line made of "-" characters.
# NOTE(review): the second alternative looks like a mis-decoded rule
# character; confirm against the encoding of the input file.
_RULE_RE = re.compile(r"^(?:-|컴)+\n")


def _read_source_lines():
    """Return every line of the input file, closing the file afterwards."""
    with open(filename) as source:
        return source.readlines()


def _scan_records(lines):
    """Build the text that tred_nonote() appends for the given input lines.

    A small state machine walks the lines.  The checks are deliberately
    sequential ``if`` statements rather than ``elif``: a header line matches
    the code, name and class patterns at once and advances 0 -> 1 -> 2 -> 3
    within a single iteration.

    States: 1 code written, 2 name written, 3 class written, 4 single-line
    Desc written, 5 first line of a two-line Desc written (quote still open),
    6 second Desc line written (quote closed), 7 Repr written.
    """
    state = 0
    pieces = []
    for line in lines:
        found = _CODE_RE.match(line)
        if found:
            state = 1
            pieces.append("\n" + found.group(1))

        found = _NAME_RE.match(line)
        if found and state == 1:
            state = 2
            pieces.append("," + found.group(1))

        found = _CLASS_RE.match(line)
        if found and state == 2:
            state = 3
            # No separator is written before the class letter; the comma
            # write is disabled in the original and that is kept here.
            pieces.append(found.group(1))

        found = _DESC_CLOSED_RE.match(line)
        if found and state == 3:
            state = 4
            pieces.append(",\"" + found.group(1) + "\"")

        found = _DESC_OPEN_RE.match(line)
        # Was `flag == 3 or 5`, which is always true because it parses as
        # `(flag == 3) or 5`.
        if found and state in (3, 5):
            state = 5
            pieces.append(",\"" + found.group(1))

        found = _DESC_TAIL_RE.match(line)
        if found and state == 5:
            state = 6
            pieces.append(" " + found.group(1) + "\"")

        found = _REPR_RE.match(line)
        # Was `flag == 4 or 6` (always true); Repr is only accepted after a
        # completed Desc.
        if found and state in (4, 6):
            state = 7
            pieces.append("," + found.group(1))
    return "".join(pieces)


def _scan_notes(lines):
    """Build the text that tred_note() appends for the given input lines.

    Every header line starts a new output line, so line *k* of the note file
    corresponds to line *k* of the file written by tred_nonote().

    States: 0 idle, 1 header seen, 2 "Note:" line seen, 3 note content
    written.  The original also kept a second flag that was set together with
    state 2 and cleared together with state 0; once the always-true
    `flag == 2 or 3` test is corrected, states 2 and 3 imply that flag, so it
    is not needed.
    """
    state = 0
    pieces = []
    for line in lines:
        if _CODE_RE.match(line):
            state = 1
            pieces.append("\n")

        if state == 1 and _NOTE_HEAD_RE.match(line):
            state = 2

        found = _NOTE_BODY_RE.match(line)
        # Only lines that follow a "Note:" line are note content.  Other
        # lines with the same indentation (e.g. the second line of a Desc)
        # are ignored.
        if found and state in (2, 3):
            state = 3
            pieces.append(" " + found.group(1))

        if state == 3 and _RULE_RE.match(line):
            state = 0
            # NOTE(review): this newline plus the one written at the next
            # header leaves a blank line in the note file, which shifts the
            # line-by-line pairing done in join() -- confirm this is intended.
            pieces.append("\n")
    return "".join(pieces)


def tred_nonote():
    """Extract code, name, class letter, Desc and Repr of every entry.

    Reads the module-level input file `filename` and appends the extracted
    text to ``<ss>tred_nonote<year>.txt``.  Each entry starts with a newline,
    so the file begins with an empty line.
    """
    extracted = _scan_records(_read_source_lines())
    # 'a' appends, 'w' would overwrite.
    with open(ss + 'tred_nonote%s.txt' % year, 'a') as out:
        out.write(extracted)


def tred_note():
    """Extract the Note text of every entry.

    Reads the module-level input file `filename` and appends one line per
    entry to ``<ss>tred_note<year>.txt``; the line is empty when the entry
    has no note.
    """
    extracted = _scan_notes(_read_source_lines())
    # 'a' appends, 'w' would overwrite.
    with open(ss + 'tred_note%s.txt' % year, 'a') as out:
        out.write(extracted)


def join():
    """Merge the two intermediate files line by line into a CSV file.

    Line *k* of ``tred_nonote<year>.txt`` is combined with line *k* of
    ``tred_note<year>.txt`` (quoted) and appended to ``<ss>tred<year>.csv``.
    A missing note line counts as an empty note: the note file is one line
    shorter than the records file whenever the last entry has no note, which
    previously raised IndexError.
    """
    with open(ss + 'tred_note%s.txt' % year) as note_file:
        # Whitespace-only lines mean "no note".
        notes = ['' if row.isspace() else row for row in note_file]

    with open(ss + 'tred_nonote%s.txt' % year) as record_file:
        with open(ss + 'tred%s.csv' % year, 'a') as merged:
            for index, record in enumerate(record_file):
                note = notes[index] if index < len(notes) else ''
                merged.write("%s,\"%s\"\n" % (record.strip('\n'), note.strip('\n')))


if __name__ == '__main__':
    tred_nonote()
    tred_note()
    join()
tred_extract_EDED_new的更多相关文章
随机推荐
- jenkins 修改工作目录
修改Jenkins路径 Jenkins的默认安装路径是/var/lib/jenkins 现在由于这个根目录的磁盘太小,所以切换到/data 目录下. Jenkins目录.端口.工作目录等信息在/etc ...
- 通过beego快速创建一个Restful风格API项目及API文档自动化(转)
通过beego快速创建一个Restful风格API项目及API文档自动化 本文演示如何快速(一分钟内,不写一行代码)的根据数据库及表创建一个Restful风格的API项目,及提供便于在线测试API的界 ...
- Haskell语言学习笔记(51)Comonad
Comonad class Functor w => Comonad w where extract :: w a -> a duplicate :: w a -> w (w a) ...
- ssh 免密码登录,以及 本地和远端用户名不一致 问题
ssh 远程登录 ssh -l u1 u1@192.168.0.7 ssh u1@192.168.0.7 每次远程都要输入 用户名,密码 比较麻烦.所以比较好的是免密码登录 1.安装ssh服务器 su ...
- web项目生成web.xml的两种方式
做了很多的项目,今天着手写个小demo发现做web项目的时候还需要从别的地方去拷贝,那么如果没有地方可以拷贝,要怎么办呢?下边介绍三种方式生成web.xml文件. 一.maven项目情况:(STS版) ...
- Cookie的Domain属性
Cookie 加了Domain后就写不进去了(不加domain就可以写进去了) 本地测试的时候需要把domain换成localhost cookie跨域的问题,意思就是说A.com下能访问B.com域 ...
- PTA 习题集5-18 打印选课学生名单(哈希)
假设全校有最多40000名学生和最多2500门课程.现给出每个学生的选课清单,要求输出每门课的选课学生名单. 输入格式: 输入的第一行是两个正整数:N(≤40000),为全校学生总数:K(≤2500) ...
- React Native 首次加载白屏优化
RN首次加载都会有个白屏过程,一般都会有500ms+的白屏时间,原生页面开发同样的页面会能够快速显示而在RN页面中有个明显的等待过程,这个会影响用户体验. 1.使用过渡页面 简单处理可以在白屏过程中加 ...
- C#数字类型输出字符串时保留指定小数位数的方法
1.使用占位符: 1)float f = 321.12345F;f.ToString("0.00");这样做无论f是不是整数,都将加上2位小数. 2)float f = 321.1 ...
- discuz回贴通知插件实现-显示用户状态设置
一.完善用户是否开启回贴通知 回贴通知插件建立模块时,指定了核心文件post_set.inc.php 1.创立好数据库 建立独立数据表(不建议直接修改原有的discuz数据库,防止discuz升级 ...