# trsd_extract_EDSD_new
# -*- coding:utf-8 -*-
import re

# Adapted to the new version (original author's note).

year = '17A'  # user-defined release label; it is spliced into every file name
ss = './data/'  # root directory for the input file and all generated files
filename = ss + 'EDSD%s.txt' % year  # input file name


def trsd_nonote(src=None, dest=None):
    """Extract tag, name and "Function:" text of each entry into a text file.

    The input is scanned line by line with a small state machine.  For every
    entry header one output line is started (a ``"\\n"`` is written *before*
    the entry, so the output begins with a blank line and the last entry has
    no trailing newline).  Each output line has the form::

        TAG,Name,"Function text"

    Multi-line function descriptions are joined with single spaces.

    Args:
        src: Path of the input text file.  Defaults to the module-level
            ``filename``.
        dest: Path of the output file, opened in append mode.  Defaults to
            ``ss + 'trsd_nonote%s.txt' % year``.
    """
    if src is None:
        src = filename
    if dest is None:
        dest = ss + 'trsd_nonote%s.txt' % year

    # Header line: 4 whitespace, an "X" or any non-word char, 2 whitespace,
    # then the three-capital tag followed by two whitespace and the name.
    tag_re = re.compile(r"^\s{4}(?:X|\W)\s{2}([A-Z]{3})\s\s.+\n")
    name_re = re.compile(r"\s{4}(?:X|\W)\s{2}[A-Z]{3}\s\s(.+)\n")
    # Example of the text the "Function:" patterns below are aimed at:
    #     Function: To specify information regarding the transport
    #               such as mode of transport, means of transport,
    #               its conveyance reference number and the
    #               identification of the means of transport.
    # Complete one-line description (ends with two word chars and a period).
    single_re = re.compile(r"^\s{7}Function:\s(.+\w\w\.)\n")
    # First line of a multi-line description (ends in ".g." or a non-period).
    first_re = re.compile(r"^\s{7}Function:\s(.+\.g\.|.+[^\.])\n")
    # Continuation line (17 whitespace indent) that does not end the sentence.
    middle_re = re.compile(r"^\s{17}(\w.+[^\.])\n")
    # Continuation line that ends with a period, i.e. the last line.
    last_re = re.compile(r"^\s{17}(.+\.)\n")

    # States: 0 nothing seen, 1 tag written, 2 name written, 3 one-line
    # function written, 4 multi-line function opened, 5 inside it, 6 closed.
    flag = 0
    # 'a' appends (use 'w' to overwrite); the file is opened once instead of
    # once per input line.
    with open(src) as fr, open(dest, 'a') as w2:
        for line in fr:
            # The checks are deliberately sequential (not elif): a header
            # line matches both the tag and the name pattern and must pass
            # through states 1 and 2 in the same iteration.
            found = tag_re.search(line)
            if found:
                flag = 1
                w2.write("\n")
                w2.write(found.group(1))

            found = name_re.search(line)
            if found and flag == 1:
                flag = 2
                w2.write(",")
                w2.write(found.group(1))

            found = single_re.search(line)
            if found and flag == 2:
                flag = 3
                # Wrap in double quotes in case the text contains commas.
                # NOTE(review): embedded double quotes are not escaped.
                w2.write(",\"")
                w2.write(found.group(1))
                w2.write("\"")

            found = first_re.search(line)
            if found and flag == 2:
                flag = 4
                w2.write(",\"")
                w2.write(found.group(1))

            found = middle_re.search(line)
            # Was `flag == 4 or 5`, which is always true and let any
            # 17-space-indented line leak into the output.
            if found and flag in (4, 5):
                flag = 5
                w2.write(" ")
                w2.write(found.group(1))

            found = last_re.search(line)
            if found and flag in (4, 5):
                flag = 6
                w2.write(" ")
                w2.write(found.group(1))
                w2.write("\"")


def trsd_note(src=None, dest=None):
    """Extract the "Note:" text of each entry into a text file.

    One output line is started per entry header (again by writing ``"\\n"``
    before it), so the line numbering matches the file produced by
    ``trsd_nonote``.  Entries without a note yield an empty line; note lines
    are appended to the entry's line, each preceded by one space.

    Args:
        src: Path of the input text file.  Defaults to the module-level
            ``filename``.
        dest: Path of the output file, opened in append mode.  Defaults to
            ``ss + 'trsd_note%s.txt' % year``.
    """
    if src is None:
        src = filename
    if dest is None:
        dest = ss + 'trsd_note%s.txt' % year

    # Entry header: 7 leading columns (optionally flagged by "X" or a
    # non-word char in the first one), the tag, two whitespace, capitalised name.
    header_re = re.compile(r"^(?:\s{7}|X\s{6}|\W\s{6})([A-Z]{3})\s\s[A-Z].+$")
    label_re = re.compile(r"^\s{7}Note:\s\n")  # the "Note:" label line
    body_re = re.compile(r"^\s{12}([^ ].+)\n")  # note content, 12-column indent
    # Horizontal rule that ends an entry.
    # NOTE(review): the second alternative looks like a mis-decoded
    # box-drawing character; confirm against the input file's encoding.
    rule_re = re.compile(r"^(?:-|컴)+\n")

    # States: 0 idle, 1 header seen, 2 "Note:" label seen, 3 inside the note.
    flag = 0
    with open(src) as fr, open(dest, 'a') as w2:  # 'a' appends, 'w' overwrites
        for line in fr:
            if header_re.search(line):
                flag = 1
                w2.write("\n")

            if label_re.search(line) and flag == 1:
                flag = 2

            found = body_re.search(line)
            # Was `flag == 2 or 3` (always true) guarded by a second flag;
            # the membership test expresses the intended condition directly.
            if found and flag in (2, 3):
                flag = 3
                w2.write(" ")
                w2.write(found.group(1))

            if rule_re.search(line) and flag == 3:
                flag = 0

        # NOTE(review): the source's indentation was lost; this final newline
        # is assumed to sit after the loop.  It terminates the last line so
        # that a trailing entry without a note still counts as a line when
        # join() reads the file.
        w2.write("\n")


def join(note_path=None, nonote_path=None, out_path=None):
    """Merge the nonote and note files line by line into a CSV file.

    Line *i* of the nonote file is written followed by ``,"<note i>"``.
    Whitespace-only note lines become an empty note.  Because both
    intermediate files start with a blank line, the first CSV row is ``,""``.

    Args:
        note_path: File written by ``trsd_note``.  Defaults to
            ``ss + 'trsd_note%s.txt' % year``.
        nonote_path: File written by ``trsd_nonote``.  Defaults to
            ``ss + 'trsd_nonote%s.txt' % year``.
        out_path: CSV file, opened in append mode.  Defaults to
            ``ss + 'trsd%s.csv' % year``.
    """
    if note_path is None:
        note_path = ss + 'trsd_note%s.txt' % year
    if nonote_path is None:
        nonote_path = ss + 'trsd_nonote%s.txt' % year
    if out_path is None:
        out_path = ss + 'trsd%s.csv' % year

    with open(note_path) as f1:
        list_note = ['' if line1.isspace() else line1 for line1 in f1]

    with open(nonote_path) as f2, open(out_path, 'a') as f2_w:
        for i, line2 in enumerate(f2):
            # A shorter note file used to raise IndexError half-way through
            # the output; a missing note is written as an empty field.
            note = list_note[i] if i < len(list_note) else ''
            f2_w.write("%s,\"%s\"\n" % (line2.strip('\n'), note.strip('\n')))
if __name__ == '__main__':
    # Run the extraction steps in order: join() reads the two text files
    # that trsd_nonote() and trsd_note() write.
    trsd_nonote()
    trsd_note()
    join()
trsd_extract_EDSD_new的更多相关文章
随机推荐
- Python之类属性的增删改查
#类属性又称为静态变量,或者是静态数据,这些数据是他们所属的类对象绑定的,不依赖于任何类实例 class ChinesePeople: country = 'china' def __init__(s ...
- 更改linux swappiness 提高物理内存使用率
swappiness的值的大小对如何使用swap分区是有着很大的联系的. swappiness=0的时候表示最大限度使用物理内存,然后才是 swap空间,swappiness=100的时候表示积极的使 ...
- Haskell语言学习笔记(58)Bifoldable
Bifoldable class Bifoldable p where bifold :: Monoid m => p m m -> m bifold = bifoldMap id id ...
- 抽象类(abstract class)
package com.bjsxt.oop.abstractClass; //抽象类 public abstract class Animal { //因为父类的方法总是被重写 那就没写的必要了 但是 ...
- c#栈的习题2
一. 单项选择题 1. 栈和队列具有相同的( ). A. 抽象数据类型 B. 逻辑结构 C. 存储结构 D. 运算 2. 栈是( ). A. 顺序存储的线性结构 B. 链式存储的非线性结 ...
- linux 安装SAMtools,bcftools,htslib,sratoolkit,bedtools,GATK,TrimGalore,qualimap,vcftools,bwa
--------------------安装Samtools---------------------------------------------------------------------- ...
- python获取当前日期
今天群里一个人问了怎么获取当前时间的问题,以前接触过计算日期之差的,具体代码如下: import datetime d1=datetime.datetime(2014,3,14) d2=datetim ...
- 微信公众平台开发之基于百度 BAE3.0 的开发环境搭建(采用 Baidu Eclipse)
3.通过 SVN 检入工程 在 bae 上的应用添加部署成功后,如图 7 点击“点击查看”按钮,会打开一个新页面,页面上会打印 “hello world” ,这是因为我们的应用包含有示 ...
- 排序矩阵中的从小到大第k个数 · Kth Smallest Number In Sorted Matrix
[抄题]: 在一个排序矩阵中找从小到大的第 k 个整数. 排序矩阵的定义为:每一行递增,每一列也递增. [思维问题]: 不知道应该怎么加,因为不是一维单调的. [一句话思路]: 周围两个数给x或y挪一 ...
- 【转】VS2012 中文版转英文版 英文版转中文版 界面语言切换
[1]下载VS2012的语言包,各种语言包都有,下载对应的即可. 微软官网衔接地址:vs2012 语言包 http://www.microsoft.com/zh-CN/download/detail ...