# -*- coding:utf-8 -*-
"""Convert an EDSD text listing into CSV rows.

Reads ``./data/EDSD<year>.txt`` line by line, recognises three kinds of lines
with regular expressions (a tag header, a "composite" entry whose code looks
like ``C552`` and a "standalone" entry whose code looks like ``3286``) and
appends one CSV row per entry to ``./data/b2_<year>.csv``.

NOTE(review): the file name suggests the UN/EDIFACT segment directory (EDSD);
the column meanings used in the comments below are inferred from the regexes
and the original inline comments only -- confirm against a real input file.
"""
import re

ss = "./data/"
year = '17A'
filename = ss + 'EDSD%s.txt' % year

# Adapted to the new version (of the input layout).
p1 = r"^\s{4}(?:X|\W)\s{2}([A-Z]{3})\s\s.+\n"  # TC: three-letter tag
# Composite entry: the same line shape four times, one capture group each.
p2 = r"(^\d{3})\s{4}C\d{3}.+[CM]\s+\d\n"  # leading three digits, e.g. 010
p3 = r"^\d{3}\s{4}(C\d{3}).+[CM]\s+\d\n"  # code, e.g. C552
p4 = r"^\d{3}\s{4}C\d{3}.+([CM])\s+\d\n"  # C/M marker, e.g. M
p5 = r"^\d{3}\s{4}C\d{3}.+[CM]\s+(\d)\n"  # trailing digit
# Standalone entry: either complete on one line, or split over a head line
# (second alternative of p6/p7) and a 12-space-indented continuation line
# (second alternative of p8/p9/p10).
p6 = r"(^\d{3})\s{4}\d{4}.+[CM]\s{4}\d\s.*\.\.\d+\n|(^\d{3})\s{4}\w\d{3}\s.+[^\d]$\n"  # standalone 030
p7 = r"^\d{3}\s{4}(\d{4}).+[CM]\s{4}\d\s.*\.\.\d+\n|^\d{3}\s{4}(\w\d{3})\s.+[^\d]$\n"  # standalone 3286
p8 = r"^\d{3}\s{4}\d{4}.+([CM])\s{4}\d\s.*\.\.\d+\n|^\s{12}[A-Z].+([CM])\s{4}\d\s.*\.\.\d+\n"  # standalone M
p9 = r"^\d{3}\s{4}\d{4}.+[CM]\s{4}(\d)\s.*\.\.\d+\n|^\s{12}[A-Z].+[CM]\s{4}(\d)\s.*\.\.\d+\n"  # standalone 1
p10 = r"^\d{3}\s{4}\d{4}.+[CM]\s{4}\d\s(.*\.\.\d+)\n|^\s{12}[A-Z].+[CM]\s{4}\d\s(.*\.\.\d+)\n"  # standalone an..35

pattern1 = re.compile(p1)
pattern2 = re.compile(p2)
pattern3 = re.compile(p3)
pattern4 = re.compile(p4)
pattern5 = re.compile(p5)
pattern6 = re.compile(p6)
pattern7 = re.compile(p7)
pattern8 = re.compile(p8)
pattern9 = re.compile(p9)
pattern10 = re.compile(p10)


def _joined(matches):
    """Concatenate every captured group of every match in *matches*.

    ``findall`` yields a list of strings for one-group patterns and a list of
    tuples for multi-group patterns; joining twice covers both shapes, because
    joining the characters of a string gives the same string back.
    """
    return "".join("".join(groups) for groups in matches)


def convert_lines(lines, year=year):
    """Return the CSV text produced from an iterable of input lines.

    Each line must keep its trailing newline, since every pattern ends in
    ``\\n``. Rows start with a newline rather than ending with one:

    * composite:  ``\\n<tag>,<pos>,<Cnnn>,0000,<C|M>,<year>,<digit>, ``
    * standalone: ``\\n<tag>,<pos>,C000,<code>,<C|M>,<year>,<digit>,<format>``

    ``flag`` records the last column written, so that a standalone entry split
    over two physical lines is only continued right after its head line.

    :param lines: iterable of text lines (an open file works).
    :param year: value of the fixed year column; defaults to the module's year.
    :returns: the concatenated CSV text (possibly empty).
    """
    pieces = []
    emit = pieces.append
    # The original initialised this to a tuple, which made '"\n" + temp' raise
    # TypeError when an entry came before the first tag line; entries seen
    # before any tag now get an empty tag column instead.
    tag = ""
    flag = 0

    for line in lines:
        found = pattern1.findall(line)
        if found:
            tag = found[-1]
            flag = 1

        # --- composite entry -------------------------------------------
        found = pattern2.findall(line)
        # The original also tested "flag == 1 or 2", which is always true
        # (it parses as "(flag == 1) or 2"). Every composite line therefore
        # starts a new row; that effective behaviour is kept on purpose.
        if found:
            flag = 2
            emit("\n" + tag + "," + _joined(found))

        found = pattern3.findall(line)
        if found and flag == 2:
            flag = 3
            # Composite rows use 0000 as the default in the next column.
            emit("," + _joined(found) + ",0000")

        found = pattern4.findall(line)
        if found and flag == 3:
            flag = 4
            # Add the fixed year column.
            emit("," + _joined(found) + "," + year)

        found = pattern5.findall(line)
        if found and flag == 4:
            flag = 5
            emit("," + _joined(found) + ", ")

        # --- standalone entry ------------------------------------------
        found = pattern6.findall(line)
        # The original additionally compared the result with [''], which a
        # two-group findall result (a list of tuples) can never equal.
        if len(found) == 1:
            flag = 6
            # Standalone rows use C000 as the default in the next column.
            emit("\n" + tag + "," + _joined(found) + ",C000")

        found = pattern7.findall(line)
        if found and flag == 6:
            flag = 7
            emit("," + _joined(found))

        found = pattern8.findall(line)
        if found and flag == 7:
            flag = 8
            # Add the fixed year column.
            emit("," + _joined(found) + "," + year)

        found = pattern9.findall(line)
        if found and flag == 8:
            flag = 9
            emit("," + _joined(found))

        found = pattern10.findall(line)
        if found and flag == 9:
            flag = 10
            emit("," + _joined(found))

    return "".join(pieces)


def main():
    """Read the input listing and append the generated rows to the CSV file."""
    with open(filename) as source:
        text = convert_lines(source)
    # 'a' means append, 'w' would overwrite. The file is opened once here
    # instead of once per input line as in the original.
    with open(ss + 'b2_%s.csv' % year, 'a') as target:
        target.write(text)


if __name__ == "__main__":
    main()

# Special cases: the original script ends with this heading and lists nothing
# under it.

b2_trsd_EDSD_new的更多相关文章

随机推荐

  1. 11 python shutil 模块

      shutil 模块 高级的 文件.文件夹.压缩包 处理模块 1.将文件内容拷贝到另一个文件中 import shutil f1 = open('os_模块.py','r',encoding='ut ...

  2. tomcat的catalina.out日志文件过大

    今天发现一个服务器的/opt目录数据过大,最后发现是tomcat中的catalina.out日志过大引起的 用du命令查看opt下一层的数据文件大小 [root@ccssapportalp opt]# ...

  3. 两个关于URL解析的例子

    例一: /* 解析URL查寻串中的name=value参数对 将name=value对存储在对象属性中,并返回对象 alert(getQuery().name) */ function getQuer ...

  4. PowerEdge服务器生命周期控制器:Lifecycle Controller

    戴尔从第11代服务器开始推出生命周期控制器(简称LC,即Lifecycle Controller).生命周期控制器(LC)通过在主板上部署的控制芯片和闪存,与BMC以及iDRAC卡配合,在服务器的整个 ...

  5. UGUI 实例预制对象位置不对

    public static Object Instantiate(Object original, Transform parent, bool instantiateInWorldSpace); / ...

  6. ASP.NET MVC 项目文件夹结构

    首先,打开Visual Studio, 新建一个demo 项目的solution,选择 Blank Soution. 第二步,创建文件夹,按自己的需求创建.在这个Demo 中,我将创建4个文件夹. P ...

  7. Extending Conductor

    后端 Conductor 提供了可插拔的后端.目前的实现使用Dynomite. 每个后端需要实现4个接口: //Store for workflow and task definitions com.netflix. ...

  8. 安装Anaconda以及jupyter的使用

    1)下载https://www.anaconda.com/download/ 2)安装 3)终端查看(Anaconda Prompt) 4)升级所有的包 初次安装下的软件包版本一般都比较老旧,因此提前 ...

  9. Gviz

    1) Introduction 为了理解基因组数据,通常旨在在基因组浏览器中绘制这样的数据,以及各种基因组注释特征,例如基因或转录物模型,CpG岛,重复区域等.这些功能可以从ENSEMBL或UCSC等 ...

  10. Infinity,NaN

    常量 说明 Infinity 表示正无穷大的特殊值. -Infinity 表示负无穷大的特殊值. NaN Number 数据类型的一个特殊成员,用来表示“非数字”(NaN) 值. undefined ...