PPI_network&calc

 # -*- coding: utf-8 -*-

 # __author__ = 'JieYao'

 from biocluster.agent import Agent

 from biocluster.tool import Tool

 import os

 import types

 import subprocess

 from biocluster.core.exceptions import OptionError

 class PpinetworkAgent(Agent):
     """
     Agent for the PPI network analysis.

     Declares the options, validates them, sets the requested resources and
     registers the result files for upload.

     Requires calc_ppi.py.
     version 1.0
     author: JieYao
     last_modified: 2016.8.15
     """

     def __init__(self, parent):
         """Register the options, the analysis step and the start/end handlers."""
         # Fixed: the original passed a misspelled class name (PpinerworkAgent)
         # to super(), which raises NameError on construction.
         super(PpinetworkAgent, self).__init__(parent)
         options = [
             {"name": "ppitable", "type": "infile"},
             {"name": "cut", "type": "string", "default": "-1"}
         ]
         self.add_option(options)
         self.step.add_steps('PpinetworkAnalysis')
         self.on('start', self.step_start)
         # Fixed: bind this agent's step_end handler (the original referenced
         # self.step.end, leaving step_end unused).
         self.on('end', self.step_end)

     def step_start(self):
         """Mark the PpinetworkAnalysis step as started and push the update."""
         self.step.PpinetworkAnalysis.start()
         self.step.update()

     def step_end(self):
         """Mark the PpinetworkAnalysis step as finished and push the update."""
         self.step.PpinetworkAnalysis.finish()
         self.step.update()

     def check_options(self):
         """
         Override the option check.

         :return: True when every check passes
         :raises OptionError: if the PPI table is missing, its path does not
             exist, its header lacks the "combined_score", "from" or "to"
             column, or the "cut" value is not a number
         """
         # NOTE(review): the option value is used both as an object exposing
         # is_set() and as a filesystem path -- confirm against the biocluster
         # option API.
         if not self.option('ppitable').is_set():
             raise OptionError('必须提供PPI网络表')
         if not os.path.exists(self.option('ppitable')):
             raise OptionError('PPI网络表路径错误')
         # Fixed: readline was referenced but never called, and the handle was
         # never closed.
         with open(self.option('ppitable'), "r") as handle:
             ppi_list = handle.readline().strip().split("\t")
         if "combined_score" not in ppi_list:
             raise OptionError("PPI网络表缺少结合分数")
         # Fixed: the source column was spelled "yfrom", so this check rejected
         # tables carrying the usual "from" column.
         if ("from" not in ppi_list) or ("to" not in ppi_list):
             raise OptionError("PPI网络缺少相互作用蛋白信息")
         # The threshold must be a plain number; float() replaces the original
         # eval(), which executed the option text as Python code and hid every
         # failure behind a bare except.
         try:
             float(self.option('cut'))
         except (TypeError, ValueError):
             raise OptionError("Cut参数值异常，无法转换")
         return True

     def set_resource(self):
         """Set the required resources."""
         self._cpu = 2
         # NOTE(review): the memory request is an empty string -- confirm the
         # intended value.
         self._memory = ''

     def end(self):
         """Register the output directory and its files for upload, then end."""
         result_dir = self.add_upload_dir(self.output_dir)
         # Fixed: a missing comma after the by_cut rule made the original index
         # one list with another (TypeError) instead of listing seven rules.
         result_dir.add_repath_rules([
             [".", "", "PPI网络分析结果输出目录"],
             ["./protein_interaction_network_centrality.txt", "txt", "PPI网络中心系数表"],
             ["./protein_interaction_network_clustering.txt", "txt", "PPI网络节点聚类系数表"],
             ["./protein_interaction_network_transitivity.txt", "txt", "PPI网络传递性"],
             ["./protein_interaction_network_by_cut.txt", "txt", "Cut值约束后的PPI网络"],
             ["./protein_interaction_network_degree_distribution.txt", "txt", "PPI网络度分布表"],
             ["./protein_interaction_network_node_degree.txt", "txt", "PPI网络节点度属性表"]
         ])
         # Parenthesised so the line parses as both a Python 2 print statement
         # and a Python 3 call.
         print(self.get_upload_files())
         super(PpinetworkAgent, self).end()

 class PpinetworkTool(Tool):
     """Tool that runs calc_ppi.py on the PPI table and links its result files
     into the output directory."""

     def __init__(self, config):
         """Record the script location, the input table and the expected result file names."""
         super(PpinetworkTool, self).__init__(config)
         self._version = "1.0.1"
         self.cmd_path = self.config.SOFTWARE_DIR + "/bioinfo/rna/scripts/calc_ppi.py"
         # NOTE(review): this value is later formatted with %s into the command
         # line -- confirm that the option object renders as the table path.
         self.ppi_table = self.option('ppitable')
         self.out_files = [
             'protein_interaction_network_centrality.txt',
             'protein_interaction_network_clustering.txt',
             'protein_interaction_network_transitivity.txt',
             'protein_interaction_network_by_cut.txt',
             'protein_interaction_network_degree_distribution.txt',
             'protein_interaction_network_node_degree.txt',
         ]

     def run(self):
         """Run the tool."""
         super(PpinetworkTool, self).run()
         self.run_ppi_network_py()

     def run_ppi_network_py(self):
         """
         Run calc_ppi.py, then link every expected result file into the output
         directory and end the tool.

         On a failed run, or when a result file is missing, an error is
         reported through set_error and nothing is linked.
         """
         real_ppi_table = self.ppi_table
         # Fixed: the original joined these paths with '.' instead of '/',
         # producing "<SOFTWARE_DIR>.program/..." and "<work_dir>.ppi_network",
         # while get_filesname() looks under "<work_dir>/ppi_network".
         python_path = self.config.SOFTWARE_DIR + '/program/Python/bin/python'
         result_dir = self.work_dir + '/ppi_network'
         # calc_ppi.py writes into this directory; nothing visible here creates it.
         if not os.path.isdir(result_dir):
             os.makedirs(result_dir)
         # An argument list (no shell) keeps every value a separate argument;
         # the original string ran the interpreter and script paths together
         # with no space between them.
         cmd = [python_path, self.cmd_path, "-i", "%s" % real_ppi_table, "-o", result_dir]
         # Fixed: "cut" is a string option, so reading .is_set on it fails;
         # pass the threshold whenever a value is present.
         cut = self.option("cut")
         if cut not in (None, ""):
             cmd += ["-c", "%s" % cut]
         self.logger.info("开始运行calc_ppi.py")
         try:
             subprocess.check_output(cmd)
             self.logger.info('PPI_Network计算完成')
         except (subprocess.CalledProcessError, OSError):
             # OSError covers an interpreter that cannot be launched, which the
             # shell form used to report as a non-zero exit status.
             self.logger.info('PPI_Network计算失败')
             # Fixed: ser_error was a typo for set_error.
             self.set_error("运行calc_ppi.py失败")
             return
         allfiles = self.get_filesname()
         if not all(allfiles):
             # get_filesname() has already reported the error; linking a
             # missing (None) path would only raise TypeError.
             return
         for source, target in zip(allfiles, self.out_files):
             self.linkfile(source, target)
         self.end()

     def linkfile(self, oldfile, newname):
         """
         Hard-link a file into the output directory, replacing any existing
         file of the same name.

         :param oldfile: path of the source file
         :param newname: file name to use inside the output directory
         :return: None
         """
         newpath = os.path.join(self.output_dir, newname)
         if os.path.exists(newpath):
             os.remove(newpath)
         os.link(oldfile, newpath)

     def get_filesname(self):
         """
         Locate every expected result file under "<work_dir>/ppi_network".

         :return: list parallel to self.out_files holding the path found for
             each expected name, or None where no file matched; set_error is
             called once when any entry is missing
         """
         found = [None] * len(self.out_files)
         for root, _dirs, filenames in os.walk(self.work_dir + '/ppi_network'):
             for filename in filenames:
                 full_path = os.path.join(root, filename)
                 for index, expected in enumerate(self.out_files):
                     # Substring match against the full path, as in the original.
                     if expected in full_path:
                         found[index] = full_path
         if not all(found):
             self.set_error('未知原因，结果文件生成出错或丢失')
         return found

 # -*- coding: utf-8 -*-

 # __author__ = 'JieYao'

 import os

 import argparse

 from biocluster.config import Config

 import shutil

 import networkx

 # Node names indexed by node id; id 0 holds an empty placeholder so that
 # newly registered names receive ids starting at 1.
 global name_list
 name_list = [""]

 # Reverse map (name -> id) kept in step with name_list so search() does not
 # rescan the whole list on every call.
 _name_index = {"": 0}


 def search(node_name):
     """
     Return the integer id of *node_name*, registering the name on first use.

     :param node_name: node (protein) name
     :return: index of the name in name_list; a new name is appended and
         receives the next free index
     """
     global name_list
     index = _name_index.get(node_name)
     if index is None:
         index = len(name_list)
         name_list.append(node_name)
         _name_index[node_name] = index
     return index

 def _to_number(text):
     """Parse *text* as an int when possible, otherwise as a float."""
     try:
         return int(text)
     except ValueError:
         return float(text)


 # Command line: -i input PPI table, -c optional score threshold,
 # -o directory that receives the result files.
 parser = argparse.ArgumentParser(description='输入蛋白质相互作用网络，输出网络信息')
 parser.add_argument('-i', "--PPI_network", help="输入的PPI网络", required = True)
 parser.add_argument('-c', "--cut", help='蛋白相互作用阈值', required = False)
 parser.add_argument('-o', "--output", help = "输出文件输出路径", required = True)
 #parser.add_argument('-top', "--top", help = "First k important interaction in graph", required = False)
 args = vars(parser.parse_args())

 inFile = args["PPI_network"]
 outFile = args["output"]
 # Fixed: the threshold arrives as a string. Left unconverted, "score >= cut"
 # compared a number with a str, which is never true in Python 2 (and raises
 # TypeError in Python 3), so every edge was dropped whenever -c was given.
 if not args["cut"]:
     cut = -1
 else:
     cut = _to_number(args["cut"])

 # The result files are written into this directory, so make sure it exists.
 if not os.path.isdir(outFile):
     os.makedirs(outFile)

 G = networkx.Graph()
 with open(inFile, "r") as tmp_file:
     data = tmp_file.readlines()

 # The first line is the header. Columns 0 and 1 hold the two node names and
 # column 15 the score that is compared with the threshold.
 for line in data[1:]:
     if not line.strip():
         continue  # ignore blank lines such as a trailing newline
     s = line.rstrip().split("\t")
     # Scores are parsed as plain numbers instead of being passed to eval(),
     # which executed the file content as Python code.
     score = _to_number(s[15])
     if score >= cut:
         G.add_edge(search(s[0]), search(s[1]), weight = score)

 Transitivity = networkx.transitivity(G)
 Clustering = networkx.clustering(G)
 Degree_distribution = networkx.degree_histogram(G)
 Degree_Centrality = networkx.degree_centrality(G)
 Closeness_Centrality = networkx.closeness_centrality(G)
 Betweenness_Centrality = networkx.betweenness_centrality(G)

 # Iterate the ids actually present in the graph rather than assuming they
 # are exactly 1..len(G).
 node_ids = sorted(G.nodes())

 with open(os.path.join(outFile, "protein_interaction_network_degree_distribution.txt"), "w") as tmp_file:
     tmp_file.write("Degree\tNode_Num\n")
     for degree, node_num in enumerate(Degree_distribution):
         # Fixed: the row terminator was missing, so all rows ran together on one line.
         tmp_file.write(str(degree)+"\t"+str(node_num)+"\n")

 with open(os.path.join(outFile, "protein_interaction_network_by_cut.txt"), "w") as tmp_file:
     tmp_file.write("Node_Num = " + str(len(G.nodes())) + "\n")
     tmp_file.write("Edge_Num = " + str(len(G.edges())) + "\n")
     tmp_file.write("Node1_Name\tNode2_Name\tWeight\n")
     for edge in G.edges():
         tmp_file.write(name_list[edge[0]]+"\t"+name_list[edge[1]]+"\t"+str(G[edge[0]][edge[1]]["weight"])+"\n")

 with open(os.path.join(outFile, "protein_interaction_network_node_degree.txt"), "w") as tmp_file:
     tmp_file.write("Node_ID\tNode_Name\tDegree\n")
     for node_id in node_ids:
         tmp_file.write(str(node_id)+"\t"+name_list[node_id]+"\t")
         tmp_file.write(str(G.degree(node_id))+"\n")

 with open(os.path.join(outFile, "protein_interaction_network_centrality.txt"), "w") as tmp_file:
     tmp_file.write("Node_ID\tNode_Name\tDegree_Centrality\t")
     tmp_file.write("Closeness_Centrality\tBetweenness_Centrality\n")
     for node_id in node_ids:
         tmp_file.write(str(node_id)+"\t"+name_list[node_id]+"\t")
         tmp_file.write(str(Degree_Centrality[node_id])+"\t")
         tmp_file.write(str(Closeness_Centrality[node_id])+"\t")
         tmp_file.write(str(Betweenness_Centrality[node_id])+"\n")

 with open(os.path.join(outFile, "protein_interaction_network_clustering.txt"), "w") as tmp_file:
     tmp_file.write("Node_ID\tProtein_Name\tClustering\n")
     for node_id in node_ids:
         tmp_file.write(str(node_id)+"\t"+name_list[node_id]+"\t"+str(Clustering[node_id])+"\n")

 with open(os.path.join(outFile, "protein_interaction_network_transitivity.txt"), "w") as tmp_file:
     tmp_file.write("Transitivity\n")
     tmp_file.write(str(Transitivity)+"\n")

calc_ppi

PPI_network&calc_ppi的更多相关文章

随机推荐

深入浅出Zookeeper
能找到的一些zookeeper的资料一上来不是扯一通paxos算法就是一大坨一大坨的代码.很多人对zookeeper更多的是听过,所以这一篇文章就尝试用尽可能用精简的语言科普zookeeper. zo ...
Mysql数据库插入的中文字段值显示问号的问题解决
最近我使用myeclipse连接mysql数据库查询表中的数据,表中字段值为中文的字段显示问号,查了很多资料将解决方法总结如下: 步骤一:修改mysql数据库的配置文件my.ini或者my-defau ...
HDU 5656 CA Loves GCD （数论DP）
CA Loves GCD 题目链接: http://acm.hust.edu.cn/vjudge/contest/123316#problem/B Description CA is a fine c ...
Spring + JdbcTemplate + JdbcDaoSupport examples
In Spring JDBC development, you can use JdbcTemplate and JdbcDaoSupport classes to simplify the over ...
Linux 系统监控和诊断工具：lsof
1.lsof 简介 lsof 是 Linux 下的一个非常实用的系统级的监控.诊断工具. 它的意思是 List Open Files,很容易你就记住了它是 “ls + of”的组合~ 它可以用来列出被 ...
Tomcat参数配置
一.调整JVM参数 JAVA_OPTS= -server -Xms256m -Xmx1024m 注: -server: 启用服务器模式一定要作为第一个参数,如果CPU多,服务器机建议使用此项 -Xm ...
Simple guide to Java Message Service (JMS) using ActiveMQ
JMS let’s you send messages containing for example a String, array of bytes or a serializable Java o ...
DbHelper数据操作类
摘要:本文介绍一下DbHelper数据操作类微软的企业库中有一个非常不错的数据操作类.但是,不少公司(起码我遇到的几个...),对一些"封装"了些什么的东西不太敢用,虽然我推荐过 ...
MPAndroidChart 的实现
效果图: 代码实现: package com.jiahao.me; import java.util.ArrayList; import java.util.List; import android. ...
Codeforces Round #274 (Div. 1) C. Riding in a Lift 前缀和优化dp
C. Riding in a Lift Time Limit: 1 Sec Memory Limit: 256 MB 题目连接 http://codeforces.com/contest/480/pr ...

PPI_network&calc_ppi

PPI_network&calc_ppi的更多相关文章

随机推荐

热门专题