# -*- coding: utf-8 -*-
# __author__ = 'JieYao'
from biocluster.agent import Agent
from biocluster.tool import Tool
import os
import types
import subprocess
from biocluster.core.exceptions import OptionError


def _option_is_set(value):
    """Tell whether an option value has been provided.

    The original code used ``is_set`` both as a method (``.is_set()``) and
    as a plain attribute (``.is_set``), so both spellings are accepted here.
    Values without an ``is_set`` attribute (e.g. plain strings) fall back
    to their truthiness.
    """
    flag = getattr(value, "is_set", None)
    if flag is None:
        return bool(value)
    if callable(flag):
        return bool(flag())
    return bool(flag)


def _option_path(value):
    """Return the filesystem path behind a file option value.

    NOTE(review): assumes biocluster file objects expose their path as
    ``value.prop['path']`` -- confirm against the framework. Anything
    without such a ``prop`` mapping is returned unchanged, i.e. it is
    treated as already being a path.
    """
    prop = getattr(value, "prop", None)
    if isinstance(prop, dict) and "path" in prop:
        return prop["path"]
    return value


class PpinetworkAgent(Agent):
    """
    Agent side of the PPI network analysis; requires calc_ppi.py.

    version 1.0
    author: JieYao
    last_modified: 2016.8.15
    """

    def __init__(self, parent):
        """Register the options and the 'PpinetworkAnalysis' step."""
        super(PpinetworkAgent, self).__init__(parent)
        options = [
            {"name": "ppitable", "type": "infile"},
            {"name": "cut", "type": "string", "default": "-1"}
        ]
        self.add_option(options)
        self.step.add_steps('PpinetworkAnalysis')
        self.on('start', self.step_start)
        # Bind the handler defined below; the original bound
        # ``self.step.end``, which left ``step_end`` unused.
        self.on('end', self.step_end)

    def step_start(self):
        """Mark the analysis step as started and push the step update."""
        self.step.PpinetworkAnalysis.start()
        self.step.update()

    def step_end(self):
        """Mark the analysis step as finished and push the step update."""
        self.step.PpinetworkAnalysis.finish()
        self.step.update()

    def check_options(self):
        """
        Validate the options before the tool is launched.

        :return: True when every check passes
        :raises OptionError: if the PPI table is missing, its path does not
            exist, its header lacks the ``combined_score``/``from``/``to``
            columns, or ``cut`` is not a number
        """
        table = self.option('ppitable')
        if not _option_is_set(table):
            raise OptionError('必须提供PPI网络表')
        table_path = _option_path(table)
        if not os.path.exists(table_path):
            raise OptionError('PPI网络表路径错误')
        # Only the header line is needed; the context manager closes the file.
        with open(table_path, "r") as handle:
            ppi_list = handle.readline().strip().split("\t")
        if "combined_score" not in ppi_list:
            raise OptionError("PPI网络表缺少结合分数")
        if ("from" not in ppi_list) or ("to" not in ppi_list):
            raise OptionError("PPI网络缺少相互作用蛋白信息")
        # ``cut`` is user input: parse it as a number instead of eval()-ing it.
        try:
            float(self.option('cut'))
        except (TypeError, ValueError):
            raise OptionError("Cut参数值异常,无法转换")
        return True

    def set_resource(self):
        """
        Set the resources requested for the tool.
        """
        self._cpu = 2
        # NOTE(review): the memory request is an empty string in the
        # original; kept unchanged -- confirm the intended value.
        self._memory = ''

    def end(self):
        """Register the upload directory and its file descriptions, then finish."""
        result_dir = self.add_upload_dir(self.output_dir)
        result_dir.add_repath_rules([
            [".", "", "PPI网络分析结果输出目录"],
            ["./protein_interaction_network_centrality.txt", "txt", "PPI网络中心系数表"],
            ["./protein_interaction_network_clustering.txt", "txt", "PPI网络节点聚类系数表"],
            ["./protein_interaction_network_transitivity.txt", "txt", "PPI网络传递性"],
            ["./protein_interaction_network_by_cut.txt", "txt", "Cut值约束后的PPI网络"],
            ["./protein_interaction_network_degree_distribution.txt", "txt", "PPI网络度分布表"],
            ["./protein_interaction_network_node_degree.txt", "txt", "PPI网络节点度属性表"]
        ])
        # Single-argument call form prints the same on Python 2 and 3.
        print(self.get_upload_files())
        super(PpinetworkAgent, self).end()


class PpinetworkTool(Tool):
    """Tool side of the PPI network analysis: runs calc_ppi.py and links its results."""

    def __init__(self, config):
        """Record the script location, the input table and the expected result names."""
        super(PpinetworkTool, self).__init__(config)
        self._version = "1.0.1"
        self.cmd_path = self.config.SOFTWARE_DIR + "/bioinfo/rna/scripts/calc_ppi.py"
        self.ppi_table = self.option('ppitable')
        self.out_files = [
            'protein_interaction_network_centrality.txt',
            'protein_interaction_network_clustering.txt',
            'protein_interaction_network_transitivity.txt',
            'protein_interaction_network_by_cut.txt',
            'protein_interaction_network_degree_distribution.txt',
            'protein_interaction_network_node_degree.txt',
        ]

    def run(self):
        """
        Run the tool.
        """
        super(PpinetworkTool, self).run()
        self.run_ppi_network_py()

    def run_ppi_network_py(self):
        """
        Run calc_ppi.py, then link every result file into the output directory.

        On failure the error is reported through ``set_error`` and nothing
        is linked.
        """
        real_ppi_table = _option_path(self.ppi_table)
        # Same directory that get_filesname() walks afterwards.
        result_dir = self.work_dir + '/ppi_network'
        if not os.path.isdir(result_dir):
            os.makedirs(result_dir)
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact.
        cmd = [
            self.config.SOFTWARE_DIR + '/program/Python/bin/python',
            self.cmd_path,
            '-i', real_ppi_table,
            '-o', result_dir,
        ]
        cut = self.option('cut')
        if cut not in (None, ''):
            # The ``--cut=<value>`` form keeps negative values such as the
            # default "-1" from being read as an option flag.
            cmd.append('--cut=%s' % cut)
        self.logger.info("开始运行calc_ppi.py")
        try:
            subprocess.check_output(cmd)
        except subprocess.CalledProcessError:
            self.logger.info('PPI_Network计算失败')
            self.set_error("运行calc_ppi.py失败")
            return
        self.logger.info('PPI_Network计算完成')
        allfiles = self.get_filesname()
        if None in allfiles:
            # get_filesname() already reported the error; a missing path
            # cannot be linked.
            return
        for found, target in zip(allfiles, self.out_files):
            self.linkfile(found, target)
        self.end()

    def linkfile(self, oldfile, newname):
        """
        Hard-link a file into the output directory, replacing any existing one.

        :param oldfile: path of the source file
        :param newname: file name to use inside the output directory
        :return: None
        """
        newpath = os.path.join(self.output_dir, newname)
        if os.path.exists(newpath):
            os.remove(newpath)
        os.link(oldfile, newpath)

    def get_filesname(self):
        """
        Locate each expected result file under ``<work_dir>/ppi_network``.

        :return: list of paths in the order of ``self.out_files``; an entry
            is None when that file was not found, in which case the error is
            also reported through ``set_error``
        """
        files_status = [None] * len(self.out_files)
        for paths, _dirs, filelist in os.walk(self.work_dir + '/ppi_network'):
            for filename in filelist:
                name = os.path.join(paths, filename)
                for i, expected in enumerate(self.out_files):
                    if expected in name:
                        files_status[i] = name
        if not all(files_status):
            self.set_error('未知原因,结果文件生成出错或丢失')
        return files_status
 # -*- coding: utf-8 -*-
# __author__ = 'JieYao'
# Standalone script (calc_ppi.py): reads a tab-separated PPI table and
# writes network statistics into the output directory.
import os
import argparse
from biocluster.config import Config
import shutil
import networkx

# Zero-based column of the interaction score in each data row.
SCORE_COLUMN = 15

# Node id -> protein name. Id 0 is the empty name, so real nodes start at 1.
name_list = [""]
# Reverse mapping (protein name -> node id) so lookups stay O(1).
_name_index = {"": 0}


def search(node_name):
    """Return the integer node id of *node_name*, registering it if new."""
    node_id = _name_index.get(node_name)
    if node_id is None:
        name_list.append(node_name)
        node_id = len(name_list) - 1
        _name_index[node_name] = node_id
    return node_id


def _parse_number(text):
    """Parse *text* as an int when possible, otherwise as a float.

    Integer scores stay integers so they are written back without a
    trailing ".0".
    """
    try:
        return int(text)
    except ValueError:
        return float(text)


def parse_args():
    """Parse the command line and return the arguments as a dict."""
    parser = argparse.ArgumentParser(description='输入蛋白质相互作用网络,输出网络信息')
    parser.add_argument('-i', "--PPI_network", help="输入的PPI网络", required=True)
    parser.add_argument('-c', "--cut", help='蛋白相互作用阈值', required=False)
    parser.add_argument('-o', "--output", help="输出文件输出路径", required=True)
    #parser.add_argument('-top', "--top", help = "First k important interaction in graph", required = False)
    return vars(parser.parse_args())


def build_graph(in_file, cut):
    """Build the interaction graph from *in_file*.

    The first line is skipped as a header. Every other non-blank line
    contributes an edge between columns 0 and 1 when the score in column
    SCORE_COLUMN is at least *cut*.
    """
    graph = networkx.Graph()
    with open(in_file, "r") as handle:
        handle.readline()  # header line
        for line in handle:
            if not line.strip():
                continue  # tolerate blank lines, e.g. at the end of the file
            fields = line.rstrip().split("\t")
            score = _parse_number(fields[SCORE_COLUMN])
            if score >= cut:
                graph.add_edge(search(fields[0]), search(fields[1]), weight=score)
    return graph


def write_results(graph, out_dir):
    """Compute the network statistics of *graph* and write them into *out_dir*."""
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    transitivity = networkx.transitivity(graph)
    clustering = networkx.clustering(graph)
    degree_distribution = networkx.degree_histogram(graph)
    degree_centrality = networkx.degree_centrality(graph)
    closeness_centrality = networkx.closeness_centrality(graph)
    betweenness_centrality = networkx.betweenness_centrality(graph)
    # Iterate over the ids actually present instead of assuming 1..len(graph).
    node_ids = sorted(graph.nodes())

    with open(os.path.join(out_dir, "protein_interaction_network_degree_distribution.txt"), "w") as tmp_file:
        tmp_file.write("Degree\tNode_Num\n")
        for degree, node_num in enumerate(degree_distribution):
            tmp_file.write(str(degree) + "\t" + str(node_num) + "\n")

    with open(os.path.join(out_dir, "protein_interaction_network_by_cut.txt"), "w") as tmp_file:
        tmp_file.write("Node_Num = " + str(len(graph.nodes())) + "\n")
        tmp_file.write("Edge_Num = " + str(len(graph.edges())) + "\n")
        tmp_file.write("Node1_Name\tNode2_Name\tWeight\n")
        for edge in graph.edges():
            first, second = edge[0], edge[1]
            tmp_file.write(name_list[first] + "\t" + name_list[second] + "\t"
                           + str(graph[first][second]["weight"]) + "\n")

    with open(os.path.join(out_dir, "protein_interaction_network_node_degree.txt"), "w") as tmp_file:
        tmp_file.write("Node_ID\tNode_Name\tDegree\n")
        for node_id in node_ids:
            tmp_file.write(str(node_id) + "\t" + name_list[node_id] + "\t")
            tmp_file.write(str(graph.degree(node_id)) + "\n")

    with open(os.path.join(out_dir, "protein_interaction_network_centrality.txt"), "w") as tmp_file:
        tmp_file.write("Node_ID\tNode_Name\tDegree_Centrality\t")
        tmp_file.write("Closeness_Centrality\tBetweenness_Centrality\n")
        for node_id in node_ids:
            tmp_file.write(str(node_id) + "\t" + name_list[node_id] + "\t")
            tmp_file.write(str(degree_centrality[node_id]) + "\t")
            tmp_file.write(str(closeness_centrality[node_id]) + "\t")
            tmp_file.write(str(betweenness_centrality[node_id]) + "\n")

    with open(os.path.join(out_dir, "protein_interaction_network_clustering.txt"), "w") as tmp_file:
        tmp_file.write("Node_ID\tProtein_Name\tClustering\n")
        for node_id in node_ids:
            tmp_file.write(str(node_id) + "\t" + name_list[node_id] + "\t"
                           + str(clustering[node_id]) + "\n")

    with open(os.path.join(out_dir, "protein_interaction_network_transitivity.txt"), "w") as tmp_file:
        tmp_file.write("Transitivity\n")
        tmp_file.write(str(transitivity) + "\n")


def main():
    """Entry point: parse the arguments, build the graph, write the results."""
    args = parse_args()
    # The threshold arrives as text; it must be numeric to be compared with
    # the scores. Without -c every edge is kept (threshold -1).
    cut = float(args["cut"]) if args["cut"] else -1
    graph = build_graph(args["PPI_network"], cut)
    write_results(graph, args["output"])


if __name__ == "__main__":
    main()

calc_ppi

PPI_network&calc_ppi的更多相关文章

随机推荐

  1. HDU 5783 Divide the Sequence (贪心)

    Divide the Sequence 题目链接: http://acm.hdu.edu.cn/showproblem.php?pid=5783 Description Alice has a seq ...

  2. [iOS UI进阶 - 3.0] 触摸事件的基本处理

    A.需要掌握和练习的 1.介绍事件类型2.通过按钮的事件处理引出view的事件处理3.响应者对象 --> UIResponder --> UIView4.view的拖拽* 实现触摸方法,打 ...

  3. [iOS基础控件 - 6.9] 聊天界面Demo

    A.需求 做出一个类似于QQ.微信的聊天界面 1.每个cell包含发送时间.发送人(头像).发送信息 2.使用对方头像放在左边,我方头像在右边 3.对方信息使用白色背景对话框,我方信息使用蓝色背景对话 ...

  4. Chrome的Postman的使用

    Chrome提供了一个很好的Web App 名为 Postman 使用这个web app,你可以输入一个url,然后可以很清楚的看到返回的各种结果 直接在Google中输入Postman, 找到它   ...

  5. 关于 jquery cookie的用法

    东钿微信公众平台新版上线 需要一个引导用户操作步骤.设置一个cookie是否第一次访问此页面 .如果是则跳出用户引导,如果不是,正常显示. 一开始在百度了一段jquery cookie插件,也没仔细 ...

  6. 关于Token

    Token Token,即计算机术语:令牌 令牌是一种能够控制站点占有媒体的特殊帧,以区别数据帧及其他控制帧.token其实说的更通俗点可以叫暗号,在一些数据传输之前,要先进行暗号的核对,不同的暗号被 ...

  7. 对PostgreSQL中 pg_各表的RelationId的认识

    读取普通的table或者系统表,都会调用heap_open函数: /* ---------------- * heap_open - open a heap relation by relation ...

  8. 【HTML】心愿墙 Demo展示

    这是跟着一个大神做的心愿墙,当时觉得有趣,现在清理磁盘中,所以就放到博客园中进行保存. 效果如下: 下载地址:点击下载

  9. ZOJ 3633 Alice's present 倍增 区间查询最大值

    Alice's present Time Limit: 1 Sec Memory Limit: 256 MB 题目连接 http://acm.hust.edu.cn/vjudge/contest/vi ...

  10. 进程和cpu的相关知识和简单调优方案

    进程就是一段执行的程序,每当一个程序执行时.对于操作系统本身来说,就创建了一个进程,而且分配了相应的资源.进程能够分为3个类别:     1.交互式进程(I/O)     2.批处理进程 (CPU) ...