PPI_network&calc_ppi
# -*- coding: utf-8 -*-
# __author__ = 'JieYao'
from biocluster.agent import Agent
from biocluster.tool import Tool
import os
import types
import subprocess
from biocluster.core.exceptions import OptionError


class PpinetworkAgent(Agent):
    """
    Agent side of the PPI network analysis. Requires calc_ppi.py.

    version 1.0
    author: JieYao
    last_modified: 2016.8.15
    """

    def __init__(self, parent):
        """Register the options and the analysis step, then hook the step events."""
        super(PpinetworkAgent, self).__init__(parent)
        options = [
            {"name": "ppitable", "type": "infile"},
            {"name": "cut", "type": "string", "default": "-1"}
        ]
        self.add_option(options)
        self.step.add_steps('PpinetworkAnalysis')
        self.on('start', self.step_start)
        # Bind the handler defined on this class; the original bound
        # `self.step.end`, which left step_end() unused.
        self.on('end', self.step_end)

    def step_start(self):
        """Mark the PpinetworkAnalysis step as started and push the update."""
        self.step.PpinetworkAnalysis.start()
        self.step.update()

    def step_end(self):
        """Mark the PpinetworkAnalysis step as finished and push the update."""
        self.step.PpinetworkAnalysis.finish()
        self.step.update()

    def check_options(self):
        """
        Override the option check.

        :return: True when every option is acceptable
        :raises OptionError: when the PPI table is missing, its header lacks
            a required column, or ``cut`` is not a number
        """
        # NOTE(review): is_set is called as a method here; confirm against the
        # framework whether it is a method or a property.
        if not self.option('ppitable').is_set():
            raise OptionError('必须提供PPI网络表')
        # NOTE(review): this treats the "infile" option value as a path
        # string -- confirm that is what self.option() returns for it.
        if not os.path.exists(self.option('ppitable')):
            raise OptionError('PPI网络表路径错误')
        # Only the header line is needed; the context manager closes the file.
        with open(self.option('ppitable'), "r") as handle:
            ppi_list = handle.readline().strip().split("\t")
        if "combined_score" not in ppi_list:
            raise OptionError("PPI网络表缺少结合分数")
        # NOTE(review): "yfrom" looks like a typo for "from" -- verify against
        # the real header of the input table before changing it.
        if ("yfrom" not in ppi_list) or ("to" not in ppi_list):
            raise OptionError("PPI网络缺少相互作用蛋白信息")
        # The threshold must be numeric. float() replaces eval(), which would
        # execute arbitrary option text.
        try:
            float(self.option('cut'))
        except (TypeError, ValueError):
            raise OptionError("Cut参数值异常,无法转换")
        return True

    def set_resource(self):
        """
        Set the required resources.
        """
        self._cpu = 2
        # NOTE(review): the memory request is an empty string -- confirm the
        # scheduler accepts that or supply a real value.
        self._memory = ''

    def end(self):
        """Register the result files for upload, then finish the agent."""
        result_dir = self.add_upload_dir(self.output_dir)
        result_dir.add_repath_rules([
            [".", "", "PPI网络分析结果输出目录"],
            ["./protein_interaction_network_centrality.txt", "txt", "PPI网络中心系数表"],
            ["./protein_interaction_network_clustering.txt", "txt", "PPI网络节点聚类系数表"],
            ["./protein_interaction_network_transitivity.txt", "txt", "PPI网络传递性"],
            ["./protein_interaction_network_by_cut.txt", "txt", "Cut值约束后的PPI网络"],
            ["./protein_interaction_network_degree_distribution.txt", "txt", "PPI网络度分布表"],
            ["./protein_interaction_network_node_degree.txt", "txt", "PPI网络节点度属性表"]
        ])
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self.get_upload_files())
        super(PpinetworkAgent, self).end()


class PpinetworkTool(Tool):
    """Tool side of the PPI network analysis: runs calc_ppi.py and collects its output."""

    def __init__(self, config):
        """Record the script path, the input table and the expected result names."""
        super(PpinetworkTool, self).__init__(config)
        self._version = "1.0.1"
        self.cmd_path = self.config.SOFTWARE_DIR + "/bioinfo/rna/scripts/calc_ppi.py"
        self.ppi_table = self.option('ppitable')
        self.out_files = [
            'protein_interaction_network_centrality.txt',
            'protein_interaction_network_clustering.txt',
            'protein_interaction_network_transitivity.txt',
            'protein_interaction_network_by_cut.txt',
            'protein_interaction_network_degree_distribution.txt',
            'protein_interaction_network_node_degree.txt',
        ]

    def run(self):
        """
        Run the tool.
        """
        super(PpinetworkTool, self).run()
        self.run_ppi_network_py()

    def run_ppi_network_py(self):
        """
        Run calc_ppi.py, link its result files into the output directory and
        end the tool.
        """
        # Must be the same directory that get_filesname() walks.
        out_dir = self.work_dir + '/ppi_network'
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.
        cmd = [
            self.config.SOFTWARE_DIR + '/program/Python/bin/python',
            self.cmd_path,
            "-i", "%s" % self.ppi_table,
            "-o", out_dir,
        ]
        # "cut" is a plain string option, so it has no is_set attribute;
        # pass it along whenever it carries a value.
        cut = self.option('cut')
        if cut not in (None, ""):
            cmd.extend(["-c", "%s" % cut])
        self.logger.info("开始运行calc_ppi.py")
        try:
            subprocess.check_output(cmd)
            self.logger.info('PPI_Network计算完成')
        except subprocess.CalledProcessError:
            self.logger.info('PPI_Network计算失败')
            self.set_error("运行calc_ppi.py失败")
        allfiles = self.get_filesname()
        for found, target in zip(allfiles, self.out_files):
            self.linkfile(found, target)
        self.end()

    def linkfile(self, oldfile, newname):
        """
        Hard-link a file into the output directory, replacing any existing
        file of the same name.

        :param oldfile: path of the source file
        :param newname: file name to use inside the output directory
        :return: None
        """
        newpath = os.path.join(self.output_dir, newname)
        if os.path.exists(newpath):
            os.remove(newpath)
        os.link(oldfile, newpath)

    def get_filesname(self):
        """
        Locate every expected result file below ``<work_dir>/ppi_network``.

        :return: list parallel to ``self.out_files`` holding the path found
            for each expected file, or None where nothing matched
        """
        files_status = [None] * len(self.out_files)
        for paths, _dirs, filelist in os.walk(self.work_dir + '/ppi_network'):
            for filename in filelist:
                name = os.path.join(paths, filename)
                for i, expected in enumerate(self.out_files):
                    # Substring match on the full path, as in the original.
                    if expected in name:
                        files_status[i] = name
        if None in files_status:
            self.set_error('未知原因,结果文件生成出错或丢失')
        return files_status
# -*- coding: utf-8 -*-
# __author__ = 'JieYao' import os
import argparse
from biocluster.config import Config
import shutil
# Node-name registry: a node's integer id is its position in name_list.
# Index 0 is a placeholder so that real node ids start at 1.
name_list = [""]
# Reverse lookup (name -> id) so search() does not rescan name_list.
_name_index = {"": 0}

# Zero-based columns read from each data row of the input table.
NODE1_COLUMN = 0
NODE2_COLUMN = 1
SCORE_COLUMN = 15


def search(node_name):
    """Return the integer id of ``node_name``, registering it on first use.

    :param node_name: node (protein) name
    :return: index of the name in ``name_list``
    """
    try:
        return _name_index[node_name]
    except KeyError:
        name_list.append(node_name)
        node_id = len(name_list) - 1
        _name_index[node_name] = node_id
        return node_id


def parse_number(text):
    """Convert numeric text to int when possible, otherwise to float.

    Replaces eval() on input data; ints stay ints so written weights keep
    their original textual form.

    :raises ValueError: when ``text`` is not a number
    """
    text = text.strip()
    try:
        return int(text)
    except ValueError:
        return float(text)


def read_edges(lines, cut):
    """Collect the edges whose score is at least ``cut``.

    :param lines: all lines of the PPI table; the first one is the header
        and is skipped
    :param cut: numeric score threshold
    :return: list of ``(node1_id, node2_id, score)`` tuples; node names are
        registered through search() only for edges that pass the threshold
    """
    edges = []
    for line in lines[1:]:
        fields = line.rstrip().split("\t")
        # Skip blank or truncated rows instead of failing with IndexError.
        if len(fields) <= SCORE_COLUMN:
            continue
        score = parse_number(fields[SCORE_COLUMN])
        if score >= cut:
            edges.append((search(fields[NODE1_COLUMN]),
                          search(fields[NODE2_COLUMN]),
                          score))
    return edges


def main(argv=None):
    """Read a PPI table, build the network and write its statistics.

    :param argv: argument list for argparse; None means ``sys.argv[1:]``
    """
    # Imported here so the helpers above stay importable without networkx.
    import networkx

    parser = argparse.ArgumentParser(description='输入蛋白质相互作用网络,输出网络信息')
    parser.add_argument('-i', "--PPI_network", help="输入的PPI网络", required=True)
    parser.add_argument('-c', "--cut", help='蛋白相互作用阈值', required=False)
    parser.add_argument('-o', "--output", help="输出文件输出路径", required=True)
    #parser.add_argument('-top', "--top", help = "First k important interaction in graph", required = False)
    args = vars(parser.parse_args(argv))

    in_file = args["PPI_network"]
    out_dir = args["output"]
    # argparse yields a string; convert it so the threshold comparison is
    # numeric. Without -c every edge is kept (threshold -1).
    cut = parse_number(args["cut"]) if args["cut"] else -1

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    with open(in_file, "r") as handle:
        lines = handle.readlines()

    graph = networkx.Graph()
    for node1, node2, score in read_edges(lines, cut):
        graph.add_edge(node1, node2, weight=score)

    transitivity = networkx.transitivity(graph)
    clustering = networkx.clustering(graph)
    degree_distribution = networkx.degree_histogram(graph)
    degree_centrality = networkx.degree_centrality(graph)
    closeness_centrality = networkx.closeness_centrality(graph)
    betweenness_centrality = networkx.betweenness_centrality(graph)

    # Ids are handed out consecutively from 1 and only to nodes that ended
    # up in the graph, so 1..len(graph) enumerates every node.
    node_ids = range(1, len(graph) + 1)

    with open(os.path.join(out_dir, "protein_interaction_network_degree_distribution.txt"), "w") as out:
        out.write("Degree\tNode_Num\n")
        for degree, count in enumerate(degree_distribution):
            # One row per degree; the original omitted the newline.
            out.write("%s\t%s\n" % (degree, count))

    with open(os.path.join(out_dir, "protein_interaction_network_by_cut.txt"), "w") as out:
        out.write("Node_Num = " + str(len(graph.nodes())) + "\n")
        out.write("Edge_Num = " + str(len(graph.edges())) + "\n")
        out.write("Node1_Name\tNode2_Name\tWeight\n")
        for node1, node2 in graph.edges():
            out.write("%s\t%s\t%s\n" % (name_list[node1], name_list[node2],
                                        graph[node1][node2]["weight"]))

    with open(os.path.join(out_dir, "protein_interaction_network_node_degree.txt"), "w") as out:
        out.write("Node_ID\tNode_Name\tDegree\n")
        for node_id in node_ids:
            out.write("%s\t%s\t%s\n" % (node_id, name_list[node_id],
                                        graph.degree(node_id)))

    with open(os.path.join(out_dir, "protein_interaction_network_centrality.txt"), "w") as out:
        out.write("Node_ID\tNode_Name\tDegree_Centrality\t")
        out.write("Closeness_Centrality\tBetweenness_Centrality\n")
        for node_id in node_ids:
            out.write("%s\t%s\t%s\t%s\t%s\n" % (node_id, name_list[node_id],
                                                degree_centrality[node_id],
                                                closeness_centrality[node_id],
                                                betweenness_centrality[node_id]))

    with open(os.path.join(out_dir, "protein_interaction_network_clustering.txt"), "w") as out:
        out.write("Node_ID\tProtein_Name\tClustering\n")
        for node_id in node_ids:
            out.write("%s\t%s\t%s\n" % (node_id, name_list[node_id],
                                        clustering[node_id]))

    with open(os.path.join(out_dir, "protein_interaction_network_transitivity.txt"), "w") as out:
        out.write("Transitivity\n")
        out.write(str(transitivity) + "\n")


if __name__ == "__main__":
    main()
calc_ppi
PPI_network&calc_ppi的更多相关文章
随机推荐
- HDU 5783 Divide the Sequence (贪心)
Divide the Sequence 题目链接: http://acm.hdu.edu.cn/showproblem.php?pid=5783 Description Alice has a seq ...
- [iOS UI进阶 - 3.0] 触摸事件的基本处理
A.需要掌握和练习的 1.介绍事件类型2.通过按钮的事件处理引出view的事件处理3.响应者对象 --> UIResponder --> UIView4.view的拖拽* 实现触摸方法,打 ...
- [iOS基础控件 - 6.9] 聊天界面Demo
A.需求 做出一个类似于QQ.微信的聊天界面 1.每个cell包含发送时间.发送人(头像).发送信息 2.使用对方头像放在左边,我方头像在右边 3.对方信息使用白色背景对话框,我方信息使用蓝色背景对话 ...
- Chrome的Postman的使用
Chrome提供了一个很好的Web App 名为 Postman 使用这个web app,你可以输入一个url,然后可以很清楚的看到返回的各种结果 直接在Google中输入Postman, 找到它 ...
- 关于 jquery cookie的用法
东钿微信公众平台新版上线 需要一个引导用户操作步骤.设置一个cookie师傅偶第一次访问此页面 .如果是则跳出用户引导,如果不是,正常显示. 一开始在百度了一段jquery cookie插件,也没仔细 ...
- 关于Token
Token Token,即计算机术语:令牌 令牌是一种能够控制站点占有媒体的特殊帧,以区别数据帧及其他控制帧.token其实说的更通俗点可以叫暗号,在一些数据传输之前,要先进行暗号的核对,不同的暗号被 ...
- 对PostgreSQL中 pg_各表的RelationId的认识
读取普通的table或者系统表,都会调用heap_open函数: /* ---------------- * heap_open - open a heap relation by relation ...
- 【HTML】心愿墙 Demo展示
这是跟着一个大神做的心愿墙,当时觉得有趣,现在清理磁盘中,所以就放到博客园中进行保存. 效果如下: 下载地址:点击下载
- ZOJ 3633 Alice's present 倍增 区间查询最大值
Alice's present Time Limit: 1 Sec Memory Limit: 256 MB 题目连接 http://acm.hust.edu.cn/vjudge/contest/vi ...
- 进程和cpu的相关知识和简单调优方案
进程就是一段执行的程序,每当一个程序执行时.对于操作系统本身来说,就创建了一个进程,而且分配了相应的资源.进程能够分为3个类别: 1.交互式进程(I/O) 2.批处理进程 (CPU) ...