Commit 23205bdd authored by Carine Rey
Browse files

update msd output parsing

parent 695fb2e1
......@@ -4,7 +4,7 @@ open Bistro.EDSL
open Bistro_bioinfo.Std
open File_formats
let env = docker_image ~account:"carinerey" ~name:"msd" ~tag:"07232018" ()
let env = docker_image ~account:"carinerey" ~name:"msd" ~tag:"08082018" ()
let msd ~(faa:aminoacid_fasta workflow) ~(tree_sc:_ workflow) : [`msd] directory workflow =
let map_table = tmp // "map.tsv" in
......@@ -22,7 +22,7 @@ let msd ~(faa:aminoacid_fasta workflow) ~(tree_sc:_ workflow) : [`msd] directory
cmd "msd" ~env [
opt "-t" int 1;
opt "-o" ident out ;
opt "-e" float 1. ;
opt "-e" float 0.05 ;
dep tree_sc;
ident map_table;
dep faa;
......
......@@ -94,23 +94,10 @@ res_line = lines[1].strip()
res_line_split = res_line.split("\t")
n_sites=int(res_line_split[2])
prob_post=["NA"] * (n_sites)
try:
p_value = 1-float(res_line_split[5])
except:
p_value = 1
try:
convergent_sites = res_line_split[7].split(",")
except: # if no convergent sites, convergent sites field don't exist
convergent_sites = []
for cs in convergent_sites:
prob_post[int(cs) -1] = p_value
Sites = [i +1 for i in range(n_sites)]
df_final = pd.DataFrame({'Sites': Sites, '1MinusP' : prob_post})
df_final = pd.read_csv(InputFile.name, sep="\t", header=None, skiprows=[0,1,2], names = ["Sites", "p_value"])
df_final ["1MinusP"] = 1 - df_final["p_value"]
df_final = df_final[["Sites","1MinusP"]]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment