Commit ddbc8651 authored by Carine Rey's avatar Carine Rey
Browse files

update tdg09 indicator

parent a6e57745
......@@ -88,7 +88,7 @@ let plot_merge_results ~(res_by_tools:result list) ~tree ~faa ~tsv : svg workflo
| `Identical_WAG _ -> "Identical_WAG01"
| `Topological_LG _ -> "Topological_LG08"
| `Topological_WAG _ -> "Topological_WAG01"
| `Tdg09 _ -> "Tdg09"
| `Tdg09 _ -> "Tdg09_1-FDR,Tdg09_prob_post"
in
string opt
) |> seq ~sep:","
......@@ -103,7 +103,7 @@ let plot_merge_results ~(res_by_tools:result list) ~tree ~faa ~tsv : svg workflo
| `Identical_WAG _ -> "Identical_WAG01:0.9"
| `Topological_LG _ -> "Topological_LG08:0.9"
| `Topological_WAG _ -> "Topological_WAG01:0.9"
| `Tdg09 _ -> "Tdg09:0.9"
| `Tdg09 _ -> "Tdg09_1-FDR:0.9,Tdg09_prob_post:0.9"
in
string opt
) |> seq ~sep:","
......
......@@ -109,6 +109,7 @@ df_final["Sites"] = pd.to_numeric(df_final["Sites"].str.replace('[','').str.repl
df_final["Topological"] = map(prob_ap, df_final["lnL_conv"],df_final["lnL_noconv"])
df_final = df_final[["Sites","Topological"]]
#===================================================================================================
# Create output files
#===================================================================================================
......
......@@ -5,6 +5,7 @@
import argparse
import pandas as pd
import sys
import math
# EXAMPLE COMMAND LINE
# python parse_results_tdg09.py -ftdg09 toto -o out
......@@ -30,12 +31,18 @@ args = parser.parse_args()
Tdg09File = args.file
OutFile = args.output
def prob_ap(x,y):
    """Return the logistic of ``x - y``: exp(x-y) / (exp(x-y) + 1).

    Args:
        x: first value (the caller passes a log-likelihood as a float).
        y: second value, subtracted from ``x``.

    Returns:
        A float in [0, 1]; ``0`` when either argument is falsy
        (``0``, ``0.0`` or ``None``), preserving the historical guard.
    """
    if not (x and y):
        return 0
    delta = x - y
    # math.exp raises OverflowError for arguments above ~709, which large
    # log-likelihood differences can reach.  Pick the algebraically
    # equivalent form whose exponent is never positive so exp() can only
    # underflow to 0.0, never overflow.
    if delta >= 0:
        return 1.0 / (1.0 + math.exp(-delta))
    scaled = math.exp(delta)
    return scaled / (scaled + 1.0)
searchlines = Tdg09File.readlines()
Tdg09File.close()
n_sites=int(searchlines[9].split()[1])
fdr=["NA"] * (n_sites)
prob_post=["NA"] * (n_sites)
for i,line in enumerate(searchlines):
if "FullResults:\n" in line:
......@@ -44,14 +51,27 @@ for i,line in enumerate(searchlines):
if not '# Misc.' in l:
if l:
l=l.replace("- [", "").replace("]","").strip()
#The FullResults table lists further results from all sites. This includes the log-likelihood for the WAG+ssF (site-specific frequencies or *homogeneous model*) and WAG+lssF (lineage and site-specific frequencies or *non-homogeneous model*). Conserved locations are not analysed, so their entries are 'NA'. Finally, after the FullResults table, the output file contains details of the per-location results, such as the amino acid frequencies estimated by the ssF and lssF models.
# Site, WAG+ssF params, WAG+ssF lnL, WAG+lssF params, WAG+lssF lnL, delta lnL, dof, LRT, FDR
ll=l.split(",")
(Site,_,_,_,_,_,_,LRT,FDR) = ll
Site = Site.strip()
LRT = LRT.strip()
FDR = FDR.strip()
(Site,_,lnLnoconv,_,lnLconv,_,_,LRT,FDR) = ll
Site = Site.strip()
lnLconv = lnLconv.strip()
lnLnoconv = lnLnoconv.strip()
FDR = FDR.strip()
if Site and FDR != "NA":
fdr[int(Site)-1]=1-float(FDR)
FDR = float(FDR)
fdr[int(Site)-1]=1-FDR
if Site and lnLconv != "NA" and lnLnoconv != "NA":
lnLconv = float(lnLconv)
lnLnoconv = float(lnLnoconv)
prob = prob_ap(lnLconv, lnLnoconv)
prob_post[int(Site)-1]=prob
else:
break
......@@ -59,10 +79,11 @@ for i,line in enumerate(searchlines):
Sites = [i +1 for i in range(n_sites)]
df_final = pd.DataFrame({'Sites': Sites,
'Tdg09' : fdr})
'Tdg09_1-FDR' : fdr,
'Tdg09_prob_post' : prob_post})
df_final = df_final[["Sites","Tdg09"]]
df_final = df_final[["Sites","Tdg09_1-FDR","Tdg09_prob_post"]]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment