Commit aeef86f7 authored by Carine Rey's avatar Carine Rey
Browse files

add sum_bl

parent a168c06f
...@@ -139,7 +139,7 @@ let fna2faa ~(fna:nucleotide_fasta workflow) : aminoacid_fasta workflow = ...@@ -139,7 +139,7 @@ let fna2faa ~(fna:nucleotide_fasta workflow) : aminoacid_fasta workflow =
] ]
] ]
let conf_file_bppseqman_fa2phy ~fna = let conf_file_bppseqman_fna2phy ~fna =
seq ~sep:"\n" [ seq ~sep:"\n" [
assign "input.sequence.file" (dep fna) ; assign "input.sequence.file" (dep fna) ;
assign "output.sequence.file" dest ; assign "output.sequence.file" dest ;
...@@ -150,11 +150,30 @@ let conf_file_bppseqman_fa2phy ~fna = ...@@ -150,11 +150,30 @@ let conf_file_bppseqman_fa2phy ~fna =
sequence.manip = sequence.manip =
|} |}
] ]
let conf_file_bppseqman_faa2phy ~faa =
seq ~sep:"\n" [
assign "alphabet" (string "Protein") ;
assign "input.sequence.file" (dep faa) ;
assign "output.sequence.file" dest ;
assign "output.sequence.format" (string "Phylip(order=interleaved, type=extended)") ;
string {| input.alignment = true
input.sequence.remove_stop_codons = no
input.sequence.sites_to_use = all
sequence.manip =
|}
]
let fna2phy ~(fna: nucleotide_fasta workflow) : nucleotide_phylip workflow =
workflow ~descr:"bppsuite.fna2phy_interleaved" [
cmd "bppseqman" ~env [
assign "param" (file_dump (conf_file_bppseqman_fna2phy ~fna)) ;
]
]
let fa2phy ~(fna: nucleotide_fasta workflow) : nucleotide_phylip workflow = let faa2phy ~(faa: aminoacid_fasta workflow) : aminoacid_phylip workflow =
workflow ~descr:"bppsuite.fa2phy_interleaved" [ workflow ~descr:"bppsuite.faa2phy_interleaved" [
cmd "bppseqman" ~env [ cmd "bppseqman" ~env [
assign "param" (file_dump (conf_file_bppseqman_fa2phy ~fna)) ; assign "param" (file_dump (conf_file_bppseqman_faa2phy ~faa)) ;
] ]
] ]
......
...@@ -35,10 +35,14 @@ val fna2faa : ...@@ -35,10 +35,14 @@ val fna2faa :
fna:nucleotide_fasta workflow -> fna:nucleotide_fasta workflow ->
aminoacid_fasta workflow aminoacid_fasta workflow
val fa2phy : val fna2phy :
fna: nucleotide_fasta workflow -> fna: nucleotide_fasta workflow ->
nucleotide_phylip workflow nucleotide_phylip workflow
val faa2phy :
faa: aminoacid_fasta workflow ->
aminoacid_phylip workflow
val paste_fna: val paste_fna:
fna_l: nucleotide_fasta workflow list -> fna_l: nucleotide_fasta workflow list ->
nucleotide_fasta workflow nucleotide_fasta workflow
...@@ -30,3 +30,7 @@ class type nucleotide_phylip = object ...@@ -30,3 +30,7 @@ class type nucleotide_phylip = object
inherit text_file inherit text_file
method format : [`Nucleotide] method format : [`Nucleotide]
end end
class type aminoacid_phylip = object
inherit text_file
method format : [`Aminoacid]
end
...@@ -6,7 +6,19 @@ open File_formats ...@@ -6,7 +6,19 @@ open File_formats
type phyml type phyml
let phyml ~tree phy : phyml directory workflow = type m =
|GTR
|LG
let phyml ~model ~tree phy : phyml directory workflow =
let m = match model with
| GTR -> "gtr"
| LG -> "lg"
in
let d = match model with
| GTR -> "nt"
| LG -> "aa"
in
let env = Env.env_phyml in let env = Env.env_phyml in
(*mpirun -np 4 phyml -o lr -u cyp_coding.ok.nwk -i cyp_coding.phy -d 'nt' -m gtr*) (*mpirun -np 4 phyml -o lr -u cyp_coding.ok.nwk -i cyp_coding.phy -d 'nt' -m gtr*)
let tmp_phy = dest // "tree.phy" in let tmp_phy = dest // "tree.phy" in
...@@ -22,11 +34,11 @@ let phyml ~tree phy : phyml directory workflow = ...@@ -22,11 +34,11 @@ let phyml ~tree phy : phyml directory workflow =
opt "-u" dep tree; opt "-u" dep tree;
opt "-o" string "lr"; opt "-o" string "lr";
opt "-i" ident tmp_phy ; opt "-i" ident tmp_phy ;
opt "-d" string "nt" ; opt "-d" string d ;
opt "-m" string "gtr" ; opt "-m" string m ;
]; ];
]) ])
] ]
let phyml_tree ~tree phy : nw workflow = let phyml_tree ~model ~tree phy : nw workflow =
(phyml ~tree phy) / selector ["tree.phy_phyml_tree.txt"] (phyml ~model ~tree phy) / selector ["tree.phy_phyml_tree.txt"]
...@@ -279,7 +279,7 @@ let repo_of_dataset_results_l ~dataset_results_l = ...@@ -279,7 +279,7 @@ let repo_of_dataset_results_l ~dataset_results_l =
let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview = let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
let faa = dataset.dataset.faa in let faa = dataset.dataset.faa in
let fna = dataset.dataset.fna in let fna = dataset.dataset.fna in
let phy_n = Bppsuite.fa2phy ~fna in let phy_n = Bppsuite.fna2phy ~fna in
let tree_sc = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in let tree_sc = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
let tree_id = Tree_dataset.tree dataset.dataset.tree_dataset `Simulation in let tree_id = Tree_dataset.tree dataset.dataset.tree_dataset `Simulation in
let diffsel_tree = Tree_dataset.diffsel_tree dataset.dataset.tree_dataset in let diffsel_tree = Tree_dataset.diffsel_tree dataset.dataset.tree_dataset in
......
...@@ -41,8 +41,9 @@ type simu_infos = { ...@@ -41,8 +41,9 @@ type simu_infos = {
tree_prefix: string ; tree_prefix: string ;
} }
type reinfered_tree = { type reinfered_trees = {
reinfered_tree : nw workflow ; reinfered_tree_nt : nw workflow ;
reinfered_tree_aa : nw workflow ;
input_tree : nhx workflow ; input_tree : nhx workflow ;
tree_prefix : string ; tree_prefix : string ;
model_prefix : string ; model_prefix : string ;
...@@ -52,6 +53,7 @@ type post_analyses_simu = { ...@@ -52,6 +53,7 @@ type post_analyses_simu = {
simu_infos_l : simu_infos list; simu_infos_l : simu_infos list;
simu_infos_plot : text_file workflow ; simu_infos_plot : text_file workflow ;
trees_plot : text_file workflow ; trees_plot : text_file workflow ;
bl_trees_tsv : text_file workflow ;
} }
let is_hyp ~hyp (dataset_results :dataset_res) = let is_hyp ~hyp (dataset_results :dataset_res) =
...@@ -166,7 +168,8 @@ let plot_trees ~reinfered_tree_l : plot_trees directory workflow = ...@@ -166,7 +168,8 @@ let plot_trees ~reinfered_tree_l : plot_trees directory workflow =
let env_r = Env.env_r in let env_r = Env.env_r in
let env_py = Env.env_py in let env_py = Env.env_py in
let cmd_cp_l = List.map reinfered_tree_l ~f:(fun rt -> [ let cmd_cp_l = List.map reinfered_tree_l ~f:(fun rt -> [
cmd "cp" [dep rt.reinfered_tree ; tmp // (rt.tree_prefix ^"@"^ rt.model_prefix ^ ".nw")]; cmd "cp" [dep rt.reinfered_tree_nt ; tmp // (rt.tree_prefix ^"@"^ rt.model_prefix ^ "@nt.nw")];
cmd "cp" [dep rt.reinfered_tree_aa ; tmp // (rt.tree_prefix ^"@"^ rt.model_prefix ^ "@aa.nw")];
cmd "cp" [dep rt.input_tree ; tmp // (rt.tree_prefix ^"@input_tree.nw")] cmd "cp" [dep rt.input_tree ; tmp // (rt.tree_prefix ^"@input_tree.nw")]
]) |> List.concat ]) |> List.concat
in in
...@@ -407,17 +410,20 @@ let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l = ...@@ -407,17 +410,20 @@ let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l =
) in ) in
let reinfered_tree_l = List.map simu_dataset_l ~f:(fun dataset -> let reinfered_tree_l = List.map simu_dataset_l ~f:(fun dataset ->
let rd = dataset.dataset in let rd = dataset.dataset in
let phy = Bppsuite.fa2phy rd.fna in let phy_nt = Bppsuite.fna2phy rd.fna in
let phy_aa = Bppsuite.faa2phy rd.faa in
let input_tree = rd.input_tree in let input_tree = rd.input_tree in
let reinfered_tree = Phyml.phyml_tree ~tree:input_tree phy in let reinfered_tree_nt = Phyml.phyml_tree ~model:GTR ~tree:input_tree phy_nt in
{reinfered_tree; input_tree; let reinfered_tree_aa = Phyml.phyml_tree ~model:LG ~tree:input_tree phy_aa in
{reinfered_tree_nt; reinfered_tree_aa; input_tree;
tree_prefix = dataset.tree_prefix ; tree_prefix = dataset.tree_prefix ;
model_prefix = dataset.model_prefix model_prefix = dataset.model_prefix
} }
) in ) in
let simu_infos_plot = group_simu_infos ~simu_infos_l / selector ["out.pdf"] in let simu_infos_plot = group_simu_infos ~simu_infos_l / selector ["out.pdf"] in
let trees_plot = plot_trees ~reinfered_tree_l / selector ["out.pdf"] in let trees_plot = plot_trees ~reinfered_tree_l / selector ["out.pdf"] in
{simu_infos_l; simu_infos_plot; trees_plot} let bl_trees_tsv = plot_trees ~reinfered_tree_l / selector ["out.tsv"] in
{simu_infos_l; simu_infos_plot; trees_plot; bl_trees_tsv}
...@@ -480,6 +486,7 @@ let repo_of_post_analyses_simu ~post_analyses_simu = ...@@ -480,6 +486,7 @@ let repo_of_post_analyses_simu ~post_analyses_simu =
Repo.[ Repo.[
item ["hypothesis_validation.pdf"] post_analyses_simu.simu_infos_plot ; item ["hypothesis_validation.pdf"] post_analyses_simu.simu_infos_plot ;
item ["trees_validation.pdf"] post_analyses_simu.trees_plot; item ["trees_validation.pdf"] post_analyses_simu.trees_plot;
item ["trees_bl.tsv"] post_analyses_simu.bl_trees_tsv;
] ]
; ;
(List.map post_analyses_simu.simu_infos_l ~f:(fun simu_infos -> (List.map post_analyses_simu.simu_infos_l ~f:(fun simu_infos ->
......
...@@ -24,11 +24,13 @@ let of_raw ?(descr="") (raw_dataset : Raw_dataset.t) = ...@@ -24,11 +24,13 @@ let of_raw ?(descr="") (raw_dataset : Raw_dataset.t) =
let repo rd = let repo rd =
let phy = (Bppsuite.fa2phy rd.fna) in let phy_nt = (Bppsuite.fna2phy rd.fna) in
let phy_aa = (Bppsuite.faa2phy rd.faa) in
Repo.[ Repo.[
[ [
item ["input_tree.nhx"] rd.input_tree ; item ["input_tree.nhx"] rd.input_tree ;
item ["recalculated_tree.nw"] (Phyml.phyml_tree ~tree:rd.input_tree phy ); item ["recalculated_tree_nt.nw"] (Phyml.phyml_tree ~model:GTR ~tree:rd.input_tree phy_nt );
item ["recalculated_tree_aa.nw"] (Phyml.phyml_tree ~model:LG ~tree:rd.input_tree phy_aa );
item ["tree.H0.node_ids" ] (Tree_dataset.nodes rd.tree_dataset H0_NeG5) ; item ["tree.H0.node_ids" ] (Tree_dataset.nodes rd.tree_dataset H0_NeG5) ;
item ["tree.Ha.node_ids" ] (Tree_dataset.nodes rd.tree_dataset HaPCOC) ; item ["tree.Ha.node_ids" ] (Tree_dataset.nodes rd.tree_dataset HaPCOC) ;
item ["tree.only_convergent_tags.nhx" ] (Tree_dataset.tree rd.tree_dataset `Detection) ; item ["tree.only_convergent_tags.nhx" ] (Tree_dataset.tree rd.tree_dataset `Detection) ;
...@@ -36,7 +38,8 @@ let repo rd = ...@@ -36,7 +38,8 @@ let repo rd =
item ["tree.diffsel" ] (Tree_dataset.diffsel_tree rd.tree_dataset) ; item ["tree.diffsel" ] (Tree_dataset.diffsel_tree rd.tree_dataset) ;
item ["tree.convergent_topology" ] (Tree_dataset.topological_tree rd.tree_dataset) ; item ["tree.convergent_topology" ] (Tree_dataset.topological_tree rd.tree_dataset) ;
item ["simulated_sequences.fna"] rd.fna ; item ["simulated_sequences.fna"] rd.fna ;
item ["simulated_sequences.phy"] phy ; item ["simulated_sequences_nt.phy"] phy_nt ;
item ["simulated_sequences_aa.phy"] phy_aa ;
item ["simulated_sequences.faa"] rd.faa ; item ["simulated_sequences.faa"] rd.faa ;
] ; ] ;
match rd.fna_infos with match rd.fna_infos with
......
...@@ -159,6 +159,8 @@ df_d_H0H0_NeG5NeG5_NeC1 = build_df_dist_couple(df_H0_NeG5, df_H0_NeG5_NeC_1, " ...@@ -159,6 +159,8 @@ df_d_H0H0_NeG5NeG5_NeC1 = build_df_dist_couple(df_H0_NeG5, df_H0_NeG5_NeC_1, "
df_d_H0H0_NeG1NeG1_NeC4 = build_df_dist_couple(df_H0_NeG1, df_H0_NeG1_NeC_4, "H0/H0 NeG1/NeG1_NeC_4") df_d_H0H0_NeG1NeG1_NeC4 = build_df_dist_couple(df_H0_NeG1, df_H0_NeG1_NeC_4, "H0/H0 NeG1/NeG1_NeC_4")
df_d_H0H0_NeG4NeG4_NeC1 = build_df_dist_couple(df_H0_NeG4, df_H0_NeG4_NeC_1, "H0/H0 NeG4/NeG4_NeC_1") df_d_H0H0_NeG4NeG4_NeC1 = build_df_dist_couple(df_H0_NeG4, df_H0_NeG4_NeC_1, "H0/H0 NeG4/NeG4_NeC_1")
df_d_H0_NeG1_HaPC_NeG1_NeC_4 = build_df_dist_couple(df_H0_NeG1, df_HaPC_NeG1_NeC_4, "H0 NeG1/HaPC NeG1_NeC_4")
df_d = rbind.data.frame( df_d = rbind.data.frame(
df_d_H0HaPCOC_NeG1, df_d_H0HaPCOC_NeG1,
df_d_H0HaPC_NeG1, df_d_H0HaPC_NeG1,
...@@ -176,6 +178,7 @@ df_d = rbind.data.frame( ...@@ -176,6 +178,7 @@ df_d = rbind.data.frame(
df_d_H0HaPC_NeG1_NeC_4, df_d_H0HaPC_NeG1_NeC_4,
df_d_H0H0_NeG4NeG4_NeC1, df_d_H0H0_NeG4NeG4_NeC1,
df_d_H0H0_NeG1NeG1_NeC4, df_d_H0H0_NeG1NeG1_NeC4,
df_d_H0_NeG1_HaPC_NeG1_NeC_4,
df_d_H0HaPC_NeG5_indel df_d_H0HaPC_NeG5_indel
) )
...@@ -268,6 +271,8 @@ df_H0HaPC_NeG1_NeC_4 = build_df_couple(df_H0_NeG1_NeC_4, df_HaPC_NeG1_NeC_4, " ...@@ -268,6 +271,8 @@ df_H0HaPC_NeG1_NeC_4 = build_df_couple(df_H0_NeG1_NeC_4, df_HaPC_NeG1_NeC_4, "
df_H0H0_NeG4NeG4_NeC_1 = build_df_couple(df_H0_NeG4, df_H0_NeG4_NeC_1, "H0/H0 NeG4/NeG4_NeC_1") df_H0H0_NeG4NeG4_NeC_1 = build_df_couple(df_H0_NeG4, df_H0_NeG4_NeC_1, "H0/H0 NeG4/NeG4_NeC_1")
df_H0H0_NeG1NeG1_NeC_4 = build_df_couple(df_H0_NeG1, df_H0_NeG1_NeC_4, "H0/H0 NeG1/NeG1_NeC_4") df_H0H0_NeG1NeG1_NeC_4 = build_df_couple(df_H0_NeG1, df_H0_NeG1_NeC_4, "H0/H0 NeG1/NeG1_NeC_4")
df_H0_NeG1_HaPC_NeG1_NeC_4 = build_df_couple(df_H0_NeG1, df_HaPC_NeG1_NeC_4, "H0 NeG1/HaPC NeG1_NeC_4")
df_H0HaPC_NeG5_indel = build_df_couple(df_H0_NeG5_indel, df_HaPC_NeG5_indel, "H0/HaPC NeG5_indel") df_H0HaPC_NeG5_indel = build_df_couple(df_H0_NeG5_indel, df_HaPC_NeG5_indel, "H0/HaPC NeG5_indel")
...@@ -277,6 +282,7 @@ df_l = list(df_H0HaPCOC_NeG1, df_H0HaPC_NeG1, df_H0HaPC_NeG2, df_H0HaPC_NeG3, ...@@ -277,6 +282,7 @@ df_l = list(df_H0HaPCOC_NeG1, df_H0HaPC_NeG1, df_H0HaPC_NeG2, df_H0HaPC_NeG3,
df_H0H0_NeG5NeG5_NeC_1, df_H0H0_NeG1NeG1_NeC_5, df_H0H0_NeG5NeG5_NeC_1, df_H0H0_NeG1NeG1_NeC_5,
df_H0HaPC_NeG4_NeC_1,df_H0HaPC_NeG1_NeC_4, df_H0HaPC_NeG4_NeC_1,df_H0HaPC_NeG1_NeC_4,
df_H0H0_NeG4NeG4_NeC_1, df_H0H0_NeG1NeG1_NeC_4, df_H0H0_NeG4NeG4_NeC_1, df_H0H0_NeG1NeG1_NeC_4,
df_H0_NeG1_HaPC_NeG1_NeC_4,
df_H0HaPC_NeG5_NeC_x2,df_H0HaPC_NeG5_indel) df_H0HaPC_NeG5_NeC_x2,df_H0HaPC_NeG5_indel)
df_l = df_l[-which(sapply(df_l, is.null))] df_l = df_l[-which(sapply(df_l, is.null))]
...@@ -412,11 +418,11 @@ df_auc$string_auc2 = paste0(df_auc$auc_rank, " (", round(df_auc$auc2,3),")") ...@@ -412,11 +418,11 @@ df_auc$string_auc2 = paste0(df_auc$auc_rank, " (", round(df_auc$auc2,3),")")
df_auc$string_best_R_90_P = paste0(df_auc$best_R_90_P_rank, " (", round(df_auc$best_R_90_P,3),")") df_auc$string_best_R_90_P = paste0(df_auc$best_R_90_P_rank, " (", round(df_auc$best_R_90_P,3),")")
df_only_auc = dcast(df_auc, methode ~ couple, value.var="string_auc" ) df_only_auc = dcast(df_auc, methode ~ couple, value.var="string_auc" )
colnames(df_only_auc)[1] = "methode\auc" colnames(df_only_auc)[1] = "methode - auc"
df_only_auc2 = dcast(df_auc, methode ~ couple, value.var="string_auc2" ) df_only_auc2 = dcast(df_auc, methode ~ couple, value.var="string_auc2" )
colnames(df_only_auc2)[1] = "methode\auc2" colnames(df_only_auc2)[1] = "methode - auc2"
df_only_string_best_R_90_P = dcast(df_auc, methode ~ couple, value.var="string_best_R_90_P" ) df_only_string_best_R_90_P = dcast(df_auc, methode ~ couple, value.var="string_best_R_90_P" )
colnames(df_only_string_best_R_90_P)[1] = "methode\best_R_90_P" colnames(df_only_string_best_R_90_P)[1] = "methode - best_R_90_P"
print(df_only_auc) print(df_only_auc)
...@@ -484,8 +490,8 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi ...@@ -484,8 +490,8 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
) )
print("plot recall_precision") print("plot recall_precision")
if (length(meths) <= 9 & all(c("H0/HaPC NeG1","H0/HaPC NeG2","H0/HaPC NeG4","H0/H0 NeG4/NeG4_NeC_1","H0/H0 NeG1/NeG1_NeC_4", "H0/HaPC NeG4_NeC_1","H0/HaPC NeG1_NeC_4") %in% df_out$couple)) { if (length(meths) <= 9 & all(c("H0 NeG1/HaPC NeG1_NeC_4", "H0/HaPC NeG1","H0/HaPC NeG2","H0/HaPC NeG4","H0/H0 NeG4/NeG4_NeC_1","H0/H0 NeG1/NeG1_NeC_4", "H0/HaPC NeG4_NeC_1","H0/HaPC NeG1_NeC_4") %in% df_out$couple)) {
couple_i = c("H0/HaPC NeG1","H0/HaPC NeG2","H0/HaPC NeG3","H0/HaPC NeG4","H0/HaPC NeG5","H0/H0 NeG5/NeG5_NeC_1","H0/H0 NeG1/NeG1_NeC_5", "H0/HaPC NeG5_NeC_1","H0/HaPC NeG1_NeC_5","H0/H0 NeG5/NeG4_NeC_1","H0/H0 NeG3/NeG1_NeC_4", "H0/HaPC NeG4_NeC_1","H0/HaPC NeG1_NeC_4") couple_i = c("H0 NeG1/HaPC NeG1_NeC_4", "H0/HaPC NeG1","H0/HaPC NeG2","H0/HaPC NeG3","H0/HaPC NeG4","H0/HaPC NeG5","H0/H0 NeG5/NeG5_NeC_1","H0/H0 NeG1/NeG1_NeC_5", "H0/HaPC NeG5_NeC_1","H0/HaPC NeG1_NeC_5","H0/H0 NeG5/NeG4_NeC_1","H0/H0 NeG3/NeG1_NeC_4", "H0/HaPC NeG4_NeC_1","H0/HaPC NeG1_NeC_4")
plot = ggplot(subset(df_out, couple %in% couple_i), aes(x=sensitivity, y=precision98_02, col = methode)) plot = ggplot(subset(df_out, couple %in% couple_i), aes(x=sensitivity, y=precision98_02, col = methode))
plot = plot + theme_bw() plot = plot + theme_bw()
plot = plot + labs(x="Sensitivity (= Recall)", y="Precision (98/2)") plot = plot + labs(x="Sensitivity (= Recall)", y="Precision (98/2)")
...@@ -529,24 +535,29 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi ...@@ -529,24 +535,29 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
tmp_df[dim(tmp_df)[1]+1,] = tmp_df[dim(tmp_df)[1],] tmp_df[dim(tmp_df)[1]+1,] = tmp_df[dim(tmp_df)[1],]
tmp_df$couple[dim(tmp_df)[1]] = "to_be_rm" tmp_df$couple[dim(tmp_df)[1]] = "to_be_rm"
} }
if ("NUL2L" %in% couple_l) {
tmp_df[dim(tmp_df)[1]+1,] = tmp_df[dim(tmp_df)[1],]
tmp_df$couple[dim(tmp_df)[1]] = "to_be_rm2"
}
plot = ggplot(tmp_df, aes(x=sensitivity, y=precision98_02, col = methode)) plot = ggplot(tmp_df, aes(x=sensitivity, y=precision98_02, col = methode))
plot = plot + theme_bw() plot = plot + theme_bw()
plot = plot + labs(x="Sensitivity (= Recall)", y="Precision (98/2)") plot = plot + labs(x="Sensitivity (= Recall)", y="Precision")
plot = plot + theme(legend.position="top") plot = plot + theme(legend.position="top")
plot = plot + ylim(c(0,1)) + xlim(c(0,1)) plot = plot + ylim(c(0,1)) + xlim(c(0,1))
plot = plot + guides(fill=FALSE) plot = plot + guides(fill=FALSE)
plot = plot + scale_color_manual(values=colors2) plot = plot + scale_color_manual(values=colors2)
#plot = plot + geom_point(size=1, alpha=alpha) #plot = plot + geom_point(size=1, alpha=alpha)
plot = plot + geom_step(direction="vh", size=0.7, alpha=alpha) plot = plot + geom_step(direction="vh", size=1, alpha=alpha)
plot = plot + geom_hline( aes(yintercept = 0.9), col="black" , size = 0.5, show.legend = NA,linetype="dashed") plot = plot + geom_hline( aes(yintercept = 0.9), col="black" , size = 0.5, show.legend = NA,linetype="dashed")
plot = plot + facet_grid(. ~ couple ) plot = plot + facet_grid(. ~ couple )
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1)) plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot} plot}
plot = plot_PR_c(c("H0/HaPC NeG1","H0/HaPC NeG2","H0/HaPC NeG4","H0/H0 NeG4/NeG4_NeC_1","H0/H0 NeG1/NeG1_NeC_4", "H0/HaPC NeG4_NeC_1","H0/HaPC NeG1_NeC_4")) plot = plot_PR_c(c("H0/HaPC NeG","H0 NeG1/HaPC NeG1_NeC_4","H0/HaPC NeG2","H0/HaPC NeG4","H0/H0 NeG4/NeG4_NeC_1","H0/H0 NeG1/NeG1_NeC_4", "H0/HaPC NeG4_NeC_1","H0/HaPC NeG1_NeC_4"))
plot1 = plot_PR_c(c("H0/HaPC NeG1","H0/HaPC NeG2","H0/HaPC NeG4")) plot1 = plot_PR_c(c("H0 NeG1/HaPC NeG1_NeC_4","NULL","NULL2"))
plot2 = plot_PR_c(c("H0/H0 NeG4/NeG4_NeC_1","H0/H0 NeG1/NeG1_NeC_4","NULL")) plot2 = plot_PR_c(c("H0/HaPC NeG1","H0/HaPC NeG2","H0/HaPC NeG4"))
plot3 = plot_PR_c(c("H0/HaPC NeG4_NeC_1","H0/HaPC NeG1_NeC_4","NULL")) plot3 = plot_PR_c(c("H0/H0 NeG4/NeG4_NeC_1","H0/H0 NeG1/NeG1_NeC_4","NULL"))
plot4 = plot_PR_c(c("H0/HaPC NeG4_NeC_1","H0/HaPC NeG1_NeC_4","NULL"))
legend_PR = get_legend(plot + theme(legend.position="top", legend_PR = get_legend(plot + theme(legend.position="top",
legend.text = element_text(size=10), legend.text = element_text(size=10),
...@@ -560,11 +571,12 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi ...@@ -560,11 +571,12 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot1 + theme(legend.position="none"), plot1 + theme(legend.position="none"),
plot2 + theme(legend.position="none"), plot2 + theme(legend.position="none"),
plot3 + theme(legend.position="none"), plot3 + theme(legend.position="none"),
plot4 + theme(legend.position="none"),
ncol = 1, scale = 1, ncol = 1, scale = 1,
labels = c("","Only Pressure Shift","Only Change in Selection Efficacy","Pressure Shift & Change in Selection Efficacy"), labels = c("","Pressure Shift+CSE", "Only Pressure Shift","Only Change in Selection Efficacy","Pressure Shift in a context of global change of Selection Efficacy"),
align = "h", align = "h",
rel_heights = c( 0.3, 0.7,0.7,0.7), rel_heights = c( 0.3, 0.7,0.7,0.7,0.7),
hjust = 0, vjust = 0) hjust = 0, vjust = 0)
...@@ -572,7 +584,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi ...@@ -572,7 +584,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
save_plot(paste0(opt$out,suffix,".recall_precision_ok.pdf"), save_plot(paste0(opt$out,suffix,".recall_precision_ok.pdf"),
plot_recall_precision_papier_ok, plot_recall_precision_papier_ok,
ncol = 2.5, ncol = 2.5,
nrow = 2.5, nrow = 3.5,
base_aspect_ratio = 1, base_aspect_ratio = 1,
limitsize = FALSE limitsize = FALSE
) )
......
...@@ -33,7 +33,7 @@ import os ...@@ -33,7 +33,7 @@ import os
import logging import logging
from ete3 import Tree, NodeStyle, TreeStyle, TextFace from ete3 import Tree, NodeStyle, TreeStyle, TextFace
import pandas as pd
#=================================================================================================== #===================================================================================================
# inputs # inputs
...@@ -50,8 +50,8 @@ parser.add_argument('--debug', action="store_true", ...@@ -50,8 +50,8 @@ parser.add_argument('--debug', action="store_true",
requiredOptions = parser.add_argument_group('REQUIRED OPTIONS') requiredOptions = parser.add_argument_group('REQUIRED OPTIONS')
requiredOptions.add_argument('-i', "--input_dir", type=str, requiredOptions.add_argument('-i', "--input_dir", type=str,
help='input_dir', required=True) help='input_dir', required=True)
requiredOptions.add_argument('-o', '--output_prefix', type=str, requiredOptions.add_argument('-o', '--output', type=str,
help="Output prefix name", required=True) help="Output name", required=True)
############## ##############
...@@ -60,7 +60,7 @@ requiredOptions.add_argument('-o', '--output_prefix', type=str, ...@@ -60,7 +60,7 @@ requiredOptions.add_argument('-o', '--output_prefix', type=str,
args = parser.parse_args() args = parser.parse_args()
InputDir = args.input_dir InputDir = args.input_dir
OutPrefix = args.output_prefix OutFile = args.output
#=================================================================================================== #===================================================================================================
...@@ -88,12 +88,91 @@ logger.debug(sys.argv) ...@@ -88,12 +88,91 @@ logger.debug(sys.argv)
#=================================================================================================== #===================================================================================================
from os import listdir from os import listdir
from os.path import isfile, join from os.path import isfile, join, basename
files_l = [join(InputDir, f) for f in listdir(InputDir) if isfile(join(InputDir, f))] files_l = [join(InputDir, f) for f in listdir(InputDir) if isfile(join(InputDir, f))]
print(files_l) print(files_l)
fn_d = {}
for fn in files_l:
prefix_l = basename(fn).split("@")
print(prefix_l)
prefix = prefix_l[0]
hyp = prefix_l[1]
if len(prefix_l) ==3:
t = prefix_l[2]
else:
t = prefix_l[1]
hyp = hyp.replace(".nw","") # input_tree
t = t.replace(".nw","")
if not prefix in fn_d:
fn_d[prefix]={}
if not hyp in fn_d[prefix]:
fn_d[prefix][hyp]={}
fn_d[prefix][hyp][t] = fn
print(fn_d)
prefix_l = []
hyp_l = []
bl_aa_l = []
bl_nt_l = []
def calc_bl(fn):
try:
t=Tree(fn)
except Exception as exc:
print(str(exc))
sys.exit(1)
bl = 0
for node in t.traverse("postorder"):
bl += node.dist
return(bl)
for prefix in fn_d:
for hyp in fn_d[prefix]:
if hyp == "input_tree":
bl_aa = 0
bl_nt = calc_bl(fn_d[prefix][hyp]["input_tree"])
else:
if "aa" in fn_d[prefix][hyp]:
bl_aa = calc_bl(fn_d[prefix][hyp]["aa"])
if "nt" in fn_d[prefix][hyp]:
bl_nt = calc_bl(fn_d[prefix][hyp]["nt"])
prefix_l.append(prefix)
hyp_l.append(hyp)
bl_aa_l.append(bl_aa)
bl_nt_l.append(bl_nt)
df_final = pd.DataFrame({'Prefix': prefix_l,
'hyp' : hyp_l,
'bl_aa' : bl_aa_l,
'bl_nt' : bl_nt_l})
df_final["nt/aa"] = df_final["bl_aa"]/df_final["bl_nt"] * 1.
df_final = df_final[["Prefix","hyp","bl_aa","bl_nt","nt/aa"]]
#===================================================================================================
# Create output files
#===================================================================================================
df_final.to_csv(OutFile+".tsv", sep = "\t", index = False)
print(df_final)
print("Write summary output in %s" %OutFile)
sys.exit(0) sys.exit(0)