Commit 0030e37e authored by Carine Rey's avatar Carine Rey
Browse files

repo refactoring

parent 03815065
...@@ -13,10 +13,13 @@ let repo ~preview dataset_l = ...@@ -13,10 +13,13 @@ let repo ~preview dataset_l =
let model_prefix = dataset.model_prefix in let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in let tree_prefix = dataset.tree_prefix in
let repo_ready_data = Ready_dataset.repo dataset.dataset in let repo_ready_data = Ready_dataset.repo dataset.dataset in
let repo_raw_data = if preview then Raw_dataset.repo ~prefix:model_prefix (Ready_dataset.to_raw dataset.dataset) else [] in let repo_raw_data = Raw_dataset.repo ~prefix:model_prefix (Ready_dataset.to_raw dataset.dataset) in
List.concat [ List.concat [
Repo.shift "simulated_data" (Repo.shift (tree_prefix ^"_"^model_prefix) repo_raw_data); Repo.shift "minimal" (Repo.shift (tree_prefix ^"_"^model_prefix) repo_raw_data);
Repo.shift "simulated_data_debug" (Repo.shift tree_prefix (Repo.shift model_prefix repo_ready_data)); Repo.shift "debug" repo_ready_data;
] ]
|> Repo.shift "dataset"
|> Repo.shift model_prefix
|> Repo.shift "Results_per_hypothesis"
) )
|> List.concat |> List.concat
...@@ -119,10 +119,10 @@ let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~n ...@@ -119,10 +119,10 @@ let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~n
); );
(if ne_test then (if ne_test then
[ [
H0_BigNeInSmallNe; H0_BigNeInSmallNe;
H0_SmallNeInBigNe; H0_SmallNeInBigNe;
HaPC_BigNeInSmallNe; HaPC_BigNeInSmallNe;
HaPC_SmallNeInBigNe; HaPC_SmallNeInBigNe;
] ]
else else
[] []
...@@ -200,7 +200,6 @@ let repo_of_dataset_results_l ~dataset_results_l = ...@@ -200,7 +200,6 @@ let repo_of_dataset_results_l ~dataset_results_l =
in in
repo repo
|> Repo.shift dataset_results.model_prefix |> Repo.shift dataset_results.model_prefix
|> Repo.shift dataset_results.tree_prefix
) )
|> List.concat |> List.concat
...@@ -287,9 +286,12 @@ let derive_profile ?(indir = "") ?(ns = 0) ~preview ~fast_mode ~no_Ne ~ne_test ~ ...@@ -287,9 +286,12 @@ let derive_profile ?(indir = "") ?(ns = 0) ~preview ~fast_mode ~no_Ne ~ne_test ~
let post_analyses_res = Post_analyses.post_analyses_res_of_dataset_results_l ~dataset_results_l in let post_analyses_res = Post_analyses.post_analyses_res_of_dataset_results_l ~dataset_results_l in
let repo_per_tree = [ let repo_per_tree = [
Dataset.repo dataset_l ~preview ; Dataset.repo dataset_l ~preview ;
repo_of_dataset_results_l ~dataset_results_l ; Repo.shift "Results_per_hypothesis" (repo_of_dataset_results_l ~dataset_results_l);
Repo.shift tree_prefix (Post_analyses.repo_of_post_analyses_res ~prefix:tree_prefix ~post_analyses_res); Post_analyses.repo_of_post_analyses_res ~prefix:tree_prefix ~post_analyses_res;
] |> List.concat ]
|> List.concat
|> Repo.shift tree_prefix
|> Repo.shift "Results_per_tree"
in in
(repo_per_tree, post_analyses_res) (repo_per_tree, post_analyses_res)
) )
...@@ -347,7 +349,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe ...@@ -347,7 +349,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
repo_of_dataset_results_l ~dataset_results_l ; repo_of_dataset_results_l ~dataset_results_l ;
] |> List.concat ] |> List.concat
in in
let repo = sim_repo_l @ repo_real_trees in let repo = (Repo.shift "Simulated_datasets" sim_repo_l) @ (Repo.shift "Real_datasets" repo_real_trees) in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let simulation_command = let simulation_command =
......
...@@ -59,37 +59,37 @@ let is_hyp ~hyp (dataset_results :dataset_res) = ...@@ -59,37 +59,37 @@ let is_hyp ~hyp (dataset_results :dataset_res) =
let build_cmd_t_choices (opt_name : string) mr_option = let build_cmd_t_choices (opt_name : string) mr_option =
match mr_option with match mr_option with
| Some x -> [opt opt_name dep x] | Some x -> [opt opt_name dep x]
| None -> [] | None -> []
let make_t_choices ~h0_mr ~h0_NeBig_mr ~h0_NeSmall_mr ~haPCOC_mr ~haPC_mr ~haPC_NeBig_mr ~haPC_NeSmall_mr ~h0_NeBigInSmall_mr let make_t_choices ~h0_mr ~h0_NeBig_mr ~h0_NeSmall_mr ~haPCOC_mr ~haPC_mr ~haPC_NeBig_mr ~haPC_NeSmall_mr ~h0_NeBigInSmall_mr
~h0_NeSmallInBig_mr ~haPC_NeBigInSmall_mr ~haPC_NeSmallInBig_mr () : post_analyses_dir directory workflow = ~h0_NeSmallInBig_mr ~haPC_NeBigInSmall_mr ~haPC_NeSmallInBig_mr () : post_analyses_dir directory workflow =
let env = r_env in let env = r_env in
let out = dest // "out" in let out = dest // "out" in
let cmd_mr = List.map [ let cmd_mr = List.map [
("--H0" , h0_mr ); ("--H0" , h0_mr );
("--H0NeBig" , h0_NeBig_mr ); ("--H0NeBig" , h0_NeBig_mr );
("--H0NeSmall" , h0_NeSmall_mr ); ("--H0NeSmall" , h0_NeSmall_mr );
("--H0NeBigInSmall" , h0_NeBigInSmall_mr ); ("--H0NeBigInSmall" , h0_NeBigInSmall_mr );
("--H0NeSmallInBig" , h0_NeSmallInBig_mr ); ("--H0NeSmallInBig" , h0_NeSmallInBig_mr );
("--HaPCOC" , haPCOC_mr ); ("--HaPCOC" , haPCOC_mr );
("--HaPC" , haPC_mr ); ("--HaPC" , haPC_mr );
("--HaPCNeBig" , haPC_NeBig_mr ); ("--HaPCNeBig" , haPC_NeBig_mr );
("--HaPCNeSmall" , haPC_NeSmall_mr); ("--HaPCNeSmall" , haPC_NeSmall_mr);
("--HaPCNeBigInSmall" , haPC_NeBigInSmall_mr ); ("--HaPCNeBigInSmall" , haPC_NeBigInSmall_mr );
("--HaPCNeSmallInBig" , haPC_NeSmallInBig_mr); ("--HaPCNeSmallInBig" , haPC_NeSmallInBig_mr);
] ~f:(fun (opt_name, mr_option) -> build_cmd_t_choices opt_name mr_option) ] ~f:(fun (opt_name, mr_option) -> build_cmd_t_choices opt_name mr_option)
|> List.concat in |> List.concat in
workflow ~descr:"post_analyses.t_choices" [ workflow ~descr:"post_analyses.t_choices" [
docker env ( docker env (
and_list [ and_list [
mkdir_p dest ; mkdir_p dest ;
cmd "Rscript" ([ cmd "Rscript" ([
[file_dump (string Scripts.calc_t_per_meth) ; [file_dump (string Scripts.calc_t_per_meth) ;
opt "--out " ident out; opt "--out " ident out;
] ; ] ;
cmd_mr ; cmd_mr ;
] |> List.concat) ; ] |> List.concat) ;
]) ])
] ]
...@@ -133,8 +133,8 @@ let group_simu_infos ~simu_infos_l : simu_infos directory workflow = ...@@ -133,8 +133,8 @@ let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
let plot_trees ~reinfered_tree_l : plot_trees directory workflow = let plot_trees ~reinfered_tree_l : plot_trees directory workflow =
let env = r_env in let env = r_env in
let cmd_cp_l = List.map reinfered_tree_l ~f:(fun rt -> [ let cmd_cp_l = List.map reinfered_tree_l ~f:(fun rt -> [
cmd "cp" [dep rt.reinfered_tree ; tmp // (rt.tree_prefix ^"@"^ rt.model_prefix ^ ".nw")]; cmd "cp" [dep rt.reinfered_tree ; tmp // (rt.tree_prefix ^"@"^ rt.model_prefix ^ ".nw")];
cmd "cp" [dep rt.input_tree ; tmp // (rt.tree_prefix ^"@input_tree.nw")] cmd "cp" [dep rt.input_tree ; tmp // (rt.tree_prefix ^"@input_tree.nw")]
]) |> List.concat ]) |> List.concat
in in
let out = dest // "out" in let out = dest // "out" in
...@@ -169,26 +169,26 @@ type res_all_hyp = { ...@@ -169,26 +169,26 @@ type res_all_hyp = {
h0_NeSmallInBig_res : dataset_res option ; h0_NeSmallInBig_res : dataset_res option ;
ha_PC_NeBigInSmall_res : dataset_res option ; ha_PC_NeBigInSmall_res : dataset_res option ;
ha_PC_NeSmallInBig_res : dataset_res option ; ha_PC_NeSmallInBig_res : dataset_res option ;
} }
let make_t_choices_per_couple {h0_res; h0_NeBig_res; h0_NeSmall_res; ha_PC_res; ha_PCOC_res; ha_PC_NeBig_res ; ha_PC_NeSmall_res; let make_t_choices_per_couple {h0_res; h0_NeBig_res; h0_NeSmall_res; ha_PC_res; ha_PCOC_res; ha_PC_NeBig_res ; ha_PC_NeSmall_res;
h0_NeBigInSmall_res; h0_NeSmallInBig_res; ha_PC_NeBigInSmall_res; ha_PC_NeSmallInBig_res} = h0_NeBigInSmall_res; h0_NeSmallInBig_res; ha_PC_NeBigInSmall_res; ha_PC_NeSmallInBig_res} =
let h0_mr = get_merged_results_opt h0_res in let h0_mr = get_merged_results_opt h0_res in
let h0_NeBig_mr = get_merged_results_opt h0_NeBig_res in let h0_NeBig_mr = get_merged_results_opt h0_NeBig_res in
let h0_NeSmall_mr = get_merged_results_opt h0_NeSmall_res in let h0_NeSmall_mr = get_merged_results_opt h0_NeSmall_res in
let h0_NeBigInSmall_mr = get_merged_results_opt h0_NeBigInSmall_res in let h0_NeBigInSmall_mr = get_merged_results_opt h0_NeBigInSmall_res in
let h0_NeSmallInBig_mr = get_merged_results_opt h0_NeSmallInBig_res in let h0_NeSmallInBig_mr = get_merged_results_opt h0_NeSmallInBig_res in
let haPCOC_mr = get_merged_results_opt ha_PCOC_res in let haPCOC_mr = get_merged_results_opt ha_PCOC_res in
let haPC_mr = get_merged_results_opt ha_PC_res in let haPC_mr = get_merged_results_opt ha_PC_res in
let haPC_NeBig_mr = get_merged_results_opt ha_PC_NeBig_res in let haPC_NeBig_mr = get_merged_results_opt ha_PC_NeBig_res in
let haPC_NeSmall_mr = get_merged_results_opt ha_PC_NeSmall_res in let haPC_NeSmall_mr = get_merged_results_opt ha_PC_NeSmall_res in
let haPC_NeBigInSmall_mr = get_merged_results_opt ha_PC_NeBigInSmall_res in let haPC_NeBigInSmall_mr = get_merged_results_opt ha_PC_NeBigInSmall_res in
let haPC_NeSmallInBig_mr = get_merged_results_opt ha_PC_NeSmallInBig_res in let haPC_NeSmallInBig_mr = get_merged_results_opt ha_PC_NeSmallInBig_res in
make_t_choices ~h0_mr ~h0_NeBig_mr ~h0_NeSmall_mr ~haPCOC_mr ~haPC_mr ~haPC_NeBig_mr ~haPC_NeSmall_mr ~h0_NeBigInSmall_mr make_t_choices ~h0_mr ~h0_NeBig_mr ~h0_NeSmall_mr ~haPCOC_mr ~haPC_mr ~haPC_NeBig_mr ~haPC_NeSmall_mr ~h0_NeBigInSmall_mr
~h0_NeSmallInBig_mr ~haPC_NeBigInSmall_mr ~haPC_NeSmallInBig_mr () ~h0_NeSmallInBig_mr ~haPC_NeBigInSmall_mr ~haPC_NeSmallInBig_mr ()
...@@ -286,7 +286,7 @@ let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix : se ...@@ -286,7 +286,7 @@ let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix : se
let merged_results_dir = tmp // "merged_results_dir" in let merged_results_dir = tmp // "merged_results_dir" in
let out = dest // "out" in let out = dest // "out" in
let cmd_cp_t_choices_l = List.map t_choices_l ~f:(fun t_choices -> let cmd_cp_t_choices_l = List.map t_choices_l ~f:(fun t_choices ->
cmd "cp" [dep t_choices.t_choices_max ; t_choices_dir // (t_choices.tree_prefix ^ ".tsv")] cmd "cp" [dep t_choices.t_choices_recall09 ; t_choices_dir // (t_choices.tree_prefix ^ ".tsv")]
) )
in in
let cmd_cp_merged_results_l = List.map dataset_results_l ~f:(fun dataset_results -> let cmd_cp_merged_results_l = List.map dataset_results_l ~f:(fun dataset_results ->
...@@ -324,20 +324,22 @@ let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile_prefix ~ ...@@ -324,20 +324,22 @@ let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile_prefix ~
post_analyses_res.dataset_results_l) |> List.concat post_analyses_res.dataset_results_l) |> List.concat
in in
let sens_spe_t_choices_plot = plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix in let sens_spe_t_choices_plot = plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix in
Repo.[ [Repo.[
item ["sens_spe.pdf"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.pdf"]); item ["sens_spe.tsv"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.tsv"]);
item ["all_t_choices.pdf"] (sens_spe_t_choices_plot / selector ["out.max_t_per_tree.pdf"]); item ["all_t_choices.tsv"] (sens_spe_t_choices_plot / selector ["out.t_per_tree.tsv"]);
item ["sens_spe.tsv"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.tsv"]); ] |> Repo.shift "pdf_tsv" ;
item ["all_t_choices.tsv"] (sens_spe_t_choices_plot / selector ["out.max_t_per_tree.tsv"]); Repo.[
] item ["sens_spe.pdf"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.pdf"]);
item ["all_t_choices.pdf"] (sens_spe_t_choices_plot / selector ["out.t_per_tree.pdf"]);
]
] |> List.concat
let repo_of_post_analyses_simu ~post_analyses_simu = let repo_of_post_analyses_simu ~post_analyses_simu =
[ [
Repo.[ Repo.[
item ["hypothesis_validation.pdf"] post_analyses_simu.simu_infos_plot ; item ["hypothesis_validation.pdf"] post_analyses_simu.simu_infos_plot ;
item ["trees_validation.pdf"] post_analyses_simu.trees_plot; item ["trees_validation.pdf"] post_analyses_simu.trees_plot;
] |> Repo.shift "simu_infos" ]
; ;
(List.map post_analyses_simu.simu_infos_l ~f:(fun simu_infos -> (List.map post_analyses_simu.simu_infos_l ~f:(fun simu_infos ->
match simu_infos.simu_infos with match simu_infos.simu_infos with
...@@ -345,10 +347,12 @@ let repo_of_post_analyses_simu ~post_analyses_simu = ...@@ -345,10 +347,12 @@ let repo_of_post_analyses_simu ~post_analyses_simu =
| Some w -> | Some w ->
Repo.[ Repo.[
item [simu_infos.tree_prefix ^ "@" ^ simu_infos.model_prefix ^ ".tsv"] w item [simu_infos.tree_prefix ^ "@" ^ simu_infos.model_prefix ^ ".tsv"] w
] |> Repo.shift "simu_infos" ] |> Repo.shift simu_infos.tree_prefix
|> Repo.shift "tsv"
) |> List.concat ) |> List.concat
); );
] |> List.concat ] |> List.concat
|> Repo.shift "Simulation_details"
let repo_of_post_analyses_res ~prefix ~post_analyses_res = let repo_of_post_analyses_res ~prefix ~post_analyses_res =
[ [
...@@ -361,7 +365,7 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res = ...@@ -361,7 +365,7 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res =
item [prefix ^ ".t_choices.complete.tsv"] w.t_choices_complete ; item [prefix ^ ".t_choices.complete.tsv"] w.t_choices_complete ;
item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ; item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
item [prefix ^ ".t_choices.condensed.pdf"] w.t_choices_condensed_plot ; item [prefix ^ ".t_choices.condensed.pdf"] w.t_choices_condensed_plot ;
] |> Repo.shift "t_choices" ]
); );
(*( (*(
match post_analyses_res.auto_t_plot_l with match post_analyses_res.auto_t_plot_l with
...@@ -374,6 +378,6 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res = ...@@ -374,6 +378,6 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res =
] ]
)|> List.concat )|> List.concat
|> Repo.shift "auto_t_pdf" |> Repo.shift "auto_t_pdf"
);*) );*)
] |> List.concat ] |> List.concat
...@@ -50,7 +50,7 @@ let cat_file ~(f_l: text_file workflow list) : text_file workflow = ...@@ -50,7 +50,7 @@ let cat_file ~(f_l: text_file workflow list) : text_file workflow =
let profile_l_of_splitted_profile ~nb_cat ~nb_sites profile_fn = let profile_l_of_splitted_profile ~nb_cat ~nb_sites profile_fn =
let profile_f = input profile_fn in let profile_f = input profile_fn in
let prefix = Filename.chop_extension profile_fn in let prefix = Filename.chop_extension (Filename.basename profile_fn) in
let dist_bins = match nb_cat with let dist_bins = match nb_cat with
| 3 -> "[0.01,0.4],[0.4,0.6],[0.6,2]" | 3 -> "[0.01,0.4],[0.4,0.6],[0.6,2]"
| 1 -> "[0.01,2]" | 1 -> "[0.01,2]"
......
...@@ -45,7 +45,6 @@ let repo rd = ...@@ -45,7 +45,6 @@ let repo rd =
; ;
] ]
|> List.concat |> List.concat
|> Repo.shift "ready_dataset"
let to_raw { input_tree ; fna ; fna_infos} = let to_raw { input_tree ; fna ; fna_infos} =
{ Raw_dataset.input_tree ; fna ; fna_infos} { Raw_dataset.input_tree ; fna ; fna_infos}
......
...@@ -8,6 +8,8 @@ library("reshape2") ...@@ -8,6 +8,8 @@ library("reshape2")
library("ggplot2") library("ggplot2")
library("cowplot") library("cowplot")
date = format(Sys.time(), format="%Y-%m-%d %X")
option_list = list( option_list = list(
make_option(c("--H0") , type="character", default=NULL, help="merged_results H0", metavar="character"), make_option(c("--H0") , type="character", default=NULL, help="merged_results H0", metavar="character"),
make_option(c("--H0NeBig") , type="character", default=NULL, help="merged_results H0NeBig", metavar="character"), make_option(c("--H0NeBig") , type="character", default=NULL, help="merged_results H0NeBig", metavar="character"),
...@@ -89,9 +91,10 @@ df_d_H0HaPC_NeSmall = build_df_dist_couple(df_H0NeSmall_melt, df_HaPCNeSmall_mel ...@@ -89,9 +91,10 @@ df_d_H0HaPC_NeSmall = build_df_dist_couple(df_H0NeSmall_melt, df_HaPCNeSmall_mel
df_d_H0HaPC_NeBigInSmall = build_df_dist_couple(df_H0NeBigInSmall_melt, df_HaPCNeBigInSmall_melt, "H0/HaPC NeBigInSmall") df_d_H0HaPC_NeBigInSmall = build_df_dist_couple(df_H0NeBigInSmall_melt, df_HaPCNeBigInSmall_melt, "H0/HaPC NeBigInSmall")
df_d_H0HaPC_NeSmallInBig = build_df_dist_couple(df_H0NeSmallInBig_melt, df_HaPCNeSmallInBig_melt, "H0/HaPC NeSmallInBig") df_d_H0HaPC_NeSmallInBig = build_df_dist_couple(df_H0NeSmallInBig_melt, df_HaPCNeSmallInBig_melt, "H0/HaPC NeSmallInBig")
df_d_H0HaPCOC = build_df_dist_couple(df_H0_melt, df_HaPCOC_melt, "H0/HaPCOC") df_d_H0HaPCOC = build_df_dist_couple(df_H0_melt, df_HaPCOC_melt, "H0/HaPCOC")
df_d_H0NeBigHaPCOC = build_df_dist_couple(df_H0NeBig_melt, df_HaPCOC_melt, "H0NeBig/HaPCOC")
df_d = rbind.data.frame(df_d_H0HaPC, df_d_H0HaPCOC,df_d_H0HaPC_NeBig,df_d_H0HaPC_NeSmall, df_d = rbind.data.frame(df_d_H0HaPC, df_d_H0HaPCOC,df_d_H0HaPC_NeBig,df_d_H0HaPC_NeSmall,
df_d_H0HaPC_NeBigInSmall,df_d_H0HaPC_NeSmallInBig) df_d_H0HaPC_NeBigInSmall,df_d_H0HaPC_NeSmallInBig,df_d_H0NeBigHaPCOC)
df_d = df_d[order(df_d$methode),] df_d = df_d[order(df_d$methode),]
...@@ -152,9 +155,11 @@ df_H0HaPC_NeSmall = build_df_couple(df_H0NeSmall_melt, df_HaPCNeSmall_melt, "H0/ ...@@ -152,9 +155,11 @@ df_H0HaPC_NeSmall = build_df_couple(df_H0NeSmall_melt, df_HaPCNeSmall_melt, "H0/
df_H0HaPC_NeBigInSmall = build_df_couple(df_H0NeBigInSmall_melt, df_HaPCNeBigInSmall_melt, "H0/HaPC NeBigInSmall") df_H0HaPC_NeBigInSmall = build_df_couple(df_H0NeBigInSmall_melt, df_HaPCNeBigInSmall_melt, "H0/HaPC NeBigInSmall")
df_H0HaPC_NeSmallInBig = build_df_couple(df_H0NeSmallInBig_melt, df_HaPCNeSmallInBig_melt, "H0/HaPC NeSmallInBig") df_H0HaPC_NeSmallInBig = build_df_couple(df_H0NeSmallInBig_melt, df_HaPCNeSmallInBig_melt, "H0/HaPC NeSmallInBig")
df_H0HaPCOC = build_df_couple(df_H0_melt, df_HaPCOC_melt, "H0/HaPCOC") df_H0HaPCOC = build_df_couple(df_H0_melt, df_HaPCOC_melt, "H0/HaPCOC")
df_H0NeBigHaPCOC = build_df_couple(df_H0NeBig_melt, df_HaPCOC_melt, "H0NeBig/HaPCOC")
df = rbind.data.frame(df_H0HaPC, df_H0HaPCOC,df_H0HaPC_NeBig,df_H0HaPC_NeSmall, df = rbind.data.frame(df_H0HaPC, df_H0HaPCOC,df_H0HaPC_NeBig,df_H0HaPC_NeSmall,
df_H0HaPC_NeBigInSmall,df_H0HaPC_NeSmallInBig) df_H0HaPC_NeBigInSmall,df_H0HaPC_NeSmallInBig,
df_H0NeBigHaPCOC)
print(head(df)) print(head(df))
print(tail(df)) print(tail(df))
...@@ -200,7 +205,9 @@ print(summary(df_out)) ...@@ -200,7 +205,9 @@ print(summary(df_out))
print("prep plot max mcc") print("prep plot max mcc")
df_max_mcc_per_method = do.call(rbind, lapply(split(df_out,paste0(df_out$methode,df_out$couple)), df_max_mcc_per_method = do.call(rbind, lapply(split(df_out,paste0(df_out$methode,df_out$couple)),
function(x) {return(x[which.max(x$MCC),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")])})) function(x) {
return(x[which.max(x$MCC),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")])
}))
print(df_max_mcc_per_method) print(df_max_mcc_per_method)
...@@ -211,17 +218,24 @@ df_max_mcc_per_method_2$variable="MCC" ...@@ -211,17 +218,24 @@ df_max_mcc_per_method_2$variable="MCC"
######################################################################## ########################################################################
print("prep plot recall_precision_per_meth") print("prep plot recall_precision_per_meth")
df_recall_sup09_per_meth = do.call(rbind, lapply(split(df_out,paste0(df_out$methode,df_out$couple)),
print(df_out[is.na(df_out$methode),])
print(table(df_out$methode))
print(table(df_out$methode, df_out$couple))
df_recall_sup09_per_meth = do.call(rbind, lapply(split(df_out,paste0(df_out$methode," ", df_out$couple)),
function(x) { function(x) {
x$precision[is.na(x$precision)] = 0
x2 = x[x$precision > 0.9,] x2 = x[x$precision > 0.9,]
if (nrow(x2) > 0) { if (nrow(x2) > 0) {
return(x2[which.max(x2$sensitivity),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")]) res = x2[which.max(x2$sensitivity),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")]
} else { } else {
x = x[1,c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")] x = x[1,c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")]
print(x)
x[,c("threshold", "MCC","sensitivity","specificity","precision")] = NA x[,c("threshold", "MCC","sensitivity","specificity","precision")] = NA
return(x) res = x
} }
return(res)
})) }))
print(df_recall_sup09_per_meth) print(df_recall_sup09_per_meth)
...@@ -276,6 +290,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi ...@@ -276,6 +290,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot = plot + geom_hline( aes(yintercept = 0.9), col="black" , size = 1, show.legend = NA,linetype="dashed") plot = plot + geom_hline( aes(yintercept = 0.9), col="black" , size = 1, show.legend = NA,linetype="dashed")
plot = plot + facet_grid(couple ~ methode) plot = plot + facet_grid(couple ~ methode)
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1)) plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot = plot + ggtitle(paste0("Threshold definition such as precision > 0.9 - (", date , ")" ))
plot_recall_precision = plot plot_recall_precision = plot
...@@ -305,7 +320,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi ...@@ -305,7 +320,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot_max_MCC = plot plot_max_MCC = plot
save_plot(paste0(opt$out,suffix,".max_MCC_per_meth.pdf"), save_plot(paste0(opt$out,suffix,".indicator_per_meth.pdf"),
plot_max_MCC, plot_max_MCC,
ncol = 0.4 * length(unique(df_out_melt$methode)), ncol = 0.4 * length(unique(df_out_melt$methode)),
nrow = 1.7, nrow = 1.7,
......
...@@ -40,19 +40,19 @@ files = files[grep("tsv", files)] ...@@ -40,19 +40,19 @@ files = files[grep("tsv", files)]
files_split = strsplit(files, ".", fixed = T) files_split = strsplit(files, ".", fixed = T)
files_df = as.data.frame(do.call(rbind, files_split)) files_df = as.data.frame(do.call(rbind, files_split))
print(files_df) print(files_df)
files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = gsub(".tsv", "",files), bl = "NA", profil = opt$profil) files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = gsub(".tsv", "",files), profil = opt$profil)
#files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = files_df$V1, bl = files_df$V1, profil = opt$profil) #files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = files_df$V1, bl = files_df$V1, profil = opt$profil)
condensed_meths = c("PCOC","diffsel","Identical_LG08","Mutinomial_1MinusLRT","Tdg09_1MinusFDR","Msd_1MinusP","Topological_LG08")
read_dir = function(x) { read_dir = function(x) {
file = x["files"] file = x["files"]
tree = x["tree"] tree = x["tree"]
profil = x["profil"] profil = x["profil"]
bl = x["bl"]
df = read.csv(file, sep = "\t", header = T) df = read.csv(file, sep = "\t", header = T)
df$tree = tree df$tree = tree
df$profil = profil df$profil = profil
df$bl = bl df = df[df$methode %in% condensed_meths, ]
return(df) return(df)
} }
...@@ -62,41 +62,56 @@ alpha = 0.7 ...@@ -62,41 +62,56 @@ alpha = 0.7
x_labs = "" x_labs = ""
y_labs = "Threshold" y_labs = "Threshold"
print(head(df))
df_max_mcc_per_method = do.call(rbind, lapply(split(df,df$methode),
function(x) {return(x[which.max(x$threshold),c("methode", "threshold","tree","profil","couple")])}))
df_tmp = subset(df, couple == "H0/HaPCOC")
df_t_per_method = do.call(rbind, lapply(split(df_tmp, paste0(df_tmp$methode, df_tmp$tree)),
function(x) {return(x[which.max(x$threshold),c("methode", "threshold","tree","profil","couple")])}))
df_max_mcc_per_method_2 = df_max_mcc_per_method df_t_per_method_2 = df_t_per_method
df_max_mcc_per_method_2 [ df_max_mcc_per_method_2$tree == unique(df_max_mcc_per_method_2$tree)[1],] df_t_per_method_2 [ df_t_per_method_2$tree == unique(df_t_per_method_2$tree)[1],]
df2 = df
df2$retained_t = "no"
df2$retained_t[df2$couple == "H0/HaPCOC"] = "yes"
plot = ggplot(df, aes(x=tree, y=threshold, col=couple, shape = profil)) + theme_bw() + labs(x=x_labs, y=y_labs) + ylim(c(0,1.5)) plot = ggplot(df2, aes(x=tree, y=threshold, col=couple, shape = retained_t)) + theme_bw() + labs(x=x_labs, y=y_labs) + ylim(c(0,1))
plot = plot + theme(axis.text.x = element_text(angle = 0, hjust = 1)) plot = plot + theme(axis.text.x = element_text(angle = 0, hjust = 1))
plot = plot + geom_point() plot = plot + geom_point(position= position_dodge(.5), size = 3)
plot = plot + geom_hline( data = df_max_mcc_per_method, aes(yintercept = threshold), col="red", alpha=alpha, show.legend = NA,linetype="dotted") plot = plot + theme(legend.position="top")
plot = plot + geom_label( data = df_max_mcc_per_method_2, nudge_y = 0.25, aes(label = threshold), col="red", alpha=alpha, show.legend = NA) #plot = plot + guides(shape=FALSE)
plot = plot + scale_shape_manual(values = c(19,17))
plot = plot + guides(col=guide_legend(ncol=2))
plot = plot + guides(shape=guide_legend(nrow=2))
#plot = plot + geom_point( data = df_t_per_method, aes(x=tree, y=threshold, group = couple), size = 5, shape = 1, col="red", alpha=alpha, position= position_dodge(.5))
#plot = plot + geom_label( data = df_t_per_method_2, nudge_y = 0.25, aes(label = threshold), col="red", alpha=alpha, show.legend = NA)
plot = plot + facet_grid(methode ~ .) + coord_flip() plot = plot + facet_grid(methode ~ .) + coord_flip()
plot = plot + ggtitle("t max per tree") plot = plot + ggtitle("Threshold definition such as\nprecision > 0.9 per H0*/Ha* couple ")
output_pdf = paste0(opt$out,".max_t_per_tree.pdf") output_pdf = paste0(opt$out,".t_per_tree.pdf")
save_plot(output_pdf, save_plot(output_pdf,
plot, plot,
ncol = 1, ncol = 1,
nrow = 0.3 * length(unique(df$methode)), nrow = 0.5 * length(unique(df$methode)),
base_aspect_ratio = 1.5, base_aspect_ratio = 1.5,
limitsize = FALSE limitsize = FALSE
) )
output_tsv = paste0(opt$out,".max_t_per_tree.tsv") output_tsv = paste0(opt$out,".t_per_tree.tsv")
write.table(df, file=output_tsv, row.names=FALSE, quote=F, sep = "\t") write.table(df, file=output_tsv, row.names=FALSE, quote=F, sep = "\t")
#################### ####################