Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 725d12e7 authored by Carine Rey's avatar Carine Rey
Browse files

calculate and plot an auto threshold per detection methode for each input tree

parent 2b59323d
......@@ -77,7 +77,7 @@ let merge_results ~res_by_tools : text_file workflow =
] ;
]
let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv : svg workflow =
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg workflow =
let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
(* use of pcoc env due to its working X server for dra plot with ete3 *)
let meths = List.map res_by_tools ~f:(fun res ->
......@@ -96,7 +96,7 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
string opt
) |> seq ~sep:","
in
let meths_t = List.map res_by_tools ~f:(fun res ->
let default_t = List.map res_by_tools ~f:(fun res ->
let opt = match res with
| `Pcoc _ -> "PCOC:0.99,PC:0.99,OC:0.99"
| `Pcoc_gamma _ -> "PCOC_gamma:0.99,PC_gamma:0.99,OC_gamma:0.99"
......@@ -112,6 +112,10 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
string opt
) |> seq ~sep:","
in
let meths_t = match t_choices with
| Some _ -> None
| None -> Some default_t
in
let package_diffsel_script_utils = tmp // "diffsel_script_utils.py" in
let package_plot_data = tmp // "plot_data.py" in
let script_plot_convergent_sites = tmp // "plot_convergent_sites.py" in
......@@ -134,9 +138,10 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
opt "-tree" dep tree ;
opt "-out" ident out ;
opt "-meth" ident meths ;
opt "-t" ident meths_t ;
option (opt "-t" ident) meths_t ;
option (opt "--t_tsv" dep) t_choices ;
flag string "--all_sites" plot_all_sites ;
]
]
)
]
] / selector ["results.svg"]
......@@ -34,9 +34,11 @@ val merge_results :
text_file workflow
val plot_merge_results :
? t_choices : text_file workflow ->
plot_all_sites: bool ->
res_by_tools : result list ->
tree:nhx workflow ->
faa:aminoacid_fasta workflow ->
tsv:text_file workflow ->
unit ->
svg workflow
......@@ -167,7 +167,7 @@ let repo_of_dataset_results_l ~dataset_results_l =
let model_prefix = dataset_results.model_prefix in
let tree_prefix = dataset_results.tree_prefix in
let merged_results_item = Repo.item [tree_prefix ^"."^model_prefix^".merged_results.tsv"] merged_results in
let plot_merged_results_item = Repo.item [tree_prefix ^"."^model_prefix^".plot_merged_results"] plot_merge_results in
let plot_merged_results_item = Repo.item [tree_prefix ^"."^model_prefix^".plot_merged_results.svg"] plot_merge_results in
let repo =
merged_results_item ::
plot_merged_results_item ::
......@@ -231,7 +231,7 @@ let derive_from_dataset ~dataset ~preview ~fast_mode=
let faa = dataset.dataset.faa in
let tree = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
let plot_all_sites = if dataset.is_real then false else true in
let plot_merged_results = plot_merge_results ~plot_all_sites ~res_by_tools ~tsv ~faa ~tree in
let plot_merged_results = plot_merge_results ~plot_all_sites ~res_by_tools ~tsv ~faa ~tree () in
let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in
{model_prefix; tree_prefix; dataset; res_by_tools ; merged_results ; plot_merged_results}
......@@ -260,17 +260,16 @@ let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~no_Ne ~no_HaPC ~tree_dir ~profile_fn ~use_concat ~only_simu () =
(* simulated trees *)
let trees = Array.to_list @@ Sys.readdir tree_dir in
let simu_dataset_l = derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC in
let post_analyses_simu = Post_analyses.post_analyses_simu_of_simu_dataset_l ~simu_dataset_l in
let repo_of_post_analyses_simu = Post_analyses.repo_of_post_analyses_simu ~post_analyses_simu in
let repo_per_tree = List.map trees ~f:(fun tree ->
let repo_per_tree_simu = List.map trees ~f:(fun tree ->
let trees = [tree] in
let tree_prefix = Filename.chop_extension tree in
let indir_dataset_l = if indir = "" then [] else parse_input_data indir in
let dataset_l =
derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC
@ indir_dataset_l in
derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC in
let dataset_results_l =
if only_simu then
[]
......@@ -285,10 +284,23 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
] |> List.concat
in
repo_per_tree
)
|> List.concat
) |> List.concat
in
(* real trees *)
let indir_dataset_l = if indir = "" then [] else parse_input_data indir in
let dataset_l = indir_dataset_l in
let dataset_results_l =
if only_simu then
[]
else
derive_det ~dataset_l ~preview ~fast_mode
in
let repo_real_trees = [
Dataset.repo dataset_l ~preview ;
repo_of_dataset_results_l ~dataset_results_l ;
] |> List.concat
in
let repo = repo_of_post_analyses_simu @ repo_per_tree in
let repo = repo_of_post_analyses_simu @ repo_real_trees @ repo_per_tree_simu in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let simulation_command =
......
......@@ -14,15 +14,24 @@ type t_choices = {
t_choices_plot: text_file workflow ;
}
type auto_t_plot = {
tree_prefix:string ;
model_prefix:string ;
auto_t_plot: svg workflow;
}
type post_analyses_res = {
t_choices : t_choices option;
auto_t_plot_l : auto_t_plot list option;
}
type simu_infos = {
simu_infos: text_file workflow option ;
model_prefix: string ;
tree_prefix: string ;
}
type post_analyses_res = {
t_choices : t_choices option;
}
type post_analyses_simu = {
simu_infos_l : simu_infos list;
simu_infos_plot : text_file workflow ;
......@@ -100,6 +109,27 @@ let get_t_choices ~(dataset_results_l: dataset_res list) : t_choices option =
Some {t_choices_max; t_choices_complete ; t_choices_plot}
| _ -> None
let plot_det_meth_res_auto_t ~t_choices ~dataset_results_l =
match t_choices with
| None -> None
| Some w -> Some (
List.map dataset_results_l ~f:(fun (dataset_results : dataset_res) ->
let tree_prefix = dataset_results.tree_prefix in
let model_prefix = dataset_results.model_prefix in
let ready_dataset = dataset_results.dataset.dataset in
let tree = Tree_dataset.tree ready_dataset.tree_dataset `Detection in
let faa = ready_dataset.faa in
let tsv = dataset_results.merged_results in
let res_by_tools = dataset_results.res_by_tools in
let plot_all_sites = true in
let t_choices = w.t_choices_max in
let auto_t_plot = plot_merge_results ~t_choices ~plot_all_sites ~res_by_tools ~tree ~faa ~tsv () in
{tree_prefix; model_prefix ; auto_t_plot}
)
)
let get_simu_infos ~(dataset:Dataset.t) =
let model_prefix = dataset.model_prefix in
let ready_dataset = dataset.dataset in
......@@ -113,7 +143,8 @@ let get_simu_infos ~(dataset:Dataset.t) =
let post_analyses_res_of_dataset_results_l ~dataset_results_l =
let t_choices = get_t_choices ~dataset_results_l in
{t_choices}
let auto_t_plot_l = plot_det_meth_res_auto_t ~t_choices ~dataset_results_l in
{t_choices; auto_t_plot_l}
let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l =
let simu_infos_l = List.map simu_dataset_l ~f:(fun dataset ->
......@@ -143,12 +174,27 @@ let repo_of_post_analyses_simu ~post_analyses_simu =
] |> List.concat
let repo_of_post_analyses_res ~prefix ~post_analyses_res =
match post_analyses_res.t_choices with
| None -> []
| Some w ->
Repo.[
item [prefix ^ ".t_choices.max_mcc_per_meth.tsv"] w.t_choices_max ;
item [prefix ^ ".t_choices.complete.tsv"] w.t_choices_complete ;
item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
] |> Repo.shift "t_choices"
[
(match post_analyses_res.t_choices with
| None -> []
| Some w ->
Repo.[
item [prefix ^ ".t_choices.max_mcc_per_meth.tsv"] w.t_choices_max ;
item [prefix ^ ".t_choices.complete.tsv"] w.t_choices_complete ;
item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
] |> Repo.shift "t_choices"
);
(
match post_analyses_res.auto_t_plot_l with
| None -> []
| Some w_l ->
List.map w_l ~f:(fun w ->
Repo.[
let prefix_f = w.tree_prefix ^ "@" ^ w.model_prefix in
item [ prefix_f ^ ".auto_t.svg"] w.auto_t_plot ;
]
)|> List.concat
|> Repo.shift "auto_t_pdf"
);
] |> List.concat
......@@ -59,6 +59,9 @@ parser.add_argument('-meth', dest="methods_to_be_plotted", type=str,
parser.add_argument('-t', dest="threshold_by_method", type=str,
metavar="\"meth1:0.85,meth3:70\"", help="threshold to filter site by method in the table file (default:0.99 or 99)",
default=None)
parser.add_argument('--t_tsv', dest="threshold_by_method_tsv", type=str,
metavar="\"t_choices.tsv\"", help="tabular file containing thresholds to filter site by method in the table file (default:none)",
default=None)
parser.add_argument('--all_sites', dest="plot_all_sites", action="store_true",
help="Plot all sites (default:False)",
default=False)
......@@ -83,12 +86,19 @@ methods_to_be_plotted = args.methods_to_be_plotted
if methods_to_be_plotted:
methods_to_be_plotted = [m for m in methods_to_be_plotted.split(",") if m]
MESSAGE("Methods to be plotted: "+", ".join([param(meth) for meth in methods_to_be_plotted]))
threshold_by_method = args.threshold_by_method
dic_threshold_by_method = {}
if threshold_by_method:
dic_threshold_by_method = {mt.split(":")[0]:float(mt.split(":")[1]) for mt in threshold_by_method.split(",") if mt}
MESSAGE("Threshold by method: "+", ".join([(param(key)+": "+param(value)) for key,value in dic_threshold_by_method.items()]))
threshold_by_method_tsv = args.threshold_by_method_tsv
if threshold_by_method_tsv:
MESSAGE("threshold tabular file is "+param(threshold_by_method_tsv))
df_threshold_by_method = pd.read_table(threshold_by_method_tsv)
dic_threshold_by_method = df_threshold_by_method.set_index('methode').to_dict()['threshold']
MESSAGE("Threshold by method: "+", ".join([(param(key)+": "+param(value)) for key,value in dic_threshold_by_method.items()]))
#===================================================================================================
STEP("Reading input files:")
......@@ -118,6 +128,7 @@ if not methods_to_be_plotted:
MESSAGE("Computing thresholds to filter sites:")
methode_scales = {}
for meth in methods_to_be_plotted:
max_meth = max(result_table[meth])
if max_meth > 1:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment