Commit 725d12e7 authored by Carine Rey's avatar Carine Rey
Browse files

calculate and plot an auto threshold per detection methode for each input tree

parent 2b59323d
......@@ -77,7 +77,7 @@ let merge_results ~res_by_tools : text_file workflow =
] ;
]
let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv : svg workflow =
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg workflow =
let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
(* use of pcoc env due to its working X server for dra plot with ete3 *)
let meths = List.map res_by_tools ~f:(fun res ->
......@@ -96,7 +96,7 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
string opt
) |> seq ~sep:","
in
let meths_t = List.map res_by_tools ~f:(fun res ->
let default_t = List.map res_by_tools ~f:(fun res ->
let opt = match res with
| `Pcoc _ -> "PCOC:0.99,PC:0.99,OC:0.99"
| `Pcoc_gamma _ -> "PCOC_gamma:0.99,PC_gamma:0.99,OC_gamma:0.99"
......@@ -112,6 +112,10 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
string opt
) |> seq ~sep:","
in
let meths_t = match t_choices with
| Some _ -> None
| None -> Some default_t
in
let package_diffsel_script_utils = tmp // "diffsel_script_utils.py" in
let package_plot_data = tmp // "plot_data.py" in
let script_plot_convergent_sites = tmp // "plot_convergent_sites.py" in
......@@ -134,9 +138,10 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
opt "-tree" dep tree ;
opt "-out" ident out ;
opt "-meth" ident meths ;
opt "-t" ident meths_t ;
option (opt "-t" ident) meths_t ;
option (opt "--t_tsv" dep) t_choices ;
flag string "--all_sites" plot_all_sites ;
]
]
)
]
] / selector ["results.svg"]
......@@ -34,9 +34,11 @@ val merge_results :
text_file workflow
val plot_merge_results :
? t_choices : text_file workflow ->
plot_all_sites: bool ->
res_by_tools : result list ->
tree:nhx workflow ->
faa:aminoacid_fasta workflow ->
tsv:text_file workflow ->
unit ->
svg workflow
......@@ -167,7 +167,7 @@ let repo_of_dataset_results_l ~dataset_results_l =
let model_prefix = dataset_results.model_prefix in
let tree_prefix = dataset_results.tree_prefix in
let merged_results_item = Repo.item [tree_prefix ^"."^model_prefix^".merged_results.tsv"] merged_results in
let plot_merged_results_item = Repo.item [tree_prefix ^"."^model_prefix^".plot_merged_results"] plot_merge_results in
let plot_merged_results_item = Repo.item [tree_prefix ^"."^model_prefix^".plot_merged_results.svg"] plot_merge_results in
let repo =
merged_results_item ::
plot_merged_results_item ::
......@@ -231,7 +231,7 @@ let derive_from_dataset ~dataset ~preview ~fast_mode=
let faa = dataset.dataset.faa in
let tree = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
let plot_all_sites = if dataset.is_real then false else true in
let plot_merged_results = plot_merge_results ~plot_all_sites ~res_by_tools ~tsv ~faa ~tree in
let plot_merged_results = plot_merge_results ~plot_all_sites ~res_by_tools ~tsv ~faa ~tree () in
let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in
{model_prefix; tree_prefix; dataset; res_by_tools ; merged_results ; plot_merged_results}
......@@ -260,17 +260,16 @@ let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~no_Ne ~no_HaPC ~tree_dir ~profile_fn ~use_concat ~only_simu () =
(* simulated trees *)
let trees = Array.to_list @@ Sys.readdir tree_dir in
let simu_dataset_l = derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC in
let post_analyses_simu = Post_analyses.post_analyses_simu_of_simu_dataset_l ~simu_dataset_l in
let repo_of_post_analyses_simu = Post_analyses.repo_of_post_analyses_simu ~post_analyses_simu in
let repo_per_tree = List.map trees ~f:(fun tree ->
let repo_per_tree_simu = List.map trees ~f:(fun tree ->
let trees = [tree] in
let tree_prefix = Filename.chop_extension tree in
let indir_dataset_l = if indir = "" then [] else parse_input_data indir in
let dataset_l =
derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC
@ indir_dataset_l in
derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC in
let dataset_results_l =
if only_simu then
[]
......@@ -285,10 +284,23 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
] |> List.concat
in
repo_per_tree
)
|> List.concat
) |> List.concat
in
(* real trees *)
let indir_dataset_l = if indir = "" then [] else parse_input_data indir in
let dataset_l = indir_dataset_l in
let dataset_results_l =
if only_simu then
[]
else
derive_det ~dataset_l ~preview ~fast_mode
in
let repo_real_trees = [
Dataset.repo dataset_l ~preview ;
repo_of_dataset_results_l ~dataset_results_l ;
] |> List.concat
in
let repo = repo_of_post_analyses_simu @ repo_per_tree in
let repo = repo_of_post_analyses_simu @ repo_real_trees @ repo_per_tree_simu in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let simulation_command =
......
......@@ -14,15 +14,24 @@ type t_choices = {
t_choices_plot: text_file workflow ;
}
type auto_t_plot = {
tree_prefix:string ;
model_prefix:string ;
auto_t_plot: svg workflow;
}
type post_analyses_res = {
t_choices : t_choices option;
auto_t_plot_l : auto_t_plot list option;
}
type simu_infos = {
simu_infos: text_file workflow option ;
model_prefix: string ;
tree_prefix: string ;
}
type post_analyses_res = {
t_choices : t_choices option;
}
type post_analyses_simu = {
simu_infos_l : simu_infos list;
simu_infos_plot : text_file workflow ;
......@@ -100,6 +109,27 @@ let get_t_choices ~(dataset_results_l: dataset_res list) : t_choices option =
Some {t_choices_max; t_choices_complete ; t_choices_plot}
| _ -> None
let plot_det_meth_res_auto_t ~t_choices ~dataset_results_l =
match t_choices with
| None -> None
| Some w -> Some (
List.map dataset_results_l ~f:(fun (dataset_results : dataset_res) ->
let tree_prefix = dataset_results.tree_prefix in
let model_prefix = dataset_results.model_prefix in
let ready_dataset = dataset_results.dataset.dataset in
let tree = Tree_dataset.tree ready_dataset.tree_dataset `Detection in
let faa = ready_dataset.faa in
let tsv = dataset_results.merged_results in
let res_by_tools = dataset_results.res_by_tools in
let plot_all_sites = true in
let t_choices = w.t_choices_max in
let auto_t_plot = plot_merge_results ~t_choices ~plot_all_sites ~res_by_tools ~tree ~faa ~tsv () in
{tree_prefix; model_prefix ; auto_t_plot}
)
)
let get_simu_infos ~(dataset:Dataset.t) =
let model_prefix = dataset.model_prefix in
let ready_dataset = dataset.dataset in
......@@ -113,7 +143,8 @@ let get_simu_infos ~(dataset:Dataset.t) =
let post_analyses_res_of_dataset_results_l ~dataset_results_l =
let t_choices = get_t_choices ~dataset_results_l in
{t_choices}
let auto_t_plot_l = plot_det_meth_res_auto_t ~t_choices ~dataset_results_l in
{t_choices; auto_t_plot_l}
let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l =
let simu_infos_l = List.map simu_dataset_l ~f:(fun dataset ->
......@@ -143,12 +174,27 @@ let repo_of_post_analyses_simu ~post_analyses_simu =
] |> List.concat
let repo_of_post_analyses_res ~prefix ~post_analyses_res =
match post_analyses_res.t_choices with
| None -> []
| Some w ->
Repo.[
item [prefix ^ ".t_choices.max_mcc_per_meth.tsv"] w.t_choices_max ;
item [prefix ^ ".t_choices.complete.tsv"] w.t_choices_complete ;
item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
] |> Repo.shift "t_choices"
[
(match post_analyses_res.t_choices with
| None -> []
| Some w ->
Repo.[
item [prefix ^ ".t_choices.max_mcc_per_meth.tsv"] w.t_choices_max ;
item [prefix ^ ".t_choices.complete.tsv"] w.t_choices_complete ;
item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
] |> Repo.shift "t_choices"
);
(
match post_analyses_res.auto_t_plot_l with
| None -> []
| Some w_l ->
List.map w_l ~f:(fun w ->
Repo.[
let prefix_f = w.tree_prefix ^ "@" ^ w.model_prefix in
item [ prefix_f ^ ".auto_t.svg"] w.auto_t_plot ;
]
)|> List.concat
|> Repo.shift "auto_t_pdf"
);
] |> List.concat
......@@ -59,6 +59,9 @@ parser.add_argument('-meth', dest="methods_to_be_plotted", type=str,
parser.add_argument('-t', dest="threshold_by_method", type=str,
metavar="\"meth1:0.85,meth3:70\"", help="threshold to filter site by method in the table file (default:0.99 or 99)",
default=None)
parser.add_argument('--t_tsv', dest="threshold_by_method_tsv", type=str,
metavar="\"t_choices.tsv\"", help="tabular file containing thresholds to filter site by method in the table file (default:none)",
default=None)
parser.add_argument('--all_sites', dest="plot_all_sites", action="store_true",
help="Plot all sites (default:False)",
default=False)
......@@ -83,12 +86,19 @@ methods_to_be_plotted = args.methods_to_be_plotted
if methods_to_be_plotted:
methods_to_be_plotted = [m for m in methods_to_be_plotted.split(",") if m]
MESSAGE("Methods to be plotted: "+", ".join([param(meth) for meth in methods_to_be_plotted]))
threshold_by_method = args.threshold_by_method
dic_threshold_by_method = {}
if threshold_by_method:
dic_threshold_by_method = {mt.split(":")[0]:float(mt.split(":")[1]) for mt in threshold_by_method.split(",") if mt}
MESSAGE("Threshold by method: "+", ".join([(param(key)+": "+param(value)) for key,value in dic_threshold_by_method.items()]))
threshold_by_method_tsv = args.threshold_by_method_tsv
if threshold_by_method_tsv:
MESSAGE("threshold tabular file is "+param(threshold_by_method_tsv))
df_threshold_by_method = pd.read_table(threshold_by_method_tsv)
dic_threshold_by_method = df_threshold_by_method.set_index('methode').to_dict()['threshold']
MESSAGE("Threshold by method: "+", ".join([(param(key)+": "+param(value)) for key,value in dic_threshold_by_method.items()]))
#===================================================================================================
STEP("Reading input files:")
......@@ -118,6 +128,7 @@ if not methods_to_be_plotted:
MESSAGE("Computing thresholds to filter sites:")
methode_scales = {}
for meth in methods_to_be_plotted:
max_meth = max(result_table[meth])
if max_meth > 1:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment