open Core
open Bistro.Shell_dsl
open Bistro

(** Output of one detection method. Each constructor wraps the workflow
    produced by the corresponding tool; [`Msd] additionally carries a
    float that is forwarded to the merge script through [--msd]. *)
type result = [
  | `Pcoc of [`pcoc] dworkflow
  | `Pcoc_gamma of [`pcoc] dworkflow
  | `Pcoc_C60 of [`pcoc] dworkflow
  | `Diffsel of [`diffsel] dworkflow
  | `Identical_LG of [`identical] dworkflow
  | `Identical_WAG of [`identical] dworkflow
  | `Topological_LG of [`topological] dworkflow
  | `Topological_WAG of [`topological] dworkflow
  | `Tdg09 of [`tdg09] dworkflow
  | `Multinomial of text_file pworkflow
  | `Msd of [`msd] dworkflow * float
]

(** [meth_string_of_result r] is a short textual name for the method that
    produced [r]. Every constructor maps to a distinct name, except that
    two [`Msd] results carrying the same float share the same name. *)
let meth_string_of_result = function
  | `Pcoc _ -> "pcoc"
  | `Pcoc_gamma _ -> "pcoc_gamma"
  (* Was "pcoc_gamma", which made C60 results indistinguishable from the
     gamma ones. *)
  | `Pcoc_C60 _ -> "pcoc_C60"
  | `Diffsel _ -> "diffsel"
  | `Identical_LG _ -> "identical_LG"
  | `Identical_WAG _ -> "identical_WAG"
  | `Topological_LG _ -> "topological_LG"
  | `Topological_WAG _ -> "topological_WAG"
  | `Tdg09 _ -> "tdg09"
  | `Multinomial _ -> "multinomial"
  | `Msd (_, e) -> sprintf "msd_%f" e

(** Results gathered for one dataset: the per-tool results together with
    the merged table and the plot built from it. *)
type dataset_res = {
  model_prefix : string ;
  tree_prefix : string ;
  dataset : Dataset.t ;
  res_by_tools : result list ;
  merged_results : text_file pworkflow ;
  plot_merged_results : svg pworkflow ;
}

(** [merge_results ?fna_infos ~res_by_tools ()] runs the
    [Scripts.merge_det_results] Python script on the result table of every
    element of [res_by_tools], each one being passed behind a
    method-specific command-line flag. [fna_infos], when given, is passed
    through [--fna_infos]. The script writes to the workflow
    destination ([-o]). *)
let merge_results ?fna_infos ~(res_by_tools : result list) () : text_file pworkflow =
  let command =
    List.map res_by_tools ~f:(fun res ->
        (* Command-line flag and result table for this method. *)
        let flag, table = match res with
          | `Pcoc d -> "--pcoc", Pcoc.results d
          | `Pcoc_gamma d -> "--pcoc_gamma", Pcoc.results d
          | `Pcoc_C60 d -> "--pcoc_C60", Pcoc.results d
          | `Diffsel d -> "--diffsel", Diffsel.selector d
          | `Identical_LG d -> "--identical_LG", Identical.results d
          | `Identical_WAG d -> "--identical_WAG", Identical.results d
          | `Topological_LG d -> "--topological_LG", Topological.results d
          | `Topological_WAG d -> "--topological_WAG", Topological.results d
          | `Tdg09 d -> "--tdg09", Tamuri.results d
          | `Multinomial d -> "--multinomial", d
          | `Msd (d, e) -> sprintf "--msd %f" e, Msd.results d
        in
        seq ~sep:" " [ string flag ; dep table ]
      )
  in
  Workflow.shell ~descr:"convergence_detection.merge_results" [
    cmd "python" ~img:Env.env_py [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      seq ~sep:" " command ;
      option (opt "--fna_infos" dep) fna_infos ;
    ] ;
  ]

(** [merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?diffsel
    ?diffseldsparse ()] runs the same [Scripts.merge_det_results] script as
    {!merge_results}, but takes each input table as an optional argument;
    only the tables that are provided appear on the command line. *)
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?diffsel ?diffseldsparse () : text_file pworkflow =
  (* NOTE(review): the description string is the same as in
     [merge_results]; kept unchanged here. *)
  Workflow.shell ~descr:"convergence_detection.merge_results" [
    cmd "python" ~img:Env.env_py [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      option (opt "--multinomial" dep) multinomial ;
      option (opt "--tdg09" dep) tdg09 ;
      option (opt "--diffsel" dep) diffsel ;
      option (opt "--diffseldsparse" dep) diffseldsparse ;
      option (opt "--oracle" dep) oracle ;
      option (opt "--fna_infos" dep) fna_infos ;
    ] ;
  ]

(** [plot_merge_results ?t_choices ~plot_all_sites ~res_by_tools ~tree ~faa
    ~tsv ()] runs the plotting script on the merged table [tsv], the
    alignment [faa] and the tree [tree], and selects the produced
    [results.svg].

    The [-meth] argument lists the labels associated with each element of
    [res_by_tools]. When [t_choices] is given it is passed as [--t_tsv];
    otherwise a default [-t] argument associating [0] with every label is
    passed instead. [--all_sites] is added when [plot_all_sites] is
    true. *)
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools : result list) ~tree ~faa ~tsv () : svg pworkflow =
  (* The PCOC image is used because it has a working X server, which is
     needed to draw plots with ete3. *)
  let img = Pcoc.img in
  let meths =
    List.map res_by_tools ~f:(fun res ->
        let labels = match res with
          | `Pcoc _ -> "PCOC,PC,OC"
          (* The two entries below used to end with a stray ',' which,
             once the list is joined with ',', produced an empty method
             name in the -meth argument. *)
          | `Pcoc_gamma _ -> "PCOC_gamma,PC_gamma,OC_gamma"
          | `Pcoc_C60 _ -> "PCOC_C60,PC_C60,OC_C60"
          | `Diffsel _ -> "Diffsel_mean,Diffsel_max"
          | `Identical_LG _ -> "Identical_LG08"
          | `Identical_WAG _ -> "Identical_WAG01"
          | `Topological_LG _ -> "Topological_LG08"
          | `Topological_WAG _ -> "Topological_WAG01"
          | `Tdg09 _ -> "Tdg09_1MinusFDR,Tdg09_1MinusLRT,Tdg09_prob_post"
          (* NOTE(review): "Mutinomial" looks like a typo, but the label
             presumably has to match what the scripts emit; verify before
             changing. *)
          | `Multinomial _ -> "Mutinomial_1MinusLRT"
          (* NOTE(review): the label hard-codes 0.05 and ignores the float
             carried by `Msd; confirm this is intended. *)
          | `Msd _ -> "Msd_0.05_1MinusP"
        in
        string labels
      )
    |> seq ~sep:","
  in
  let default_t =
    List.map res_by_tools ~f:(fun res ->
        let thresholds = match res with
          | `Pcoc _ -> "PCOC:0,PC:0,OC:0"
          | `Pcoc_gamma _ -> "PCOC_gamma:0,PC_gamma:0,OC_gamma:0"
          | `Pcoc_C60 _ -> "PCOC_C60:0,PC_C60:0,OC_C60:0"
          | `Diffsel _ -> "Diffsel_mean:0,Diffsel_max:0"
          | `Identical_LG _ -> "Identical_LG08:0"
          | `Identical_WAG _ -> "Identical_WAG01:0"
          | `Topological_LG _ -> "Topological_LG08:0"
          | `Topological_WAG _ -> "Topological_WAG01:0"
          | `Tdg09 _ -> "Tdg09_1MinusFDR:0,Tdg09_prob_post:0,Tdg09_1MinusLRT:0"
          | `Multinomial _ -> "Mutinomial_1MinusLRT:0"
          | `Msd _ -> "Msd_0.05_1MinusP:0"
        in
        string thresholds
      )
    |> seq ~sep:","
  in
  (* The default thresholds are only used when no threshold table is
     supplied. *)
  let meths_t = match t_choices with
    | Some _ -> None
    | None -> Some default_t
  in
  let out = dest // "results.svg" in
  let inner =
    Workflow.shell ~descr:"convergence_detection.plot_results" [
      within_container img (
        and_list [
          mkdir_p dest ;
          cmd "python" [
            Utils.script_dump Scripts.[ diffsel_script_utils ; plot_data ; plot_convergent_sites ] ;
            opt "-msa" dep faa ;
            opt "-tsv" dep tsv ;
            opt "-tree" dep tree ;
            opt "-out" ident out ;
            opt "-meth" ident meths ;
            option (opt "-t" ident) meths_t ;
            option (opt "--t_tsv" dep) t_choices ;
            flag string "--all_sites" plot_all_sites ;
          ]
        ]
      )
    ]
  in
  Workflow.select inner ["results.svg"]

(** [plot_convergent_sites ?plot_all_sites ~alignment ~detection_results
    ~tree ()] runs the plotting script inside the PCOC image on the given
    detection table, alignment and tree, and selects the produced
    [plot.svg]. [plot_all_sites] defaults to [true] and controls the
    [--all_sites] flag. *)
let plot_convergent_sites ?(plot_all_sites = true) ~alignment ~detection_results ~tree () =
  Workflow.shell ~descr:"plot_convergent_sites.py" [
    within_container Pcoc.img (
      and_list [
        mkdir_p dest ;
        cmd "python" [
          Utils.script_dump Scripts.[ diffsel_script_utils ; plot_data ; plot_convergent_sites ] ;
          opt "-tsv" dep detection_results ;
          opt "-msa" dep alignment ;
          opt "-tree" dep tree ;
          opt "-out" ident (dest // "plot.svg") ;
          flag string "--all_sites" plot_all_sites ;
        ]
      ]
    )
  ]
  |> Fn.flip Workflow.select ["plot.svg"]

(** [recall_precision_curve table] runs the
    [Scripts.recall_precision_curve] R script in the [Env.env_r] image,
    with [table] and the workflow destination as its two arguments. *)
let recall_precision_curve table =
  Workflow.shell ~descr:"recall_precision_curve" [
    cmd "Rscript" ~img:Env.env_r [
      file_dump (string Scripts.recall_precision_curve) ;
      dep table ;
      dest ;
    ] ;
  ]

(* [oracle ~n_h0 ~n_ha] writes a two-column, tab-separated table with
   header "Sites\tOracle": sites 1 .. n_h0 are labelled 0 and sites
   n_h0 + 1 .. n_h0 + n_ha are labelled 1. *)
let%pworkflow oracle ~n_h0 ~n_ha =
  let n_h0 = [%param n_h0] in
  let n_ha = [%param n_ha] in
  "Sites\tOracle"
  :: (List.init n_h0 ~f:(fun i -> sprintf "%d\t0" (i + 1)))
  @ (List.init n_ha ~f:(fun i -> sprintf "%d\t1" (n_h0 + i + 1)))
  |> Out_channel.write_lines [%dest]