(* Post-analysis workflows built on top of the detection results:
   t_choices tables and plots, checks on simulated datasets, and the
   repository items that expose their outputs. *)

open Core
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
open Bistro_utils
open File_formats
open Convergence_detection

(* Phantom types tagging the output directories of the workflows below. *)
type plot_trees
type post_analyses_dir
type sens_spe_t_choices_plot

(** Files selected from the output directory of {!make_t_choices} for one
    tree. *)
type t_choices = {
  t_choices_complete : text_file workflow ;
  t_choices_max : text_file workflow ;
  t_choices_recall09 : text_file workflow ;
  t_choices_plot : text_file workflow ;
  t_choices_condensed_plot : text_file workflow ;
  tree_prefix : string ;
}

(** Plot produced by [plot_merge_results] for one (tree, model) dataset. *)
type auto_t_plot = {
  tree_prefix : string ;
  model_prefix : string ;
  auto_t_plot : svg workflow ;
}

(** Everything computed by {!post_analyses_res_of_dataset_results_l}. *)
type post_analyses_res = {
  t_choices : t_choices option ;
  auto_t_plot_l : auto_t_plot list option ;
  dataset_results_l : dataset_res list ;
}

(** Optional simulation-info table for one (tree, model) dataset. *)
type simu_infos = {
  simu_infos : text_file workflow option ;
  model_prefix : string ;
  tree_prefix : string ;
}

(** A tree re-inferred with PhyML together with the tree given as input. *)
type reinfered_tree = {
  reinfered_tree : nw workflow ;
  input_tree : nhx workflow ;
  tree_prefix : string ;
  model_prefix : string ;
}

(** Everything computed by {!post_analyses_simu_of_simu_dataset_l}. *)
type post_analyses_simu = {
  simu_infos_l : simu_infos list ;
  simu_infos_plot : text_file workflow ;
  trees_plot : text_file workflow ;
}

(** Docker image in which every Rscript command of this file is run. *)
let r_env =
  docker_image ~account:"carinerey" ~name:"r_basics" ~tag:"08012018" ()

(** [is_hyp ~hyp dataset_results] is [true] when the model prefix of
    [dataset_results] is exactly [hyp]. *)
let is_hyp ~hyp (dataset_results : dataset_res) =
  (* Type-specific comparison: does not rely on the polymorphic [=]. *)
  String.equal dataset_results.model_prefix hyp

(** [build_cmd_t_choices opt_name mr_option] is the one-element argument list
    [opt_name <dep x>] when [mr_option] is [Some x], and the empty list
    otherwise. *)
let build_cmd_t_choices (opt_name : string) mr_option =
  match mr_option with
  | Some x -> [ opt opt_name dep x ]
  | None -> []

(** Workflow running [Scripts.calc_t_per_meth] with Rscript inside {!r_env}.

    Each [~..._mr] argument is an optional merged-results workflow; only the
    ones that are [Some _] are passed to the script, each under its own
    command-line option. The script is given the prefix [dest/out] for its
    outputs. *)
let make_t_choices
    ~h0_mr
    ~h0_NeBig_mr
    ~h0_NeSmall_mr
    ~haPCOC_mr
    ~haPC_mr
    ~haPC_NeBig_mr
    ~haPC_NeSmall_mr
    ~h0_NeBigInSmall_mr
    ~h0_NeSmallInBig_mr
    ~haPC_NeBigInSmall_mr
    ~haPC_NeSmallInBig_mr
    ()
  : post_analyses_dir directory workflow =
  let env = r_env in
  let out = dest // "out" in
  (* One option per available hypothesis, in this fixed order. *)
  let cmd_mr =
    List.concat_map
      [
        ("--H0", h0_mr) ;
        ("--H0NeBig", h0_NeBig_mr) ;
        ("--H0NeSmall", h0_NeSmall_mr) ;
        ("--H0NeBigInSmall", h0_NeBigInSmall_mr) ;
        ("--H0NeSmallInBig", h0_NeSmallInBig_mr) ;
        ("--HaPCOC", haPCOC_mr) ;
        ("--HaPC", haPC_mr) ;
        ("--HaPCNeBig", haPC_NeBig_mr) ;
        ("--HaPCNeSmall", haPC_NeSmall_mr) ;
        ("--HaPCNeBigInSmall", haPC_NeBigInSmall_mr) ;
        ("--HaPCNeSmallInBig", haPC_NeSmallInBig_mr) ;
      ]
      ~f:(fun (opt_name, mr_option) -> build_cmd_t_choices opt_name mr_option)
  in
  workflow ~descr:"post_analyses.t_choices" [
    docker env (
      and_list [
        mkdir_p dest ;
        cmd "Rscript" (
          List.concat [
            [
              file_dump (string Scripts.calc_t_per_meth) ;
              opt "--out " ident out ;
            ] ;
            cmd_mr ;
          ]) ;
      ])
  ]

(** Workflow running [Scripts.calc_simu_infos] with python inside the
    [carinerey/pcoc] image, writing to [dest].

    @param descr suffix appended to the workflow description (default empty).
    @param fna_infos when given, passed to the script as [--fna_infos].
    @param faa passed as [--faa].
    @param tree_sc passed as [--tree].

    NOTE(review): no positional argument follows the optional ones, so they
    cannot be erased by a partial application; the signature is kept as is
    for existing callers. *)
let make_simu_infos ?(descr="") ?(fna_infos) ~faa ~tree_sc : text_file workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"07022018" () in
  workflow ~descr:("post_analyses.simu_infos." ^ descr) [
    cmd "python" ~env [
      file_dump (string Scripts.calc_simu_infos) ;
      opt "--faa" dep faa ;
      opt "--tree" dep tree_sc ;
      option (opt "--fna_infos" dep) fna_infos ;
      opt "--output " ident dest ;
    ] ;
  ]

(** Workflow copying every available simulation-info table to
    [tmp/<tree_prefix>@<model_prefix>.tsv] and then running
    [Scripts.plot_hyp_simu_validation] on [tmp], with output prefix
    [dest/out]. Entries whose [simu_infos] is [None] are skipped. *)
let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
  let env = r_env in
  let cmd_cp_l =
    List.filter_map simu_infos_l ~f:(fun (s : simu_infos) ->
        match s.simu_infos with
        | Some w ->
          Some (
            cmd "cp" [
              dep w ;
              tmp // (s.tree_prefix ^"@"^ s.model_prefix ^ ".tsv") ;
            ])
        | None -> None)
  in
  let out = dest // "out" in
  workflow ~descr:"post_analyses.plot_simu_infos" [
    docker env (
      and_list (
        List.concat [
          [ mkdir_p dest ; mkdir_p tmp ] ;
          cmd_cp_l ;
          [
            cmd "Rscript" [
              file_dump (string Scripts.plot_hyp_simu_validation) ;
              opt "--input_dir" ident tmp ;
              opt "--out " ident out ;
            ] ;
          ] ;
        ]))
  ]

(** Workflow copying, for every entry, the re-inferred tree to
    [tmp/<tree_prefix>@<model_prefix>.nw] and the input tree to
    [tmp/<tree_prefix>@input_tree.nw], and then running [Scripts.plot_trees]
    on [tmp], with output prefix [dest/out]. *)
let plot_trees ~reinfered_tree_l : plot_trees directory workflow =
  let env = r_env in
  let cmd_cp_l =
    List.concat_map reinfered_tree_l ~f:(fun (rt : reinfered_tree) ->
        [
          cmd "cp" [
            dep rt.reinfered_tree ;
            tmp // (rt.tree_prefix ^"@"^ rt.model_prefix ^ ".nw") ;
          ] ;
          cmd "cp" [
            dep rt.input_tree ;
            tmp // (rt.tree_prefix ^"@input_tree.nw") ;
          ] ;
        ])
  in
  let out = dest // "out" in
  workflow ~descr:"post_analyses.plot_trees" [
    docker env (
      and_list (
        List.concat [
          [ mkdir_p dest ; mkdir_p tmp ] ;
          cmd_cp_l ;
          [
            cmd "Rscript" [
              file_dump (string Scripts.plot_trees) ;
              opt "--input_dir" ident tmp ;
              opt "--out " ident out ;
            ] ;
          ] ;
        ]))
  ]

(** [get_merged_results_opt hx] projects the [merged_results] field out of an
    optional result. *)
let get_merged_results_opt hx =
  Option.map hx ~f:(fun w -> w.merged_results)

(** Optional detection results, one field per hypothesis looked up in
    {!get_t_choices}. *)
type res_all_hyp = {
  h0_res : dataset_res option ;
  h0_NeBig_res : dataset_res option ;
  h0_NeSmall_res : dataset_res option ;
  ha_PC_res : dataset_res option ;
  ha_PCOC_res : dataset_res option ;
  ha_PC_NeBig_res : dataset_res option ;
  ha_PC_NeSmall_res : dataset_res option ;
  h0_NeBigInSmall_res : dataset_res option ;
  h0_NeSmallInBig_res : dataset_res option ;
  ha_PC_NeBigInSmall_res : dataset_res option ;
  ha_PC_NeSmallInBig_res : dataset_res option ;
}

(** Calls {!make_t_choices} with the merged results of every hypothesis
    present in the given record. *)
let make_t_choices_per_couple
    {
      h0_res ;
      h0_NeBig_res ;
      h0_NeSmall_res ;
      ha_PC_res ;
      ha_PCOC_res ;
      ha_PC_NeBig_res ;
      ha_PC_NeSmall_res ;
      h0_NeBigInSmall_res ;
      h0_NeSmallInBig_res ;
      ha_PC_NeBigInSmall_res ;
      ha_PC_NeSmallInBig_res ;
    } =
  make_t_choices
    ~h0_mr:(get_merged_results_opt h0_res)
    ~h0_NeBig_mr:(get_merged_results_opt h0_NeBig_res)
    ~h0_NeSmall_mr:(get_merged_results_opt h0_NeSmall_res)
    ~haPCOC_mr:(get_merged_results_opt ha_PCOC_res)
    ~haPC_mr:(get_merged_results_opt ha_PC_res)
    ~haPC_NeBig_mr:(get_merged_results_opt ha_PC_NeBig_res)
    ~haPC_NeSmall_mr:(get_merged_results_opt ha_PC_NeSmall_res)
    ~h0_NeBigInSmall_mr:(get_merged_results_opt h0_NeBigInSmall_res)
    ~h0_NeSmallInBig_mr:(get_merged_results_opt h0_NeSmallInBig_res)
    ~haPC_NeBigInSmall_mr:(get_merged_results_opt ha_PC_NeBigInSmall_res)
    ~haPC_NeSmallInBig_mr:(get_merged_results_opt ha_PC_NeSmallInBig_res)
    ()

(** Looks up each hypothesis in [dataset_results_l] by model prefix and, when
    both the H0 and the HaPCOC results are present, builds the t_choices
    workflow and selects its output files. Returns [None] when either of
    these two results is missing. The [tree_prefix] of the result is the one
    of the H0 dataset. *)
let get_t_choices ~(dataset_results_l : dataset_res list) : t_choices option =
  (* First result whose model prefix is [hyp], if any. *)
  let find_hyp hyp = List.find dataset_results_l ~f:(is_hyp ~hyp) in
  let h0_res = find_hyp "H0" in
  let h0_NeBig_res = find_hyp "H0_NeBig" in
  let h0_NeSmall_res = find_hyp "H0_NeSmall" in
  let h0_NeBigInSmall_res = find_hyp "H0_BigNeInSmallNe" in
  let h0_NeSmallInBig_res = find_hyp "H0_SmallNeInBigNe" in
  let ha_PCOC_res = find_hyp "HaPCOC" in
  let ha_PC_res = find_hyp "HaPC" in
  let ha_PC_NeBig_res = find_hyp "HaPC_NeBig" in
  let ha_PC_NeSmall_res = find_hyp "HaPC_NeSmall" in
  let ha_PC_NeBigInSmall_res = find_hyp "HaPC_BigNeInSmallNe" in
  let ha_PC_NeSmallInBig_res = find_hyp "HaPC_SmallNeInBigNe" in
  match h0_res, ha_PCOC_res with
  | Some h0, Some _ ->
    let t_choices_dir =
      make_t_choices_per_couple
        {
          h0_res ;
          h0_NeBig_res ;
          h0_NeSmall_res ;
          ha_PC_res ;
          ha_PCOC_res ;
          ha_PC_NeBig_res ;
          ha_PC_NeSmall_res ;
          h0_NeBigInSmall_res ;
          h0_NeSmallInBig_res ;
          ha_PC_NeBigInSmall_res ;
          ha_PC_NeSmallInBig_res ;
        }
    in
    let t_choices_max = t_choices_dir / selector ["out.max_MCC_per_meth.tsv"] in
    let t_choices_recall09 = t_choices_dir / selector ["out.recall09_per_meth.tsv"] in
    let t_choices_complete = t_choices_dir / selector ["out.complete.tsv"] in
    let t_choices_plot = t_choices_dir / selector ["out.pdf"] in
    let t_choices_condensed_plot = t_choices_dir / selector ["out_condensed.pdf"] in
    let tree_prefix = h0.tree_prefix in
    Some {
      t_choices_max ;
      t_choices_recall09 ;
      t_choices_complete ;
      t_choices_plot ;
      t_choices_condensed_plot ;
      tree_prefix ;
    }
  | None, _ | _, None -> None

(** When [t_choices] is available, builds one [plot_merge_results] plot per
    dataset of [dataset_results_l], using the [t_choices_max] table and
    plotting all sites. Returns [None] when [t_choices] is [None]. *)
let plot_det_meth_res_auto_t ~t_choices ~dataset_results_l =
  match t_choices with
  | None -> None
  | Some w ->
    Some (
      List.map dataset_results_l ~f:(fun (dataset_results : dataset_res) ->
          let tree_prefix = dataset_results.tree_prefix in
          let model_prefix = dataset_results.model_prefix in
          let ready_dataset = dataset_results.dataset.dataset in
          let tree = Tree_dataset.tree ready_dataset.tree_dataset `Detection in
          let faa = ready_dataset.faa in
          let tsv = dataset_results.merged_results in
          let res_by_tools = dataset_results.res_by_tools in
          let plot_all_sites = true in
          let t_choices = w.t_choices_max in
          let auto_t_plot =
            plot_merge_results
              ~t_choices ~plot_all_sites ~res_by_tools ~tree ~faa ~tsv ()
          in
          {tree_prefix; model_prefix ; auto_t_plot}))

(** Simulation-info workflow for [dataset], built with {!make_simu_infos}
    from its protein alignment, its detection tree and its [fna_infos].
    Returns [None] when the dataset has no [fna_infos]. *)
let get_simu_infos ~(dataset : Dataset.t) =
  let model_prefix = dataset.model_prefix in
  let ready_dataset = dataset.dataset in
  let faa = ready_dataset.faa in
  let tree_sc = Tree_dataset.tree ready_dataset.tree_dataset `Detection in
  let fna_infos = ready_dataset.fna_infos in
  match fna_infos with
  | Some w ->
    Some (make_simu_infos ~descr:model_prefix ~faa ~tree_sc ~fna_infos:w)
  | None -> None (*make_simu_infos ~faa ~tree_sc*)

(** Bundles the t_choices and the per-dataset plots computed from
    [dataset_results_l], together with the list itself. *)
let post_analyses_res_of_dataset_results_l ~dataset_results_l =
  let t_choices = get_t_choices ~dataset_results_l in
  let auto_t_plot_l = plot_det_meth_res_auto_t ~t_choices ~dataset_results_l in
  {t_choices; auto_t_plot_l;dataset_results_l}

(** For every simulated dataset, computes its simulation infos and re-infers
    its tree with PhyML from the nucleotide alignment (starting from the
    input tree); then builds the two summary plots selected as [out.pdf]
    from {!group_simu_infos} and {!plot_trees}. *)
let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l =
  (* The parameter annotations make field resolution independent of the
     order in which the two maps are type-checked. *)
  let simu_infos_l =
    List.map simu_dataset_l ~f:(fun (dataset : Dataset.t) ->
        {
          simu_infos = (get_simu_infos ~dataset) ;
          tree_prefix = dataset.tree_prefix ;
          model_prefix = dataset.model_prefix ;
        })
  in
  let reinfered_tree_l =
    List.map simu_dataset_l ~f:(fun (dataset : Dataset.t) ->
        let rd = dataset.dataset in
        let phy = Bppsuite.fa2phy rd.fna in
        let input_tree = rd.input_tree in
        let reinfered_tree = Phyml.phyml_tree ~tree:input_tree phy in
        {
          reinfered_tree ;
          input_tree ;
          tree_prefix = dataset.tree_prefix ;
          model_prefix = dataset.model_prefix ;
        })
  in
  let simu_infos_plot = group_simu_infos ~simu_infos_l / selector ["out.pdf"] in
  let trees_plot = plot_trees ~reinfered_tree_l / selector ["out.pdf"] in
  {simu_infos_l; simu_infos_plot; trees_plot}

(** Workflow copying every [t_choices_max] table to
    [tmp/t_choices_dir/<tree_prefix>.tsv] and every merged-results table to
    [tmp/merged_results_dir/<tree_prefix>@<model_prefix>.tsv], and then
    running [Scripts.plot_sens_spe_all_trees] on both directories with the
    given [profile_prefix] and output prefix [dest/out]. *)
let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix
  : sens_spe_t_choices_plot directory workflow =
  let env = r_env in
  let t_choices_dir = tmp // "t_choices_dir" in
  let merged_results_dir = tmp // "merged_results_dir" in
  let out = dest // "out" in
  let cmd_cp_t_choices_l =
    List.map t_choices_l ~f:(fun (tc : t_choices) ->
        cmd "cp" [
          dep tc.t_choices_max ;
          t_choices_dir // (tc.tree_prefix ^ ".tsv") ;
        ])
  in
  let cmd_cp_merged_results_l =
    List.map dataset_results_l ~f:(fun (dataset_results : dataset_res) ->
        cmd "cp" [
          dep dataset_results.merged_results ;
          merged_results_dir
          // (dataset_results.tree_prefix ^"@"^ dataset_results.model_prefix ^ ".tsv") ;
        ])
  in
  workflow ~descr:"post_analyses.plot_sens_spe_all_trees" [
    docker env (
      and_list (
        List.concat [
          [
            mkdir_p dest ;
            mkdir_p t_choices_dir ;
            mkdir_p merged_results_dir ;
          ] ;
          cmd_cp_t_choices_l ;
          cmd_cp_merged_results_l ;
          [
            cmd "Rscript" [
              file_dump (string Scripts.plot_sens_spe_all_trees) ;
              opt "--input_dir" ident t_choices_dir ;
              opt "--input_dir2" ident merged_results_dir ;
              opt "--profil" string profile_prefix ;
              opt "--out " ident out ;
            ] ;
          ] ;
        ]))
  ]

(** Repository items for the plots and tables computed across all trees by
    {!plot_sens_spe_t_choices}. Trees without t_choices contribute no
    t_choices table but their dataset results are still included. *)
let repo_post_analyses_all_trees_of_all_post_analyses_per_tree
    ~profile_prefix ~all_post_analyses_per_tree =
  let t_choices_l =
    List.filter_map all_post_analyses_per_tree ~f:(fun post_analyses_res ->
        post_analyses_res.t_choices)
  in
  let dataset_results_l =
    List.concat_map all_post_analyses_per_tree ~f:(fun post_analyses_res ->
        post_analyses_res.dataset_results_l)
  in
  let sens_spe_t_choices_plot =
    plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix
  in
  Repo.[
    item ["sens_spe.pdf"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.pdf"]);
    item ["all_t_choices.pdf"] (sens_spe_t_choices_plot / selector ["out.max_t_per_tree.pdf"]);
    item ["sens_spe.tsv"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.tsv"]);
    item ["all_t_choices.tsv"] (sens_spe_t_choices_plot / selector ["out.max_t_per_tree.tsv"]);
  ]

(** Repository items, all under [simu_infos], for the two validation plots
    followed by one table per dataset that has simulation infos. *)
let repo_of_post_analyses_simu ~post_analyses_simu =
  let plot_items =
    Repo.[
      item ["hypothesis_validation.pdf"] post_analyses_simu.simu_infos_plot ;
      item ["trees_validation.pdf"] post_analyses_simu.trees_plot ;
    ]
    |> Repo.shift "simu_infos"
  in
  let table_items =
    List.concat_map post_analyses_simu.simu_infos_l ~f:(fun simu_infos ->
        match simu_infos.simu_infos with
        | None -> []
        | Some w ->
          Repo.[
            item [simu_infos.tree_prefix ^ "@" ^ simu_infos.model_prefix ^ ".tsv"] w
          ]
          |> Repo.shift "simu_infos")
  in
  List.concat [ plot_items ; table_items ]

(** Repository items, under [t_choices], for the t_choices outputs of
    [post_analyses_res]; file names start with [prefix]. Empty when there
    are no t_choices. *)
let repo_of_post_analyses_res ~prefix ~post_analyses_res =
  List.concat [
    (match post_analyses_res.t_choices with
     | None -> []
     | Some w ->
       Repo.[
         item [prefix ^ ".t_choices.max_mcc_per_meth.tsv"] w.t_choices_max ;
         item [prefix ^ ".t_choices.recall09_per_meth.tsv"] w.t_choices_recall09 ;
         item [prefix ^ ".t_choices.complete.tsv"] w.t_choices_complete ;
         item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
         item [prefix ^ ".t_choices.condensed.pdf"] w.t_choices_condensed_plot ;
       ]
       |> Repo.shift "t_choices"
    );
    (*(
      match post_analyses_res.auto_t_plot_l with
      | None -> []
      | Some w_l -> List.map w_l ~f:(fun w ->
          Repo.[
            let prefix_f = w.tree_prefix ^ "@" ^ w.model_prefix in
            item [ prefix_f ^ ".auto_t.svg"] w.auto_t_plot ;
          ]
        )|> List.concat |> Repo.shift "auto_t_pdf"
    );*)
  ]