open Core
open Reviewphiltrans
open Bistro_utils

(** [first_nhx_in_dir dir] returns the name (not the full path) of an entry
    of [dir] whose name ends in [.nhx].

    The entries are scanned in the order returned by [Sys.readdir]; when
    several [.nhx] files are present, the one that comes first in that order
    is returned.

    @raise Failure if [dir] contains no entry with the [.nhx] suffix. Any
    exception raised by [Sys.readdir] itself is propagated unchanged. *)
let first_nhx_in_dir dir =
  match Sys.readdir dir |> Array.find ~f:(String.is_suffix ~suffix:".nhx") with
  | Some file -> file
  | None ->
    (* Fail with the offending directory in the message rather than with a
       bare not-found exception, so the command-line user can tell what is
       missing and where. *)
    failwithf "No .nhx tree file found in input directory %s" dir ()

(** [sw b x] is [Some x] when the switch [b] is set and [None] otherwise. *)
let sw b x = if b then Some x else None

(** Entry point of the [realdata] sub-command.

    Builds a [Real_dataset] from [indir] (alignments are read from the
    [Alignments] sub-directory, the tree from the [.nhx] file found by
    {!first_nhx_in_dir}), selects the methods and extra computations enabled
    by the boolean switches, and hands the resulting repositories to
    [Bistro_utils.Repo.build_main] with [outdir] as destination.

    The multinomial method is enabled unless [no_use_multinomial] is set;
    every other method is enabled only when its [use_*] switch is set.
    [np] and [mem] are optional resource hints forwarded to the build;
    [mem] is given in GB.

    @raise Failure if [indir] contains no [.nhx] file. *)
let realdata_main
    ~use_diffsel ~use_pcoc ~use_pcoc_c60 ~use_pcoc_gamma ~use_pcoc_v2
    ~use_pcoc_pcp ~use_tdg09 ~use_topological ~use_identical
    ~no_use_multinomial ~calc_dnds ~calc_gene_trees
    ~indir ~outdir ~np ~mem () =
  let loggers = [ Console_logger.create () ] in
  (* The build expects the memory amount tagged with its unit. *)
  let mem = Option.map mem ~f:(fun i -> `GB i) in
  let rd =
    Real_dataset.make
      ~alignment_dir_path:(Filename.concat indir "Alignments")
      ~tree_path:(Filename.concat indir (first_nhx_in_dir indir))
  in
  (* The command line exposes the negated flag; flip it once here. *)
  let use_multinomial = not no_use_multinomial in
  let meths =
    List.filter_opt [
      sw use_diffsel `Diffsel ;
      sw use_pcoc `Pcoc ;
      sw use_pcoc_c60 `Pcoc_C60 ;
      sw use_pcoc_gamma `Pcoc_gamma ;
      sw use_pcoc_v2 `PCOC_v2 ;
      sw use_pcoc_pcp `PCOC_pcp ;
      sw use_tdg09 `Tdg09 ;
      sw use_topological `Topological ;
      sw use_identical `Identical ;
      sw use_multinomial `Multinomial ;
    ]
  in
  let pal =
    List.filter_opt [
      sw calc_dnds `DnDs ;
      sw calc_gene_trees `GeneTree ;
    ]
  in
  (* Each repository is placed under its own prefix before the build. *)
  List.concat [
    Repo.shift "Merged_results" (Real_dataset.repo meths rd) ;
    Repo.shift "PreParsed_Dataset" (Real_dataset.repo_parsed_rd pal rd) ;
  ]
  |> Bistro_utils.Repo.build_main ~outdir ~loggers ?np ?mem

(** Command-line specification of the [realdata] sub-command: parses the
    flags below and forwards them to {!realdata_main}. *)
let realdata_command =
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run pipeline on real data"
    [%map_open
      let outdir =
        flag "--outdir" (required string) ~doc:"PATH Output directory"
      and indir =
        flag "--indir" (required string) ~doc:"PATH Input directory"
      and use_diffsel =
        flag "--diffsel" no_arg ~doc:" use the diffsel method (very slow)."
      and use_pcoc =
        flag "--pcoc" no_arg ~doc:" use the pcoc method (slow)."
      and use_pcoc_c60 =
        flag "--pcoc-c60" no_arg ~doc:" use the pcoc method with c60 profils (very_slow)."
      and use_pcoc_gamma =
        flag "--pcoc-gamma" no_arg ~doc:" use the pcoc method with the gamma option (very_slow)."
      and use_pcoc_v2 =
        flag "--pcoc-v2" no_arg ~doc:" use the pcoc v2 method with the C10 profiles (slow)."
      and use_pcoc_pcp =
        flag "--pcoc-pcp" no_arg ~doc:" use the pcoc v2 method with the physico-chemical profiles (slow)."
      and use_tdg09 =
        flag "--tdg09" no_arg ~doc:" use the tdg09 method (slow)."
      and use_topological =
        flag "--topological" no_arg ~doc:" use the topological method (fast)."
      and use_identical =
        flag "--identical" no_arg ~doc:" use the identical method (fast)."
      and no_use_multinomial =
        flag "--no-multinomial" no_arg ~doc:" not use the multinomial method (very fast so by default)."
      and calc_dnds =
        flag "--dnds" no_arg ~doc:" calculate dn ds dnds trees (slow)."
      and calc_gene_trees =
        flag "--gt" no_arg ~doc:" calculate gene trees (slow)."
      and np =
        flag "--np" (optional int) ~doc:"INT Number of available processors"
      and mem =
        flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
      in
      (* Left without its final unit argument: Command.basic supplies it
         once parsing has succeeded. *)
      realdata_main
        ~use_diffsel ~use_pcoc ~use_pcoc_c60 ~use_pcoc_gamma ~use_pcoc_v2
        ~use_pcoc_pcp ~use_tdg09 ~use_topological ~use_identical
        ~no_use_multinomial ~calc_dnds ~calc_gene_trees
        ~indir ~outdir ~np ~mem
    ]

(* Program entry point: dispatch on the sub-command given on the command
   line. *)
let () =
  Command.group ~summary:"Reviewphiltrans" [
    "validation", Pipeline.validation_command ;
    "realdata", realdata_command ;
    "alistats", Alistats.command ;
  ]
  |> Command.run