open Core
open Bistro_utils
open Bistro.EDSL
open Bistro.Std
open File_formats
open Defs
open Convergence_detection
open Profile

(** [parse_input_data ~seed indir] builds one real dataset per entry of
    [indir].

    Each entry is read as a directory that must contain exactly two files,
    one with extension [nhx] (the input tree) and one with extension [fna]
    (the sequences), in either order. The entry name is used as the dataset
    [tree_prefix]; the tree file name without its extension is used as
    [model_prefix].

    @raise Failure if an entry does not contain exactly two files, or if the
    two files are not one [nhx] and one [fna]. *)
let parse_input_data ~seed indir =
  let datasets = Array.to_list @@ Sys.readdir indir in
  List.map datasets ~f:(fun dataset_prefix ->
      let dataset_dir = Filename.concat indir dataset_prefix in
      match Array.to_list (Sys.readdir dataset_dir) with
      | [ h_file; t_file ] ->
        (* The directory listing order is not relied upon: both orders of
           the (fna, nhx) pair are accepted. *)
        let fna, input_tree =
          match
            Filename.split_extension h_file, Filename.split_extension t_file
          with
          | (_, Some "fna"), (_, Some "nhx") -> h_file, t_file
          | (_, Some "nhx"), (_, Some "fna") -> t_file, h_file
          | _ ->
            failwith
              ({|Syntax error: extension errors in |}
               ^ dataset_dir
               ^ " (expected one .nhx and one .fna file); found: "
               ^ h_file
               ^ ", "
               ^ t_file)
        in
        let tree_prefix = Filename.chop_extension input_tree in
        let input_tree =
          input
            (Filename.concat indir (Filename.concat dataset_prefix input_tree))
        in
        let fna =
          input (Filename.concat indir (Filename.concat dataset_prefix fna))
        in
        let fna_infos = None in
        let raw_dataset = Raw_dataset.{ input_tree; fna; fna_infos } in
        { Dataset.model_prefix = tree_prefix
        ; is_real = true
        ; tree_prefix = dataset_prefix
        ; dataset =
            Ready_dataset.of_raw ~descr:("real_data." ^ tree_prefix) raw_dataset
        ; seed
        }
      | _ -> failwith ({|Expected exactly 2 files in |} ^ dataset_dir))

(** [calc_fixed_seed ~str seed] derives a seed from a string and a base
    seed: it hashes [str], adds [seed], and hashes the sum. The same inputs
    always give the same result. *)
let calc_fixed_seed ~(str : string) (seed : int) : int =
  let str_hash = Hashtbl.hash str in
  Hashtbl.hash (str_hash + seed)

(** [derive_from_model] builds one simulated dataset for hypothesis [model]
    on the tree described by [tree_dataset].

    Sequences are generated with [Bppsuite.bppseqgen_multi_profiles], using
    [ns] as the number of sites and a seed derived from the dataset
    description and [seed] through {!calc_fixed_seed}. [preview] is accepted
    but not used here. *)
let derive_from_model
    ~model
    ~input_tree
    ~tree_dataset
    ~tree_prefix
    ~profile
    ~preview
    ~ns
    ~seed
  =
  let model_prefix = Convergence_hypothesis.string_of_model model in
  let nb_sites = ns in
  let nodes = Tree_dataset.nodes tree_dataset model in
  let tree = Tree_dataset.tree tree_dataset `Simulation in
  let descr = "." ^ model_prefix ^ "." ^ tree_prefix in
  (* only 1 profile or 1 couple of profiles *)
  (*let config = Convergence_hypothesis.bpp_config nodes model in
    let fna = Bppsuite.bppseqgen ~descr ~nb_sites ~tree ~config in *)
  (* with several profiles or couples of profiles *)
  let config_p = Convergence_hypothesis.bpp_config_F nodes model in
  (* Every constructor is listed (no catch-all) so that adding a new
     hypothesis is reported by the exhaustiveness check. *)
  let ne_g =
    match model with
    | H0_NeG1
    | HaPCOC
    | HaPC_NeG1
    | HaPC_NeG1_NeC_5
    | H0_NeG1_NeC_5
    | HaPC_NeG1_NeC_4
    | H0_NeG1_NeC_4 -> 1.
    | H0_NeG2 | HaPC_NeG2 -> 2.
    | H0_NeG3 | HaPC_NeG3 -> 3.
    | H0_NeG4 | HaPC_NeG4 | HaPC_NeG4_NeC_1 | H0_NeG4_NeC_1 -> 4.
    | H0_NeG5
    | HaPC_NeG5
    | HaPC_NeG5_NeC_div2
    | HaPC_NeG5_NeC_x2
    | H0_NeG5_NeC_div2
    | H0_NeG5_NeC_x2
    | HaPC_NeG5_NeC_1
    | H0_NeG5_NeC_1 -> 5.
  in
  let ne_c =
    match model with
    | H0_NeG1
    | H0_NeG2
    | H0_NeG3
    | H0_NeG4
    | H0_NeG5
    | HaPCOC
    | HaPC_NeG1
    | HaPC_NeG2
    | HaPC_NeG3
    | HaPC_NeG4
    | HaPC_NeG5 -> ne_g
    | HaPC_NeG5_NeC_div2 | H0_NeG5_NeC_div2 -> ne_g /. 2.
    | HaPC_NeG5_NeC_x2 | H0_NeG5_NeC_x2 -> ne_g *. 2.
    | HaPC_NeG1_NeC_5 | H0_NeG1_NeC_5 -> 5.
    | HaPC_NeG5_NeC_1 | H0_NeG5_NeC_1 | HaPC_NeG4_NeC_1 | H0_NeG4_NeC_1 -> 1.
    | HaPC_NeG1_NeC_4 | H0_NeG1_NeC_4 -> 4.
  in
  (* Currently equal to [ne_g] for every hypothesis. *)
  let ne_a =
    match model with
    | H0_NeG1
    | H0_NeG2
    | H0_NeG3
    | H0_NeG4
    | H0_NeG5
    | HaPCOC
    | HaPC_NeG1
    | HaPC_NeG2
    | HaPC_NeG3
    | HaPC_NeG4
    | HaPC_NeG5
    | HaPC_NeG5_NeC_div2
    | HaPC_NeG5_NeC_x2
    | H0_NeG5_NeC_div2
    | H0_NeG5_NeC_x2
    | HaPC_NeG1_NeC_5
    | HaPC_NeG5_NeC_1
    | H0_NeG1_NeC_5
    | H0_NeG5_NeC_1
    | HaPC_NeG1_NeC_4
    | HaPC_NeG4_NeC_1
    | H0_NeG1_NeC_4
    | H0_NeG4_NeC_1 -> ne_g
  in
  let profile_f = profile.profile_f in
  let profile_c = profile.profile_c in
  (*let seed = Random.int Int.max_value in*)
  let seed = calc_fixed_seed ~str:descr seed in
  let run_fna =
    Bppsuite.bppseqgen_multi_profiles
      ~descr
      ~nb_sites
      ~tree
      ~config:config_p
      ~profile_f
      ~profile_c
      ~ne_c
      ~ne_a
      ~seed
  in
  let fna = Bppsuite.bppseqgen_multi_profiles_get_fa run_fna in
  let fna_infos = Some (Bppsuite.bppseqgen_multi_profiles_get_info run_fna) in
  let faa = Bppsuite.fna2faa ~fna in
  let ready_dataset =
    { Ready_dataset.input_tree; tree_dataset; fna; faa; fna_infos }
  in
  { Dataset.model_prefix
  ; is_real = false
  ; tree_prefix
  ; dataset = ready_dataset
  ; seed
  }

(** [derive_from_tree] builds the simulated datasets for one tree file
    [tree] found in [tree_dir].

    The result is the concatenation of:
    - one dataset per hypothesis selected by [preview] and [no_Ne];
    - the H0_NeG5+HaPC_NeG5 concatenation when [use_concat] is set;
    - the indel variants of H0_NeG5 and HaPC_NeG5 when [add_indels] is set.

    The H0_NeG5, HaPCOC and HaPC_NeG5 datasets are always built because the
    concatenated and indel datasets derive from them, but they are not part
    of the returned list themselves. [no_HaPC] is accepted but not used, and
    [ne_test] currently selects an empty list in both cases. *)
let derive_from_tree
    ~tree_dir
    ~tree
    ~profile
    ~preview
    ~use_concat
    ~ns
    ~no_Ne
    ~no_HaPC
    ~ne_test
    ~add_indels
    ~seed
  =
  let tree_prefix = Filename.chop_extension tree in
  let input_tree = input (Filename.concat tree_dir tree) in
  let tree_dataset =
    Tree_dataset.prepare ~descr:("simulated_data." ^ tree_prefix) input_tree
  in
  let simulate model =
    derive_from_model
      ~model
      ~input_tree
      ~tree_dataset
      ~tree_prefix
      ~profile
      ~preview
      ~ns
      ~seed
  in
  let dataset_H0_NeG5 = simulate H0_NeG5 in
  let dataset_HaPCOC = simulate HaPCOC in
  let dataset_HaPC_NeG5 = simulate HaPC_NeG5 in
  (* The two indel seeds are drawn unconditionally and in this order, so the
     global random stream is consumed the same way whether or not
     [add_indels] is set. *)
  let indel_H0_NeG5 =
    Dataset.add_indels_to_dataset
      dataset_H0_NeG5
      ~seed:(Random.int Int.max_value)
  in
  let indel_HaPC_NeG5 =
    Dataset.add_indels_to_dataset
      dataset_HaPC_NeG5
      ~seed:(Random.int Int.max_value)
  in
  (* Kept for reference: currently excluded from the returned list. *)
  let _dataset_basis_hyps =
    [ dataset_H0_NeG5; dataset_HaPCOC; dataset_HaPC_NeG5 ]
  in
  let models =
    Convergence_hypothesis.[
      (* H0_NeG5, HaPC_NeG5 and HaPCOC are built separately above. *)
      [ H0_NeG1; HaPC_NeG1 ];
      (if preview
       then []
       else
         [ (* H0_NeG3; HaPC_NeG3; *)
           [ H0_NeG4; HaPC_NeG4; H0_NeG2; HaPC_NeG2 ];
           (if no_Ne
            then []
            else
              [ (* HaPC_NeG5_NeC_div2; HaPC_NeG5_NeC_x2; H0_NeG5_NeC_div2;
                   H0_NeG5_NeC_x2; HaPC_NeG1_NeC_5; HaPC_NeG5_NeC_1;
                   H0_NeG1_NeC_5; H0_NeG5_NeC_1; *)
                HaPC_NeG1_NeC_4;
                HaPC_NeG4_NeC_1;
                H0_NeG1_NeC_4;
                H0_NeG4_NeC_1;
              ]);
           (* Placeholder: no hypothesis is attached to [ne_test] yet. *)
           (if ne_test then [] else []);
         ]
         |> List.concat);
    ]
    |> List.concat
  in
  let dataset_per_hypo = List.map models ~f:simulate in
  let _concat_H0HaPCOC =
    { Dataset.model_prefix = "H0_NeG5+HaPCOC"
    ; tree_prefix
    ; is_real = false
    ; dataset =
        Ready_dataset.paste dataset_H0_NeG5.dataset dataset_HaPCOC.dataset
    ; seed = dataset_H0_NeG5.seed + dataset_HaPCOC.seed |> Hashtbl.hash
    }
  in
  let concat_H0HaPC =
    { Dataset.model_prefix = "H0_NeG5+HaPC_NeG5"
    ; tree_prefix
    ; is_real = false
    ; dataset =
        Ready_dataset.paste dataset_H0_NeG5.dataset dataset_HaPC_NeG5.dataset
    ; seed = dataset_H0_NeG5.seed + dataset_HaPC_NeG5.seed |> Hashtbl.hash
    }
  in
  let dataset_concat_hypos = if use_concat then [ concat_H0HaPC ] else [] in
  let dataset_with_indels =
    if add_indels then [ indel_H0_NeG5; indel_HaPC_NeG5 ] else []
  in
  List.concat
    [ (* _dataset_basis_hyps; *)
      dataset_per_hypo
    ; dataset_concat_hypos
    ; dataset_with_indels
    ]

(** [derive_sim] applies {!derive_from_tree} to every tree of [trees] and
    returns all resulting datasets as one flat list, in tree order. *)
let derive_sim
    ~tree_dir
    ~trees
    ~profile
    ~preview
    ~use_concat
    ~ns
    ~no_Ne
    ~no_HaPC
    ~ne_test
    ~add_indels
    ~seed
  =
  List.concat_map trees ~f:(fun tree ->
      derive_from_tree
        ~tree_dir
        ~tree
        ~profile
        ~preview
        ~use_concat
        ~add_indels
        ~ns
        ~no_Ne
        ~no_HaPC
        ~ne_test
        ~seed)

(** [repo_of_detection_result res] lists the repository items published for
    one detection result: the method's results table, its raw output under
    [raw_results], and, for Diffsel only, the chain convergence report. The
    items are placed under [Detection_tools/<method name>]. *)
let repo_of_detection_result res =
  let det_meth_prefix = Convergence_detection.meth_string_of_result res in
  Repo.[
    [ (match res with
       | `Pcoc w -> item ["pcoc.results.tsv"] (Pcoc.results w)
       | `Pcoc_gamma w -> item ["pcoc_gamma.results.tsv"] (Pcoc.results w)
       | `Pcoc_C60 w -> item ["pcoc_C60.results.tsv"] (Pcoc.results w)
       | `Diffsel w -> item ["diffsel.results.tsv"] (Diffsel.selector w)
       | `Identical_LG w ->
         item ["Identical_LG.results.tsv"] (Identical.results w)
       | `Identical_WAG w ->
         item ["Identical_WAG.results.tsv"] (Identical.results w)
       | `Topological_LG w ->
         item ["Topological_LG.results.tsv"] (Topological.results w)
       | `Topological_WAG w ->
         item ["Topological_WAG.results.tsv"] (Topological.results w)
       | `Tdg09 w -> item ["Tdg09.results.tsv"] (Tamuri.results w)
       | `Multinomial w ->
         item ["Multinomial.results.tsv"] (Multinomial.results w)
       | `Msd (w, e) ->
         item [sprintf "Msd.%f.results.tsv" e] (Msd.results w))
    ];
    [ (match res with
       | `Pcoc w -> item ["raw_results"] w
       | `Pcoc_gamma w -> item ["raw_results"] w
       | `Pcoc_C60 w -> item ["raw_results"] w
       | `Diffsel w -> item ["raw_results"] w
       | `Identical_LG w -> item ["raw_results"] w
       | `Identical_WAG w -> item ["raw_results"] w
       | `Topological_LG w -> item ["raw_results"] w
       | `Topological_WAG w -> item ["raw_results"] w
       | `Tdg09 w -> item ["raw_results"] w
       | `Multinomial w -> item ["raw_results"] w
       | `Msd (w, _) -> item ["raw_results"] w)
    ];
    (match res with
     | `Diffsel w ->
       [ item
           ["chain_convergence_checking.html"]
           ((Diffsel.check_conv w) / selector ["out.html"])
       ]
     | _ -> []);
  ]
  |> List.concat
  |> Repo.shift det_meth_prefix
  |> Repo.shift "Detection_tools"

(** [repo_of_dataset_results_l ~dataset_results_l] lists, for every dataset
    result, the merged results table, its plot, and the items of each
    detection method, all placed under the dataset's [model_prefix]. *)
let repo_of_dataset_results_l ~dataset_results_l =
  List.concat_map dataset_results_l ~f:(fun dataset_results ->
      let det_results_l = dataset_results.res_by_tools in
      let merged_results = dataset_results.merged_results in
      let plot = dataset_results.plot_merged_results in
      let model_prefix = dataset_results.model_prefix in
      let tree_prefix = dataset_results.tree_prefix in
      let base_name = tree_prefix ^ "." ^ model_prefix in
      let merged_results_item =
        Repo.item [ base_name ^ ".merged_results.tsv" ] merged_results
      in
      let plot_merged_results_item =
        Repo.item [ base_name ^ ".plot_merged_results.svg" ] plot
      in
      let per_method_items =
        List.concat_map det_results_l ~f:repo_of_detection_result
      in
      merged_results_item :: plot_merged_results_item :: per_method_items
      |> Repo.shift dataset_results.model_prefix)

(** [derive_from_det_meth ~det_meth ~dataset ~preview] runs detection method
    [det_meth] on [dataset] and returns the result tagged with the same
    variant. [preview] only shortens the Diffsel run (10 cycles instead of
    2000). *)
let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
  let faa = dataset.dataset.faa in
  let fna = dataset.dataset.fna in
  let phy_n = Bppsuite.fa2phy ~fna in
  let tree_sc = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
  let tree_id = Tree_dataset.tree dataset.dataset.tree_dataset `Simulation in
  let diffsel_tree = Tree_dataset.diffsel_tree dataset.dataset.tree_dataset in
  let tree_conv =
    Tree_dataset.topological_tree dataset.dataset.tree_dataset
  in
  (* Same value in preview and full mode. *)
  let w_every = 1 in
  let n_cycles = if preview then 10 else 2000 in
  let seed = Hashtbl.hash dataset.seed in
  match det_meth with
  | `Pcoc ->
    `Pcoc
      (Pcoc.pcoc
         ~catx_est:10
         ~plot_complete:true
         ~gamma:false
         ~faa
         ~tree:tree_sc)
  | `Pcoc_gamma ->
    `Pcoc_gamma
      (Pcoc.pcoc
         ~catx_est:10
         ~plot_complete:true
         ~gamma:true
         ~faa
         ~tree:tree_sc)
  | `Pcoc_C60 ->
    `Pcoc_C60
      (Pcoc.pcoc
         ~catx_est:60
         ~plot_complete:true
         ~gamma:false
         ~faa
         ~tree:tree_sc)
  | `Tdg09 -> `Tdg09 (Tamuri.tdg09 ~faa ~tree:tree_sc)
  | `Diffsel ->
    `Diffsel
      (Diffsel.diffsel
         ~phy_n
         ~tree:diffsel_tree
         ~w_every
         ~n_cycles
         ~id:1
         ~seed
         ())
  | `Identical_LG ->
    `Identical_LG
      (Identical.identical ~faa ~tree_id ~tree_sc ~prot_model:"LG08")
  | `Identical_WAG ->
    `Identical_WAG
      (Identical.identical ~faa ~tree_id ~tree_sc ~prot_model:"WAG01")
  | `Topological_LG ->
    `Topological_LG
      (Topological.topological
         ~faa
         ~tree:tree_id
         ~tree_conv
         ~prot_model:"LG08")
  | `Topological_WAG ->
    `Topological_WAG
      (Topological.topological
         ~faa
         ~tree:tree_id
         ~tree_conv
         ~prot_model:"WAG01")
  | `Multinomial -> `Multinomial (Multinomial.multinomial ~faa ~tree_id ~tree_sc)
  | `Msd e -> `Msd (Msd.msd ~e ~faa ~tree_sc, e)

(** [derive_from_dataset ~dataset ~preview ~fast_mode] runs the selected
    detection methods on [dataset], then merges and plots their results.

    A base set of methods always runs; [preview] drops Tdg09, Pcoc_gamma and
    the WAG variants; [fast_mode] drops Diffsel and Pcoc_C60. All sites are
    plotted for simulated datasets only. *)
let derive_from_dataset ~dataset ~preview ~fast_mode =
  let det_meths =
    [ [ `Identical_LG; `Topological_LG; `Multinomial; `Pcoc ]
      @ List.map [ 0.05 ] ~f:(fun x -> `Msd x)
    ; (if preview
       then []
       else [ `Tdg09; `Pcoc_gamma; `Identical_WAG; `Topological_WAG ])
    ; (if fast_mode then [] else [ `Diffsel; `Pcoc_C60 ])
    ]
    |> List.concat
  in
  let res_by_tools =
    List.map det_meths ~f:(fun det_meth ->
        derive_from_det_meth ~det_meth ~dataset ~preview)
  in
  let fna_infos = dataset.dataset.fna_infos in
  let merged_results = merge_results ~fna_infos ~res_by_tools () in
  let faa = dataset.dataset.faa in
  let tree = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
  let plot_all_sites = not dataset.is_real in
  let plot_merged_results =
    plot_merge_results
      ~plot_all_sites
      ~res_by_tools
      ~tsv:merged_results
      ~faa
      ~tree
      ()
  in
  let model_prefix = dataset.model_prefix in
  let tree_prefix = dataset.tree_prefix in
  { model_prefix
  ; tree_prefix
  ; dataset
  ; res_by_tools
  ; merged_results
  ; plot_merged_results
  }

(** [derive_det ~dataset_l ~preview ~fast_mode] applies
    {!derive_from_dataset} to every dataset of [dataset_l]. *)
let derive_det ~dataset_l ~preview ~fast_mode =
  List.map dataset_l ~f:(fun dataset ->
      derive_from_dataset ~preview ~dataset ~fast_mode)

(** [derive_profile] runs simulation and, unless [only_simu] is set,
    detection and per-tree post-analyses for every tree file of [tree_dir],
    for one [profile].

    It returns an object exposing [repo], the repository items shifted under
    the profile name, and [dataset_results], the detection results of all
    trees. [indir] is accepted but not used here. *)
let derive_profile
    ?(indir = "")
    ?(ns = 0)
    ~preview
    ~fast_mode
    ~no_Ne
    ~ne_test
    ~no_HaPC
    ~tree_dir
    ~profile
    ~use_concat
    ~add_indels
    ~only_simu
    ~seed
    ()
  =
  let trees = Array.to_list @@ Sys.readdir tree_dir in
  let repo_and_post_analyses_per_tree_simu =
    List.map trees ~f:(fun tree ->
        (* to keep together all models per tree *)
        let trees = [ tree ] in
        let tree_prefix = Filename.chop_extension tree in
        let dataset_l =
          derive_sim
            ~tree_dir
            ~trees
            ~profile
            ~preview
            ~use_concat
            ~ns
            ~no_Ne
            ~no_HaPC
            ~ne_test
            ~add_indels
            ~seed
        in
        let dataset_results_l =
          if only_simu then [] else derive_det ~dataset_l ~preview ~fast_mode
        in
        let post_analyses_res =
          Post_analyses.post_analyses_res_of_dataset_results_l
            ~tree_prefix
            ~dataset_results_l
        in
        let repo_per_tree =
          [ Dataset.repo dataset_l ~preview
          ; Repo.shift
              "Results_per_hypothesis"
              (repo_of_dataset_results_l ~dataset_results_l)
          ; Post_analyses.repo_of_post_analyses_res
              ~prefix:tree_prefix
              ~post_analyses_res
          ]
          |> List.concat
          |> Repo.shift tree_prefix
          |> Repo.shift "Results_per_tree"
        in
        repo_per_tree, post_analyses_res, dataset_results_l, dataset_l)
  in
  let all_repo_per_tree_simu =
    List.concat_map repo_and_post_analyses_per_tree_simu ~f:(fun (r, _, _, _) ->
        r)
  in
  let all_post_analyses_per_tree =
    List.map repo_and_post_analyses_per_tree_simu ~f:(fun (_, p, _, _) -> p)
  in
  let all_dataset_results =
    List.concat_map
      repo_and_post_analyses_per_tree_simu
      ~f:(fun (_, _, dr, _) -> dr)
  in
  let simu_dataset_l =
    List.concat_map repo_and_post_analyses_per_tree_simu ~f:(fun (_, _, _, d) ->
        d)
  in
  let post_analyses_simu =
    Post_analyses.post_analyses_simu_of_simu_dataset_l ~simu_dataset_l
  in
  let repo_of_post_analyses_simu =
    Post_analyses.repo_of_post_analyses_simu ~post_analyses_simu
  in
  let profile_prefix = profile.profile_n in
  (* NOTE(review): the cross-tree post-analysis repository is computed and
     then replaced by the empty list just below, so it never reaches the
     output. This looks like a deliberate temporary switch-off; confirm
     before re-enabling or deleting. *)
  let _disabled_repo_post_analyses_all_trees =
    Post_analyses.repo_post_analyses_all_trees_of_all_post_analyses_per_tree
      ~all_post_analyses_per_tree
      ~profile_prefix
  in
  let repo_post_analyses_all_trees = [] in
  let repo_post_analyses_all_trees =
    if only_simu then [] else repo_post_analyses_all_trees
  in
  let repo =
    repo_of_post_analyses_simu
    @ all_repo_per_tree_simu
    @ repo_post_analyses_all_trees
    |> Repo.shift profile_prefix
  in
  object
    method repo = repo
    method dataset_results = all_dataset_results
  end

(** Logger recording timings; its report is written at the end of
    [validation_main]. *)
let time_logger = Time_logger.create ()

(** Logger passed to every [Repo.build] call: console output, a dot dump of
    the workflow graph, an HTML report and the time logger. *)
let logger =
  Logger.tee
    [ Console_logger.create ()
    ; Dot_output.create "dag.dot" (* dot -Tpdf example/dag.dot -o dag.pdf *)
    ; Bistro_utils.Html_logger.create "report.html"
    ; time_logger#logger
    ]

(** [detection_main] runs the detection methods on the real datasets found
    in [indir] and builds the resulting repository in [outdir].

    [np] and [mem] (in GB) are forwarded to [Repo.build]. [seed] defaults to
    a random draw. *)
let detection_main
    ~outdir
    ~indir
    ?(np = 2)
    ?(mem = 2)
    ~preview
    ~fast_mode
    ?(seed = Random.int Int.max_value)
    ()
  =
  let dataset_l = parse_input_data ~seed indir in
  let dataset_results_l = derive_det ~dataset_l ~preview ~fast_mode in
  let repo = repo_of_dataset_results_l ~dataset_results_l in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo

(** [simulation_main] simulates datasets for every tree file of [tree_dir]
    and builds them in [outdir]; no detection is run.

    When [ns] is 0 the profile is built for 20 sites in preview mode and 50
    otherwise. *)
let simulation_main
    ~outdir
    ?(ns = 0)
    ?(np = 2)
    ?(mem = 2)
    ~tree_dir
    ~profile_fn
    ~preview
    ~use_concat
    ~no_Ne
    ~no_HaPC
    ~add_indels
    ?(seed = Random.int Int.max_value)
    ()
  =
  let nb_sites = if ns = 0 then if preview then 20 else 50 else ns in
  let profile =
    Profile.profile_l_of_splitted_profile
      ~nb_cat:All
      ~nb_sites
      profile_fn
      ~seed:(Random.int Int.max_value)
  in
  let trees = Array.to_list @@ Sys.readdir tree_dir in
  (* NOTE(review): the raw [ns] (possibly 0) is forwarded here, not the
     defaulted [nb_sites] -- confirm this is intended. *)
  let dataset_l =
    derive_sim
      ~tree_dir
      ~trees
      ~profile
      ~preview
      ~use_concat
      ~ns
      ~no_Ne
      ~no_HaPC
      ~ne_test:false
      ~add_indels
      ~seed
  in
  let repo = Dataset.repo dataset_l ~preview in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo

(** [validation_main] runs the full pipeline: simulation on the trees of
    [tree_dir], detection on the simulated datasets and on the real datasets
    of [indir] (skipped when [indir] is empty), then builds everything in
    [outdir] and writes a timing report there.

    The global seed is printed, written to the file [global.seed] in the
    current directory, and used to initialise the random generator.
    [only_simu] disables detection. *)
let validation_main
    ~outdir
    ?(indir = "")
    ?(ns = 0)
    ?(np = 2)
    ?(mem = 2)
    ~preview
    ~fast_mode
    ~no_Ne
    ~ne_test
    ~no_HaPC
    ~tree_dir
    ~profile_fn
    ~use_concat
    ~add_indels
    ~only_simu
    ?(seed = Random.int Int.max_value)
    ()
  =
  printf "Global seed: %i\n" seed;
  Out_channel.write_all "global.seed" ~data:(string_of_int seed);
  (* simulated trees *)
  Random.init seed;
  let nb_sites = if ns = 0 then if preview then 20 else 50 else ns in
  let profile =
    Profile.profile_l_of_splitted_profile
      ~nb_cat:Unif_3
      ~nb_sites
      profile_fn
      ~seed:(Random.int Int.max_value)
  in
  let sim_repo_l =
    derive_profile
      ~indir
      ~ns
      ~preview
      ~fast_mode
      ~no_Ne
      ~ne_test
      ~no_HaPC
      ~tree_dir
      ~profile
      ~use_concat
      ~add_indels
      ~only_simu
      ~seed
      ()
  in
  (* real trees *)
  let dataset_l =
    if String.is_empty indir then [] else parse_input_data ~seed indir
  in
  let dataset_results_l =
    if only_simu then [] else derive_det ~dataset_l ~preview ~fast_mode
  in
  let repo_real_trees =
    [ Dataset.repo dataset_l ~preview
    ; repo_of_dataset_results_l ~dataset_results_l
    ]
    |> List.concat
  in
  let repo =
    Repo.shift "Simulated_datasets" sim_repo_l#repo
    @ Repo.shift "Real_datasets" repo_real_trees
  in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo;
  time_logger#report
    sim_repo_l#dataset_results
    (Filename.concat
       outdir
       ("elapsed_time_"
        ^ (Unix.time () |> int_of_float |> string_of_int)
        ^ ".tsv"))

(** Command-line entry point for {!simulation_main}. *)
let simulation_command =
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run simulation pipeline"
    [%map_open
      let outdir =
        flag "--outdir" (required string) ~doc:"PATH Output directory"
      and preview = flag "--preview-mode" no_arg ~doc:" Preview mode"
      and no_Ne =
        flag
          "--no-ne"
          no_arg
          ~doc:" mode without hypothesis including different Ne"
      and no_HaPC =
        flag "--no-hapc" no_arg ~doc:" mode without ~HaPC hypothesis"
      and ns =
        flag "--ns" (optional int) ~doc:"INT Number of sites to simulate"
      and np =
        flag "--np" (optional int) ~doc:"INT Number of available processors"
      and mem =
        flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
      and use_concat =
        flag "--use-concat" no_arg ~doc:" Use concatenation H0+Ha_pcoc"
      and add_indels =
        flag "--add-indels" no_arg ~doc:" add indels in H*NeG5"
      and tree_dir =
        flag "--tree-dir" (required string) ~doc:"PATH Path to tree directory"
      and profile_fn =
        flag "--profile-fn" (required string) ~doc:"PATH Path to profile file"
      and seed = flag "--seed" (optional int) ~doc:"INT Global seed" in
      simulation_main
        ~outdir
        ?ns
        ?np
        ?mem
        ~no_Ne
        ~no_HaPC
        ~tree_dir
        ~profile_fn
        ~preview
        ~use_concat
        ~add_indels
        ?seed]

(** Command-line entry point for {!detection_main}. *)
let detection_command =
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run detection pipeline"
    [%map_open
      let outdir =
        flag "--outdir" (required string) ~doc:"PATH Output directory"
      and indir = flag "--indir" (required string) ~doc:"PATH Input directory"
      and preview = flag "--preview-mode" no_arg ~doc:" Preview mode"
      and fast_mode =
        flag
          "--fast"
          no_arg
          ~doc:" 'Fast' mode without the most costly methods"
      and np =
        flag "--np" (optional int) ~doc:"INT Number of available processors"
      and mem =
        flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
      and seed = flag "--seed" (optional int) ~doc:"INT Global seed" in
      detection_main ~outdir ~indir ?np ?mem ~preview ~fast_mode ?seed]

(** Command-line entry point for {!validation_main}. The random generator is
    self-initialised when this value is defined, so that the default seed
    differs between runs. *)
let validation_command =
  let () = Random.self_init () in
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run validation pipeline"
    [%map_open
      let outdir =
        flag "--outdir" (required string) ~doc:"PATH Output directory"
      and indir = flag "--indir" (optional string) ~doc:"PATH Input directory"
      and preview = flag "--preview-mode" no_arg ~doc:" Preview mode"
      and fast_mode =
        flag
          "--fast"
          no_arg
          ~doc:" 'Fast' mode without the most costly methods"
      and ne_test =
        flag
          "--ne-test"
          no_arg
          ~doc:" mode with hypothesis in test including different Ne"
      and no_Ne =
        flag
          "--no-ne"
          no_arg
          ~doc:" mode without hypothesis including different Ne"
      and no_HaPC =
        flag "--no-hapc" no_arg ~doc:" mode without ~HaPC hypothesis"
      and only_simu = flag "--only-simu" no_arg ~doc:" mode only simulation"
      and use_concat =
        flag "--use-concat" no_arg ~doc:" Use concatenation H0+Ha_pcoc"
      and add_indels =
        flag "--add-indels" no_arg ~doc:" add indels in H*NeG5"
      and ns =
        flag
          "--ns"
          (optional int)
          ~doc:
            "INT Number of sites to simulate (WARNING: will be multiplicated per 3)"
      and np =
        flag "--np" (optional int) ~doc:"INT Number of available processors"
      and mem =
        flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
      and tree_dir =
        flag "--tree-dir" (required string) ~doc:"PATH Path to tree directory"
      and profile_fn =
        flag "--profile-fn" (required string) ~doc:"PATH Path to profile file"
      and seed = flag "--seed" (optional int) ~doc:"INT Global seed" in
      validation_main
        ~outdir
        ?indir
        ?ns
        ?np
        ?mem
        ~preview
        ~fast_mode
        ~no_Ne
        ~ne_test
        ~no_HaPC
        ~tree_dir
        ~profile_fn
        ~use_concat
        ~only_simu
        ~add_indels
        ?seed]