pipeline.ml 22.4 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2
open Bistro_utils
LANORE Vincent's avatar
LANORE Vincent committed
3
open Bistro.EDSL
Carine Rey's avatar
typing  
Carine Rey committed
4
open Bistro.Std
5
open File_formats
Philippe Veber's avatar
Philippe Veber committed
6
open Defs
Carine Rey's avatar
Carine Rey committed
7
open Convergence_detection
8
open Profile
9

Carine Rey's avatar
Carine Rey committed
10
let parse_input_data ~seed indir =
11 12
  let datasets = Array.to_list @@ Sys.readdir indir in
  List.map datasets ~f:(fun dataset_prefix ->
LANORE Vincent's avatar
LANORE Vincent committed
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
      let files = Array.to_list @@ Sys.readdir (Filename.concat indir dataset_prefix ) in
      if List.length files = 2 then
        let h_file = List.hd files in
        let h_file = match h_file with
          | Some s -> s
          | None -> ""
        in
        let h_file_ext = Filename.split_extension h_file in
        let t_file = List.nth files 1 in
        let t_file = match t_file with
          | Some s -> s
          | None -> ""
        in
        let t_file_ext = Filename.split_extension t_file in
        let fna, input_tree = match (h_file_ext, t_file_ext) with
          | ( _ , Some "fna") ,  ( _ , Some "nhx") -> h_file, t_file
          | ( _ , Some "nhx"), ( _ , Some "fna") -> t_file, h_file
          | _ -> failwith ({|Syntax error: extension errors in |} ^ (Filename.concat indir dataset_prefix ) ^ " nhx: " ^ (h_file) ^ " fna: " ^ (t_file))
        in
        let tree_prefix = Filename.chop_extension input_tree in
        let input_tree = input (Filename.concat indir (Filename.concat dataset_prefix input_tree)) in
        let fna = input (Filename.concat indir (Filename.concat dataset_prefix fna)) in
        let fna_infos = None in
36
        let raw_dataset = Raw_dataset.{input_tree; fna; fna_infos} in
LANORE Vincent's avatar
LANORE Vincent committed
37 38 39
        let dataset = {Dataset.model_prefix = tree_prefix;
                       is_real = true;
                       tree_prefix = dataset_prefix;
Carine Rey's avatar
Carine Rey committed
40 41
                       dataset = Ready_dataset.of_raw ~descr:("real_data." ^ tree_prefix) raw_dataset;
                       seed;
LANORE Vincent's avatar
LANORE Vincent committed
42 43 44
                      } in
        [dataset]
      else
45
        failwith ({|More than 2 files in |} ^ (Filename.concat indir dataset_prefix ))
46 47 48
    )
  |> List.concat

Carine Rey's avatar
Carine Rey committed
49
let calc_fixed_seed ~(str:string) (seed:int) : int =
50 51
  let str_hash = Hashtbl.hash str in
  Hashtbl.hash (str_hash + seed)
Carine Rey's avatar
Carine Rey committed
52 53

let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed =
54
  let model_prefix = Convergence_hypothesis.string_of_model model in
55
  let nb_sites = ns in
56
  let nodes = Tree_dataset.nodes tree_dataset model in
Philippe Veber's avatar
Philippe Veber committed
57
  let tree = Tree_dataset.tree tree_dataset `Simulation in
58
  let descr = "."^model_prefix^"."^tree_prefix in
Carine Rey's avatar
Carine Rey committed
59
  (* only 1 profile or 1 couple of profiles*)
LANORE Vincent's avatar
LANORE Vincent committed
60
  (*let config = Convergence_hypothesis.bpp_config nodes model in
Carine Rey's avatar
Carine Rey committed
61
    let fna = Bppsuite.bppseqgen ~descr ~nb_sites ~tree ~config in
LANORE Vincent's avatar
LANORE Vincent committed
62
  *)
Carine Rey's avatar
Carine Rey committed
63 64
  (* with several profiles or couples of profiles *)
  let config_p = Convergence_hypothesis.bpp_config_F nodes model in
Carine Rey's avatar
Carine Rey committed
65
  let ne_g = match model with
66 67 68 69 70 71 72 73 74 75 76 77 78 79
    | H0_NeG1  -> 1.
    | H0_NeG2  -> 2.
    | H0_NeG3  -> 3.
    | H0_NeG4  -> 4.
    | H0_NeG5  -> 5.
    | HaPC_NeG1-> 1.
    | HaPC_NeG2-> 2.
    | HaPC_NeG3-> 3.
    | HaPC_NeG4-> 4.
    | HaPC_NeG5-> 5.
    | HaPC_NeG5_NeC_div2  -> 5.
    | HaPC_NeG5_NeC_x2    -> 5.
    | H0_NeG5_NeC_div2    -> 5.
    | H0_NeG5_NeC_x2      -> 5.
Carine Rey's avatar
Carine Rey committed
80 81 82 83
    | HaPC_NeG1_NeC_5 -> 1.
    | HaPC_NeG5_NeC_1 -> 5.
    | H0_NeG1_NeC_5   -> 1.
    | H0_NeG5_NeC_1   -> 5.
84
    | _ -> 1.
Carine Rey's avatar
Carine Rey committed
85
  in
86
  let ne_c = match model with
Carine Rey's avatar
Carine Rey committed
87 88 89 90 91 92 93 94 95 96
    | H0_NeG1  -> ne_g
    | H0_NeG2  -> ne_g
    | H0_NeG3  -> ne_g
    | H0_NeG4  -> ne_g
    | H0_NeG5  -> ne_g
    | HaPC_NeG1-> ne_g
    | HaPC_NeG2-> ne_g
    | HaPC_NeG3-> ne_g
    | HaPC_NeG4-> ne_g
    | HaPC_NeG5-> ne_g
97 98 99 100
    | HaPC_NeG5_NeC_div2 ->  ne_g /. 2.
    | HaPC_NeG5_NeC_x2   ->  ne_g *. 2.
    | H0_NeG5_NeC_div2   ->  ne_g /. 2.
    | H0_NeG5_NeC_x2     ->  ne_g *. 2.
Carine Rey's avatar
Carine Rey committed
101 102 103 104
    | HaPC_NeG1_NeC_5 -> 5.
    | HaPC_NeG5_NeC_1 -> 1.
    | H0_NeG1_NeC_5   -> 5.
    | H0_NeG5_NeC_1   -> 1.
105
    | _ ->  ne_g
106
  in
107
  let ne_a = match model with
Carine Rey's avatar
Carine Rey committed
108 109 110 111 112 113 114 115 116 117
    | H0_NeG1  -> ne_g
    | H0_NeG2  -> ne_g
    | H0_NeG3  -> ne_g
    | H0_NeG4  -> ne_g
    | H0_NeG5  -> ne_g
    | HaPC_NeG1-> ne_g
    | HaPC_NeG2-> ne_g
    | HaPC_NeG3-> ne_g
    | HaPC_NeG4-> ne_g
    | HaPC_NeG5-> ne_g
118 119 120 121
    | HaPC_NeG5_NeC_div2  -> ne_g
    | HaPC_NeG5_NeC_x2    -> ne_g
    | H0_NeG5_NeC_div2    -> ne_g
    | H0_NeG5_NeC_x2      -> ne_g
Carine Rey's avatar
Carine Rey committed
122 123 124 125
    | HaPC_NeG1_NeC_5     -> ne_g
    | HaPC_NeG5_NeC_1     -> ne_g
    | H0_NeG1_NeC_5       -> ne_g
    | H0_NeG5_NeC_1       -> ne_g
126
    | _ -> ne_g
127
  in
128 129
  let profile_f = profile.profile_f in
  let profile_c = profile.profile_c in
Carine Rey's avatar
Carine Rey committed
130 131
  (*let seed = Random.int Int.max_value in*)
  let seed = calc_fixed_seed ~str:descr seed in
Carine Rey's avatar
Carine Rey committed
132
  let run_fna = Bppsuite.bppseqgen_multi_profiles ~descr ~nb_sites ~tree ~config:config_p ~profile_f ~profile_c ~ne_c ~ne_a ~seed in
Carine Rey's avatar
Carine Rey committed
133 134
  let fna = Bppsuite.bppseqgen_multi_profiles_get_fa run_fna in
  let fna_infos = Some (Bppsuite.bppseqgen_multi_profiles_get_info run_fna) in
Carine Rey's avatar
Carine Rey committed
135

136
  let faa = Bppsuite.fna2faa ~fna in
137
  let ready_dataset = { Ready_dataset.input_tree = input_tree ; tree_dataset ; fna; faa; fna_infos } in
Carine Rey's avatar
Carine Rey committed
138
  { Dataset.model_prefix; is_real= false; tree_prefix; dataset = ready_dataset; seed }
Carine Rey's avatar
Carine Rey committed
139

140
let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test ~add_indels ~seed =
Carine Rey's avatar
Carine Rey committed
141
  let tree_prefix = Filename.chop_extension tree in
142
  let input_tree = input (Filename.concat tree_dir tree) in
143
  let tree_dataset = Tree_dataset.prepare ~descr:("simulated_data." ^ tree_prefix) input_tree in
144 145 146
  let dataset_H0_NeG5   = derive_from_model ~model:H0_NeG5   ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
  let dataset_HaPCOC    = derive_from_model ~model:HaPCOC    ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
  let dataset_HaPC_NeG5 = derive_from_model ~model:HaPC_NeG5 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
147 148
  let indel_H0_NeG5 = Dataset.add_indels_to_dataset dataset_H0_NeG5 ~seed:(Random.int Int.max_value) in
  let indel_HaPC_NeG5 = Dataset.add_indels_to_dataset dataset_HaPC_NeG5 ~seed:(Random.int Int.max_value) in
149
  let dataset_basis_hyps = [dataset_H0_NeG5; dataset_HaPCOC; dataset_HaPC_NeG5] in
Carine Rey's avatar
Carine Rey committed
150 151
  let models = Convergence_hypothesis.[
      [
Carine Rey's avatar
Carine Rey committed
152
        H0_NeG1  ;
153
        HaPC_NeG1;
154
        (*H0_NeG5  ;
155 156
          HaPC_NeG5;
          HaPCOC   ; calculated above in dataset_* *)
Carine Rey's avatar
Carine Rey committed
157
      ];
Carine Rey's avatar
Carine Rey committed
158 159
      if preview then
        []
Carine Rey's avatar
Carine Rey committed
160
      else
161
        [
162 163 164 165 166 167 168 169 170 171 172 173 174
          [H0_NeG3  ;
           H0_NeG4  ;
           HaPC_NeG3;
           HaPC_NeG4;
          ]
          ;
          (if no_Ne then
             []
           else
             [HaPC_NeG5_NeC_div2  ;
              HaPC_NeG5_NeC_x2    ;
              H0_NeG5_NeC_div2    ;
              H0_NeG5_NeC_x2      ;
Carine Rey's avatar
Carine Rey committed
175 176 177 178
              HaPC_NeG1_NeC_5     ;
              HaPC_NeG5_NeC_1     ;
              H0_NeG1_NeC_5       ;
              H0_NeG5_NeC_1       ;
179 180 181 182 183 184 185 186
             ]
          );
          (if ne_test then
             [
             ]
           else
             []
          )
Carine Rey's avatar
Carine Rey committed
187
        ] |> List.concat
Carine Rey's avatar
Carine Rey committed
188
    ] |> List.concat
Carine Rey's avatar
Carine Rey committed
189
  in
Carine Rey's avatar
Carine Rey committed
190
  let dataset_per_hypo = List.map models ~f:(fun model ->
Carine Rey's avatar
Carine Rey committed
191
      derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed
LANORE Vincent's avatar
LANORE Vincent committed
192
    ) in
Carine Rey's avatar
Carine Rey committed
193

194 195
  let _concat_H0HaPCOC = {Dataset.model_prefix="H0_NeG5+HaPCOC"; tree_prefix; is_real = false; dataset = Ready_dataset.paste dataset_H0_NeG5.dataset dataset_HaPCOC.dataset; seed=(dataset_H0_NeG5.seed + dataset_HaPCOC.seed |> Hashtbl.hash) } in
  let concat_H0HaPC = {Dataset.model_prefix="H0_NeG5+HaPC_NeG5"; tree_prefix; is_real = false; dataset = Ready_dataset.paste dataset_H0_NeG5.dataset dataset_HaPC_NeG5.dataset; seed=(dataset_H0_NeG5.seed + dataset_HaPC_NeG5.seed |> Hashtbl.hash) } in
196
  let dataset_concat_hypos = if use_concat then [concat_H0HaPC;] else [] in
197 198
  let dataset_with_indels = if add_indels then [indel_H0_NeG5 ; indel_HaPC_NeG5] else [] in
  List.concat [ dataset_basis_hyps; dataset_per_hypo ; dataset_concat_hypos; dataset_with_indels ]
199

200
let derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test ~add_indels ~seed =
LANORE Vincent's avatar
LANORE Vincent committed
201
  List.map trees ~f:(fun tree ->
202
      derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~add_indels ~ns ~no_Ne ~no_HaPC ~ne_test ~seed)
LANORE Vincent's avatar
LANORE Vincent committed
203 204
  |> List.concat

Carine Rey's avatar
Carine Rey committed
205

Philippe Veber's avatar
Philippe Veber committed
206 207 208
let repo_of_detection_result res =
  let det_meth_prefix = Convergence_detection.meth_string_of_result res in
  Repo.[
209
    [ match res with
Philippe Veber's avatar
Philippe Veber committed
210 211
      | `Pcoc w -> item ["pcoc.results.tsv"] (Pcoc.results w)
      | `Pcoc_gamma w -> item ["pcoc_gamma.results.tsv"] (Pcoc.results w)
Carine Rey's avatar
Carine Rey committed
212
      | `Pcoc_C60 w -> item ["pcoc_C60.results.tsv"] (Pcoc.results w)
Philippe Veber's avatar
Philippe Veber committed
213
      | `Diffsel w -> item ["diffsel.results.tsv"] (Diffsel.selector w)
Carine Rey's avatar
Carine Rey committed
214 215
      | `Identical_LG w -> item ["Identical_LG.results.tsv"] (Identical.results w)
      | `Identical_WAG w -> item ["Identical_WAG.results.tsv"] (Identical.results w)
216 217 218
      | `Topological_LG w -> item ["Topological_LG.results.tsv"] (Topological.results w)
      | `Topological_WAG w -> item ["Topological_WAG.results.tsv"] (Topological.results w)
      | `Tdg09 w -> item ["Tdg09.results.tsv"] (Tamuri.results w)
219
      | `Multinomial w -> item ["Multinomial.results.tsv"] (Multinomial.results w)
220
      | `Msd (w, e) -> item [sprintf "Msd.%f.results.tsv" e] (Msd.results w)
221 222
    ] ;
    [
Philippe Veber's avatar
Philippe Veber committed
223 224 225
      match res with
      | `Pcoc w -> item ["raw_results"]  w
      | `Pcoc_gamma w -> item ["raw_results"] w
Carine Rey's avatar
Carine Rey committed
226
      | `Pcoc_C60 w -> item ["raw_results"] w
Philippe Veber's avatar
Philippe Veber committed
227
      | `Diffsel w -> item ["raw_results"] w
Carine Rey's avatar
Carine Rey committed
228 229
      | `Identical_LG w -> item ["raw_results"] w
      | `Identical_WAG w -> item ["raw_results"] w
230 231 232
      | `Topological_LG w -> item ["raw_results"] w
      | `Topological_WAG w -> item ["raw_results"] w
      | `Tdg09 w -> item ["raw_results"] w
233
      | `Multinomial w -> item ["raw_results"] w
234
      | `Msd (w, _) -> item ["raw_results"] w
235
    ] ;
236 237 238
    match res with
    | `Diffsel w -> [item ["chain_convergence_checking.html"] ((Diffsel.check_conv w) / selector ["out.html"])]
    | _ -> []
239
  ] |> List.concat
Philippe Veber's avatar
Philippe Veber committed
240 241 242
  |> Repo.shift det_meth_prefix
  |> Repo.shift "Detection_tools"

243
let repo_of_dataset_results_l ~dataset_results_l =
Carine Rey's avatar
Carine Rey committed
244
  List.map dataset_results_l ~f:(fun dataset_results ->
LANORE Vincent's avatar
LANORE Vincent committed
245 246 247
      let det_results_l = dataset_results.res_by_tools in
      let merged_results = dataset_results.merged_results in
      let plot_merge_results = dataset_results.plot_merged_results in
248 249
      let model_prefix = dataset_results.model_prefix in
      let tree_prefix = dataset_results.tree_prefix in
Carine Rey's avatar
Carine Rey committed
250
      let merged_results_item = Repo.item [tree_prefix ^"."^model_prefix^".merged_results.tsv"] merged_results in
251
      let plot_merged_results_item = Repo.item [tree_prefix ^"."^model_prefix^".plot_merged_results.svg"] plot_merge_results in
LANORE Vincent's avatar
LANORE Vincent committed
252 253 254 255 256 257 258 259
      let repo =
        merged_results_item ::
        plot_merged_results_item ::
        (List.map det_results_l ~f:repo_of_detection_result |> List.concat)
      in
      repo
      |> Repo.shift dataset_results.model_prefix
    )
Carine Rey's avatar
Carine Rey committed
260 261
  |> List.concat

262 263 264
let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
  let faa = dataset.dataset.faa in
  let fna = dataset.dataset.fna in
Carine Rey's avatar
Carine Rey committed
265
  let phy_n = Bppsuite.fa2phy ~fna in
Carine Rey's avatar
Carine Rey committed
266 267
  let tree_sc = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
  let tree_id = Tree_dataset.tree dataset.dataset.tree_dataset `Simulation in
268
  let diffsel_tree = Tree_dataset.diffsel_tree dataset.dataset.tree_dataset in
269
  let tree_conv = Tree_dataset.topological_tree dataset.dataset.tree_dataset in
270 271
  let w_every = if preview then 1 else 1 in
  let n_cycles = if preview then 10 else 2000 in
272
  let seed = Hashtbl.hash dataset.seed in
Philippe Veber's avatar
Philippe Veber committed
273
  match det_meth with
LANORE Vincent's avatar
LANORE Vincent committed
274 275
  | `Pcoc -> `Pcoc (Pcoc.pcoc ~catx_est:10 ~plot_complete:true ~gamma:false ~faa ~tree:tree_sc)
  | `Pcoc_gamma -> `Pcoc_gamma (Pcoc.pcoc ~catx_est:10 ~plot_complete: true ~gamma:true ~faa ~tree:tree_sc)
Carine Rey's avatar
Carine Rey committed
276
  | `Pcoc_C60 -> `Pcoc_C60 (Pcoc.pcoc ~catx_est:60 ~plot_complete: true ~gamma:false ~faa ~tree:tree_sc)
LANORE Vincent's avatar
LANORE Vincent committed
277
  | `Tdg09 -> `Tdg09 (Tamuri.tdg09 ~faa ~tree:tree_sc)
278
  | `Diffsel -> `Diffsel (Diffsel.diffsel ~phy_n ~tree:diffsel_tree ~w_every ~n_cycles ~id:1 ~seed ())
LANORE Vincent's avatar
LANORE Vincent committed
279 280 281 282
  | `Identical_LG -> `Identical_LG (Identical.identical ~faa ~tree_id ~tree_sc ~prot_model:"LG08")
  | `Identical_WAG -> `Identical_WAG (Identical.identical ~faa ~tree_id ~tree_sc ~prot_model:"WAG01")
  | `Topological_LG -> `Topological_LG (Topological.topological ~faa ~tree:tree_id ~tree_conv ~prot_model:"LG08")
  | `Topological_WAG -> `Topological_WAG (Topological.topological ~faa ~tree:tree_id ~tree_conv ~prot_model:"WAG01")
283
  | `Multinomial -> `Multinomial (Multinomial.multinomial ~faa ~tree_id ~tree_sc)
284
  | `Msd e -> `Msd (Msd.msd ~e ~faa ~tree_sc, e)
Carine Rey's avatar
Carine Rey committed
285 286


287
let derive_from_dataset ~dataset ~preview ~fast_mode=
Carine Rey's avatar
Carine Rey committed
288
  let det_meths = [
Carine Rey's avatar
Carine Rey committed
289
    ([
Carine Rey's avatar
Carine Rey committed
290 291 292 293
      `Identical_LG;
      `Topological_LG;
      `Multinomial;
      `Pcoc;
Carine Rey's avatar
Carine Rey committed
294
    ] @ List.map [0.05] (fun x -> `Msd x)) ;
LANORE Vincent's avatar
LANORE Vincent committed
295 296 297
    if preview then
      []
    else
Carine Rey's avatar
update  
Carine Rey committed
298
      [ `Tdg09;
299 300 301
        `Pcoc_gamma;
        `Identical_WAG;
        `Topological_WAG;
LANORE Vincent's avatar
LANORE Vincent committed
302 303
      ]
    ;
304
    if fast_mode then
LANORE Vincent's avatar
LANORE Vincent committed
305 306
      []
    else
Carine Rey's avatar
Carine Rey committed
307
      [`Diffsel; `Pcoc_C60]  ;
LANORE Vincent's avatar
LANORE Vincent committed
308 309
  ]
    |> List.concat in
310
  let res_by_tools = List.map det_meths ~f:(fun det_meth ->
LANORE Vincent's avatar
LANORE Vincent committed
311
      derive_from_det_meth ~det_meth ~dataset ~preview
312
    ) in
Carine Rey's avatar
Carine Rey committed
313 314
  let fna_infos = dataset.dataset.fna_infos in
  let merged_results = merge_results ~fna_infos ~res_by_tools () in
Carine Rey's avatar
Carine Rey committed
315
  let tsv = merged_results in
316 317
  let faa = dataset.dataset.faa in
  let tree = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
Carine Rey's avatar
Carine Rey committed
318
  let plot_all_sites = if dataset.is_real then false else true in
319
  let plot_merged_results = plot_merge_results ~plot_all_sites ~res_by_tools ~tsv ~faa ~tree () in
Carine Rey's avatar
Carine Rey committed
320 321
  let model_prefix = dataset.model_prefix in
  let tree_prefix = dataset.tree_prefix in
Carine Rey's avatar
Carine Rey committed
322
  {model_prefix; tree_prefix; dataset; res_by_tools ; merged_results ; plot_merged_results}
Carine Rey's avatar
Carine Rey committed
323

324
let derive_det ~dataset_l ~preview ~fast_mode =
Carine Rey's avatar
Carine Rey committed
325
  List.map dataset_l ~f:(fun dataset ->
326
      derive_from_dataset ~preview ~dataset ~fast_mode)
Carine Rey's avatar
Carine Rey committed
327

Carine Rey's avatar
Carine Rey committed
328
let derive_profile ?(indir = "") ?(ns = 0) ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile ~use_concat ~add_indels ~only_simu ~seed () =
LANORE Vincent's avatar
LANORE Vincent committed
329
  let trees = Array.to_list @@ Sys.readdir tree_dir in
Carine Rey's avatar
Carine Rey committed
330
  let repo_and_post_analyses_per_tree_simu = List.map trees ~f:(fun tree -> (*to keep together all models per tree*)
331 332 333
      let trees = [tree] in
      let tree_prefix = Filename.chop_extension tree in
      let dataset_l =
334
        derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test ~add_indels ~seed in
Carine Rey's avatar
Carine Rey committed
335
      let dataset_results_l =
336
        if only_simu then
Carine Rey's avatar
Carine Rey committed
337 338 339 340
          []
        else
          derive_det ~dataset_l ~preview ~fast_mode
      in
341
      let post_analyses_res = Post_analyses.post_analyses_res_of_dataset_results_l ~tree_prefix ~dataset_results_l in
342 343
      let repo_per_tree = [
        Dataset.repo dataset_l ~preview ;
Carine Rey's avatar
Carine Rey committed
344 345 346 347 348 349
        Repo.shift "Results_per_hypothesis" (repo_of_dataset_results_l ~dataset_results_l);
        Post_analyses.repo_of_post_analyses_res ~prefix:tree_prefix ~post_analyses_res;
      ]
        |> List.concat
        |> Repo.shift tree_prefix
        |> Repo.shift "Results_per_tree"
350
      in
351
      (repo_per_tree, post_analyses_res, dataset_results_l, dataset_l)
Carine Rey's avatar
Carine Rey committed
352
    )
353
  in
354 355 356 357 358 359
  let all_repo_per_tree_simu = List.concat_map repo_and_post_analyses_per_tree_simu ~f:(fun (r, _, _, _) -> r) in
  let all_post_analyses_per_tree = List.map repo_and_post_analyses_per_tree_simu ~f:(fun (_, p, _, _) -> p) in
  let all_dataset_results = List.concat_map repo_and_post_analyses_per_tree_simu ~f:(fun (_, _, dr, _) -> dr) in
  let simu_dataset_l = List.concat_map repo_and_post_analyses_per_tree_simu ~f:(fun (_, _, _, d) -> d) in
  let post_analyses_simu = Post_analyses.post_analyses_simu_of_simu_dataset_l ~simu_dataset_l in
  let repo_of_post_analyses_simu = Post_analyses.repo_of_post_analyses_simu ~post_analyses_simu in
360 361
  let profile_prefix = profile.profile_n in
  let repo_post_analyses_all_trees = Post_analyses.repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~all_post_analyses_per_tree ~profile_prefix in
362 363 364 365 366
  let repo_post_analyses_all_trees = if only_simu then
      []
    else
      repo_post_analyses_all_trees
  in
367 368 369 370 371 372 373 374
  let repo =
    repo_of_post_analyses_simu @ all_repo_per_tree_simu @ repo_post_analyses_all_trees
    |> Repo.shift profile_prefix
  in
  object
    method repo = repo
    method dataset_results = all_dataset_results
  end
375

376
let time_logger = Time_logger.create ()
377 378 379 380 381 382

let logger =
  Logger.tee [
    Console_logger.create () ;
    Dot_output.create "dag.dot" ; (*dot -Tpdf example/dag.dot -o dag.pdf*)
    Bistro_utils.Html_logger.create "report.html" ;
383
    time_logger#logger ;
384 385
  ]

Carine Rey's avatar
Carine Rey committed
386 387
let detection_main ~outdir ~indir ?(np = 2) ?(mem = 2) ~preview ~fast_mode ?(seed = Random.int Int.max_value) () =
  let dataset_l = parse_input_data ~seed indir in
388 389 390 391
  let dataset_results_l = derive_det ~dataset_l ~preview ~fast_mode in
  let repo = repo_of_dataset_results_l ~dataset_results_l in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo

392
let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview ~use_concat ~no_Ne ~no_HaPC ~add_indels ?(seed = Random.int Int.max_value) () =
393
  let nb_sites = if ns = 0 then (if preview then 20 else 50) else ns in
Carine Rey's avatar
Carine Rey committed
394
  let profile = Profile.profile_l_of_splitted_profile ~nb_cat:All ~nb_sites profile_fn ~seed:(Random.int Int.max_value) in
395
  let trees = Array.to_list @@ Sys.readdir tree_dir in
396 397
  let dataset_l = derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test:false ~add_indels ~seed in

398 399 400
  let repo = Dataset.repo dataset_l ~preview in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo

Carine Rey's avatar
Carine Rey committed
401
let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile_fn ~use_concat ~add_indels ~only_simu ?(seed = Random.int Int.max_value) () =
Carine Rey's avatar
Carine Rey committed
402
  printf "Global seed: %i\n" seed;
403
  Out_channel.write_all "global.seed" ~data:(string_of_int seed);
404
  (* simulated trees *)
Philippe Veber's avatar
Philippe Veber committed
405
  Random.init seed ;
406
  let nb_sites = if ns = 0 then (if preview then 20 else 50) else ns in
Carine Rey's avatar
Carine Rey committed
407
  let profile = Profile.profile_l_of_splitted_profile ~nb_cat:Dist ~nb_sites profile_fn ~seed:(Random.int Int.max_value) in
Carine Rey's avatar
Carine Rey committed
408
  let sim_repo_l = derive_profile ~indir ~ns ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile ~use_concat ~add_indels ~only_simu ~seed () in
409
  (* real trees *)
Carine Rey's avatar
Carine Rey committed
410
  let indir_dataset_l = if indir = "" then [] else parse_input_data ~seed indir in
411 412
  let dataset_l = indir_dataset_l in
  let dataset_results_l =
413
    if only_simu then
414 415 416 417 418 419 420 421
      []
    else
      derive_det ~dataset_l ~preview ~fast_mode
  in
  let repo_real_trees = [
    Dataset.repo dataset_l ~preview ;
    repo_of_dataset_results_l ~dataset_results_l ;
  ] |> List.concat
Carine Rey's avatar
Carine Rey committed
422
  in
423 424
  let repo = (Repo.shift "Simulated_datasets" sim_repo_l#repo) @ (Repo.shift "Real_datasets" repo_real_trees) in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo ;
425
  time_logger#report sim_repo_l#dataset_results (Filename.concat outdir ("elapsed_time_" ^ (Unix.time () |> int_of_float |> string_of_int) ^ ".tsv"))
Philippe Veber's avatar
Philippe Veber committed
426

LANORE Vincent's avatar
LANORE Vincent committed
427
let simulation_command =
Philippe Veber's avatar
Philippe Veber committed
428 429 430 431 432
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run simulation pipeline"
    [%map_open
      let outdir =
LANORE Vincent's avatar
LANORE Vincent committed
433 434 435
        flag "--outdir" (required string) ~doc:"PATH Output directory"
      and preview =
        flag "--preview-mode" no_arg ~doc:" Preview mode"
Carine Rey's avatar
Carine Rey committed
436 437
      and no_Ne =
        flag "--no-ne" no_arg ~doc:" mode without hypothesis including different Ne"
Carine Rey's avatar
Carine Rey committed
438 439
      and no_HaPC =
        flag "--no-hapc" no_arg ~doc:" mode without ~HaPC hypothesis"
440 441
      and ns =
        flag "--ns" (optional int) ~doc:"INT Number of sites to simulate"
Philippe Veber's avatar
Philippe Veber committed
442 443 444 445
      and np =
        flag "--np" (optional int) ~doc:"INT Number of available processors"
      and mem =
        flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
446 447
      and use_concat =
        flag "--use-concat" no_arg ~doc:" Use concatenation H0+Ha_pcoc"
Carine Rey's avatar
Carine Rey committed
448 449
      and add_indels =
        flag "--add-indels" no_arg ~doc:" add indels in H*NeG5"
LANORE Vincent's avatar
LANORE Vincent committed
450 451
      and tree_dir =
        flag "--tree-dir" (required string) ~doc:"PATH Path to tree directory"
452 453
      and profile_fn =
        flag "--profile-fn" (required string) ~doc:"PATH Path to profile file"
Philippe Veber's avatar
Philippe Veber committed
454 455
      and seed =
        flag "--seed" (optional int) ~doc:"INT Global seed"
Philippe Veber's avatar
Philippe Veber committed
456
      in
457
      simulation_main ~outdir ?ns ?np ?mem ~no_Ne ~no_HaPC ~tree_dir ~profile_fn ~preview ~use_concat ~add_indels ?seed
LANORE Vincent's avatar
LANORE Vincent committed
458 459 460 461 462 463 464 465 466 467 468 469 470
    ]

let detection_command =
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run simulation pipeline"
    [%map_open
      let outdir =
        flag "--outdir" (required string) ~doc:"PATH Output directory"
      and indir =
        flag "--indir" (required string) ~doc:"PATH Input directory"
      and preview =
        flag "--preview-mode" no_arg ~doc:" Preview mode"
471 472
      and fast_mode =
        flag "--fast" no_arg ~doc:" 'Fast' mode without the most costly methods"
LANORE Vincent's avatar
LANORE Vincent committed
473 474 475 476
      and np =
        flag "--np" (optional int) ~doc:"INT Number of available processors"
      and mem =
        flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
Carine Rey's avatar
Carine Rey committed
477 478
      and seed =
        flag "--seed" (optional int) ~doc:"INT Global seed"
LANORE Vincent's avatar
LANORE Vincent committed
479
      in
Carine Rey's avatar
Carine Rey committed
480
      detection_main ~outdir ~indir ?np ?mem ~preview ~fast_mode ?seed
LANORE Vincent's avatar
LANORE Vincent committed
481 482
    ]

Philippe Veber's avatar
Philippe Veber committed
483
let validation_command =
484
  let _ = Random.self_init() in
Philippe Veber's avatar
Philippe Veber committed
485 486 487 488 489 490 491
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run simulation pipeline"
    [%map_open
      let outdir =
        flag "--outdir" (required string) ~doc:"PATH Output directory"
      and indir =
492
        flag "--indir" (optional string) ~doc:"PATH Input directory"
Philippe Veber's avatar
Philippe Veber committed
493 494
      and preview =
        flag "--preview-mode" no_arg ~doc:" Preview mode"
495 496
      and fast_mode =
        flag "--fast" no_arg ~doc:" 'Fast' mode without the most costly methods"
Carine Rey's avatar
Carine Rey committed
497 498
      and ne_test =
        flag "--ne-test" no_arg ~doc:" mode with hypothesis in test including different Ne"
Carine Rey's avatar
Carine Rey committed
499 500
      and no_Ne =
        flag "--no-ne" no_arg ~doc:" mode without hypothesis including different Ne"
Carine Rey's avatar
Carine Rey committed
501 502
      and no_HaPC =
        flag "--no-hapc" no_arg ~doc:" mode without ~HaPC hypothesis"
Carine Rey's avatar
Carine Rey committed
503 504
      and only_simu =
        flag "--only-simu" no_arg ~doc:" mode only simulation"
505 506
      and use_concat =
        flag "--use-concat" no_arg ~doc:" Use concatenation H0+Ha_pcoc"
Carine Rey's avatar
Carine Rey committed
507 508
      and add_indels =
        flag "--add-indels" no_arg ~doc:" add indels in H*NeG5"
509
      and ns =
510
        flag "--ns" (optional int) ~doc:"INT Number of sites to simulate (WARNING: will be multiplicated per 3)"
Philippe Veber's avatar
Philippe Veber committed
511 512 513 514 515 516 517 518
      and np =
        flag "--np" (optional int) ~doc:"INT Number of available processors"
      and mem =
        flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
      and tree_dir =
        flag "--tree-dir" (required string) ~doc:"PATH Path to tree directory"
      and profile_fn =
        flag "--profile-fn" (required string) ~doc:"PATH Path to profile file"
Philippe Veber's avatar
Philippe Veber committed
519 520
      and seed =
        flag "--seed" (optional int) ~doc:"INT Global seed"
Philippe Veber's avatar
Philippe Veber committed
521
      in
Carine Rey's avatar
Carine Rey committed
522
      validation_main ~outdir ?indir ?ns ?np ?mem ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile_fn ~use_concat ~only_simu ~add_indels ?seed
Philippe Veber's avatar
Philippe Veber committed
523
    ]