pipeline.ml 23 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2
open Bistro_utils
LANORE Vincent's avatar
LANORE Vincent committed
3
open Bistro.EDSL
Carine Rey's avatar
typing  
Carine Rey committed
4
open Bistro.Std
5
open File_formats
Philippe Veber's avatar
Philippe Veber committed
6
open Defs
Carine Rey's avatar
Carine Rey committed
7
open Convergence_detection
8
open Profile
9

Carine Rey's avatar
Carine Rey committed
10
let parse_input_data ~seed indir =
11 12
  let datasets = Array.to_list @@ Sys.readdir indir in
  List.map datasets ~f:(fun dataset_prefix ->
LANORE Vincent's avatar
LANORE Vincent committed
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
      let files = Array.to_list @@ Sys.readdir (Filename.concat indir dataset_prefix ) in
      if List.length files = 2 then
        let h_file = List.hd files in
        let h_file = match h_file with
          | Some s -> s
          | None -> ""
        in
        let h_file_ext = Filename.split_extension h_file in
        let t_file = List.nth files 1 in
        let t_file = match t_file with
          | Some s -> s
          | None -> ""
        in
        let t_file_ext = Filename.split_extension t_file in
        let fna, input_tree = match (h_file_ext, t_file_ext) with
          | ( _ , Some "fna") ,  ( _ , Some "nhx") -> h_file, t_file
          | ( _ , Some "nhx"), ( _ , Some "fna") -> t_file, h_file
          | _ -> failwith ({|Syntax error: extension errors in |} ^ (Filename.concat indir dataset_prefix ) ^ " nhx: " ^ (h_file) ^ " fna: " ^ (t_file))
        in
        let tree_prefix = Filename.chop_extension input_tree in
        let input_tree = input (Filename.concat indir (Filename.concat dataset_prefix input_tree)) in
        let fna = input (Filename.concat indir (Filename.concat dataset_prefix fna)) in
        let fna_infos = None in
36
        let raw_dataset = Raw_dataset.{input_tree; fna; fna_infos} in
LANORE Vincent's avatar
LANORE Vincent committed
37 38 39
        let dataset = {Dataset.model_prefix = tree_prefix;
                       is_real = true;
                       tree_prefix = dataset_prefix;
Carine Rey's avatar
Carine Rey committed
40 41
                       dataset = Ready_dataset.of_raw ~descr:("real_data." ^ tree_prefix) raw_dataset;
                       seed;
LANORE Vincent's avatar
LANORE Vincent committed
42 43 44
                      } in
        [dataset]
      else
45
        failwith ({|More than 2 files in |} ^ (Filename.concat indir dataset_prefix ))
46 47 48
    )
  |> List.concat

Carine Rey's avatar
Carine Rey committed
49
let calc_fixed_seed ~(str:string) (seed:int) : int =
50 51
  let str_hash = Hashtbl.hash str in
  Hashtbl.hash (str_hash + seed)
Carine Rey's avatar
Carine Rey committed
52 53

let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed =
54
  let model_prefix = Convergence_hypothesis.string_of_model model in
55
  let nb_sites = ns in
56
  let nodes = Tree_dataset.nodes tree_dataset model in
Philippe Veber's avatar
Philippe Veber committed
57
  let tree = Tree_dataset.tree tree_dataset `Simulation in
58
  let descr = "."^model_prefix^"."^tree_prefix in
Carine Rey's avatar
Carine Rey committed
59
  (* only 1 profile or 1 couple of profiles*)
LANORE Vincent's avatar
LANORE Vincent committed
60
  (*let config = Convergence_hypothesis.bpp_config nodes model in
Carine Rey's avatar
Carine Rey committed
61
    let fna = Bppsuite.bppseqgen ~descr ~nb_sites ~tree ~config in
LANORE Vincent's avatar
LANORE Vincent committed
62
  *)
Carine Rey's avatar
Carine Rey committed
63 64
  (* with several profiles or couples of profiles *)
  let config_p = Convergence_hypothesis.bpp_config_F nodes model in
Carine Rey's avatar
Carine Rey committed
65
  let ne_g = match model with
66 67 68 69 70
    | H0_NeG1  -> 1.
    | H0_NeG2  -> 2.
    | H0_NeG3  -> 3.
    | H0_NeG4  -> 4.
    | H0_NeG5  -> 5.
Carine Rey's avatar
Carine Rey committed
71
    | HaPCOC   -> 1.
72 73 74 75 76 77 78 79 80
    | HaPC_NeG1-> 1.
    | HaPC_NeG2-> 2.
    | HaPC_NeG3-> 3.
    | HaPC_NeG4-> 4.
    | HaPC_NeG5-> 5.
    | HaPC_NeG5_NeC_div2  -> 5.
    | HaPC_NeG5_NeC_x2    -> 5.
    | H0_NeG5_NeC_div2    -> 5.
    | H0_NeG5_NeC_x2      -> 5.
81 82 83 84
    | HaPC_NeG1_NeC_5 -> 1.
    | HaPC_NeG5_NeC_1 -> 5.
    | H0_NeG1_NeC_5   -> 1.
    | H0_NeG5_NeC_1   -> 5.
85 86 87 88
    | HaPC_NeG1_NeC_4 -> 1.
    | HaPC_NeG4_NeC_1 -> 4.
    | H0_NeG1_NeC_4   -> 1.
    | H0_NeG4_NeC_1   -> 4.
Carine Rey's avatar
Carine Rey committed
89
  in
90
  let ne_c = match model with
Carine Rey's avatar
Carine Rey committed
91 92 93 94 95
    | H0_NeG1  -> ne_g
    | H0_NeG2  -> ne_g
    | H0_NeG3  -> ne_g
    | H0_NeG4  -> ne_g
    | H0_NeG5  -> ne_g
Carine Rey's avatar
Carine Rey committed
96
    | HaPCOC   -> ne_g
Carine Rey's avatar
Carine Rey committed
97 98 99 100 101
    | HaPC_NeG1-> ne_g
    | HaPC_NeG2-> ne_g
    | HaPC_NeG3-> ne_g
    | HaPC_NeG4-> ne_g
    | HaPC_NeG5-> ne_g
102 103 104 105
    | HaPC_NeG5_NeC_div2 ->  ne_g /. 2.
    | HaPC_NeG5_NeC_x2   ->  ne_g *. 2.
    | H0_NeG5_NeC_div2   ->  ne_g /. 2.
    | H0_NeG5_NeC_x2     ->  ne_g *. 2.
106 107 108 109
    | HaPC_NeG1_NeC_5 -> 5.
    | HaPC_NeG5_NeC_1 -> 1.
    | H0_NeG1_NeC_5   -> 5.
    | H0_NeG5_NeC_1   -> 1.
110 111 112 113
    | HaPC_NeG1_NeC_4 -> 4.
    | HaPC_NeG4_NeC_1 -> 1.
    | H0_NeG1_NeC_4   -> 4.
    | H0_NeG4_NeC_1   -> 1.
114
  in
115
  let ne_a = match model with
Carine Rey's avatar
Carine Rey committed
116 117 118 119 120
    | H0_NeG1  -> ne_g
    | H0_NeG2  -> ne_g
    | H0_NeG3  -> ne_g
    | H0_NeG4  -> ne_g
    | H0_NeG5  -> ne_g
Carine Rey's avatar
Carine Rey committed
121
    | HaPCOC   -> ne_g
Carine Rey's avatar
Carine Rey committed
122 123 124 125 126
    | HaPC_NeG1-> ne_g
    | HaPC_NeG2-> ne_g
    | HaPC_NeG3-> ne_g
    | HaPC_NeG4-> ne_g
    | HaPC_NeG5-> ne_g
127 128 129 130
    | HaPC_NeG5_NeC_div2  -> ne_g
    | HaPC_NeG5_NeC_x2    -> ne_g
    | H0_NeG5_NeC_div2    -> ne_g
    | H0_NeG5_NeC_x2      -> ne_g
131 132 133 134
    | HaPC_NeG1_NeC_5     -> ne_g
    | HaPC_NeG5_NeC_1     -> ne_g
    | H0_NeG1_NeC_5       -> ne_g
    | H0_NeG5_NeC_1       -> ne_g
135 136 137 138
    | HaPC_NeG1_NeC_4     -> ne_g
    | HaPC_NeG4_NeC_1     -> ne_g
    | H0_NeG1_NeC_4       -> ne_g
    | H0_NeG4_NeC_1       -> ne_g
139
  in
140 141
  let profile_f = profile.profile_f in
  let profile_c = profile.profile_c in
Carine Rey's avatar
Carine Rey committed
142 143
  (*let seed = Random.int Int.max_value in*)
  let seed = calc_fixed_seed ~str:descr seed in
Carine Rey's avatar
Carine Rey committed
144
  let run_fna = Bppsuite.bppseqgen_multi_profiles ~descr ~nb_sites ~tree ~config:config_p ~profile_f ~profile_c ~ne_c ~ne_a ~seed in
Carine Rey's avatar
Carine Rey committed
145 146
  let fna = Bppsuite.bppseqgen_multi_profiles_get_fa run_fna in
  let fna_infos = Some (Bppsuite.bppseqgen_multi_profiles_get_info run_fna) in
Carine Rey's avatar
Carine Rey committed
147

148
  let faa = Bppsuite.fna2faa ~fna in
149
  let ready_dataset = { Ready_dataset.input_tree = input_tree ; tree_dataset ; fna; faa; fna_infos } in
Carine Rey's avatar
Carine Rey committed
150
  { Dataset.model_prefix; is_real= false; tree_prefix; dataset = ready_dataset; seed }
Carine Rey's avatar
Carine Rey committed
151

152
let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test ~add_indels ~seed =
Carine Rey's avatar
Carine Rey committed
153
  let tree_prefix = Filename.chop_extension tree in
154
  let input_tree = input (Filename.concat tree_dir tree) in
155
  let tree_dataset = Tree_dataset.prepare ~descr:("simulated_data." ^ tree_prefix) input_tree in
156 157 158
  let dataset_H0_NeG5   = derive_from_model ~model:H0_NeG5   ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
  let dataset_HaPCOC    = derive_from_model ~model:HaPCOC    ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
  let dataset_HaPC_NeG5 = derive_from_model ~model:HaPC_NeG5 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
159 160
  let indel_H0_NeG5 = Dataset.add_indels_to_dataset dataset_H0_NeG5 ~seed:(Random.int Int.max_value) in
  let indel_HaPC_NeG5 = Dataset.add_indels_to_dataset dataset_HaPC_NeG5 ~seed:(Random.int Int.max_value) in
161
  let dataset_basis_hyps = [dataset_H0_NeG5; dataset_HaPCOC; dataset_HaPC_NeG5] in
Carine Rey's avatar
Carine Rey committed
162 163
  let models = Convergence_hypothesis.[
      [
Carine Rey's avatar
Carine Rey committed
164
        H0_NeG1  ;
165
        HaPC_NeG1;
166
        (*H0_NeG5  ;
167 168
          HaPC_NeG5;
          HaPCOC   ; calculated above in dataset_* *)
Carine Rey's avatar
Carine Rey committed
169
      ];
Carine Rey's avatar
Carine Rey committed
170 171
      if preview then
        []
Carine Rey's avatar
Carine Rey committed
172
      else
173
        [
Carine Rey's avatar
Carine Rey committed
174 175
          [(*H0_NeG3  ;HaPC_NeG3; *)
           H0_NeG4;
176
           HaPC_NeG4;
Carine Rey's avatar
Carine Rey committed
177 178
           H0_NeG2;
           HaPC_NeG2;
179 180 181 182 183
          ]
          ;
          (if no_Ne then
             []
           else
Carine Rey's avatar
Carine Rey committed
184
             [(*HaPC_NeG5_NeC_div2  ;
185 186 187
              HaPC_NeG5_NeC_x2    ;
              H0_NeG5_NeC_div2    ;
              H0_NeG5_NeC_x2      ;
188 189 190
              HaPC_NeG1_NeC_5     ;
              HaPC_NeG5_NeC_1     ;
              H0_NeG1_NeC_5       ;
Carine Rey's avatar
Carine Rey committed
191
              H0_NeG5_NeC_1       ;*)
192 193 194 195
              HaPC_NeG1_NeC_4     ;
              HaPC_NeG4_NeC_1     ;
              H0_NeG1_NeC_4       ;
              H0_NeG4_NeC_1       ;
196 197 198 199 200 201 202 203
             ]
          );
          (if ne_test then
             [
             ]
           else
             []
          )
Carine Rey's avatar
Carine Rey committed
204
        ] |> List.concat
Carine Rey's avatar
Carine Rey committed
205
    ] |> List.concat
Carine Rey's avatar
Carine Rey committed
206
  in
Carine Rey's avatar
Carine Rey committed
207
  let dataset_per_hypo = List.map models ~f:(fun model ->
Carine Rey's avatar
Carine Rey committed
208
      derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed
LANORE Vincent's avatar
LANORE Vincent committed
209
    ) in
Carine Rey's avatar
Carine Rey committed
210

211 212
  let _concat_H0HaPCOC = {Dataset.model_prefix="H0_NeG5+HaPCOC"; tree_prefix; is_real = false; dataset = Ready_dataset.paste dataset_H0_NeG5.dataset dataset_HaPCOC.dataset; seed=(dataset_H0_NeG5.seed + dataset_HaPCOC.seed |> Hashtbl.hash) } in
  let concat_H0HaPC = {Dataset.model_prefix="H0_NeG5+HaPC_NeG5"; tree_prefix; is_real = false; dataset = Ready_dataset.paste dataset_H0_NeG5.dataset dataset_HaPC_NeG5.dataset; seed=(dataset_H0_NeG5.seed + dataset_HaPC_NeG5.seed |> Hashtbl.hash) } in
213
  let dataset_concat_hypos = if use_concat then [concat_H0HaPC;] else [] in
214
  let dataset_with_indels = if add_indels then [indel_H0_NeG5 ; indel_HaPC_NeG5] else [] in
Carine Rey's avatar
Carine Rey committed
215
  List.concat [ (*dataset_basis_hyps;*) dataset_per_hypo ; dataset_concat_hypos; dataset_with_indels ]
216

217
let derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test ~add_indels ~seed =
LANORE Vincent's avatar
LANORE Vincent committed
218
  List.map trees ~f:(fun tree ->
219
      derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~add_indels ~ns ~no_Ne ~no_HaPC ~ne_test ~seed)
LANORE Vincent's avatar
LANORE Vincent committed
220 221
  |> List.concat

Carine Rey's avatar
Carine Rey committed
222

Philippe Veber's avatar
Philippe Veber committed
223 224 225
let repo_of_detection_result res =
  let det_meth_prefix = Convergence_detection.meth_string_of_result res in
  Repo.[
226
    [ match res with
Philippe Veber's avatar
Philippe Veber committed
227 228
      | `Pcoc w -> item ["pcoc.results.tsv"] (Pcoc.results w)
      | `Pcoc_gamma w -> item ["pcoc_gamma.results.tsv"] (Pcoc.results w)
Carine Rey's avatar
Carine Rey committed
229
      | `Pcoc_C60 w -> item ["pcoc_C60.results.tsv"] (Pcoc.results w)
Philippe Veber's avatar
Philippe Veber committed
230
      | `Diffsel w -> item ["diffsel.results.tsv"] (Diffsel.selector w)
Carine Rey's avatar
Carine Rey committed
231 232
      | `Identical_LG w -> item ["Identical_LG.results.tsv"] (Identical.results w)
      | `Identical_WAG w -> item ["Identical_WAG.results.tsv"] (Identical.results w)
233 234 235
      | `Topological_LG w -> item ["Topological_LG.results.tsv"] (Topological.results w)
      | `Topological_WAG w -> item ["Topological_WAG.results.tsv"] (Topological.results w)
      | `Tdg09 w -> item ["Tdg09.results.tsv"] (Tamuri.results w)
236
      | `Multinomial w -> item ["Multinomial.results.tsv"] (Multinomial.results w)
237
      | `Msd (w, e) -> item [sprintf "Msd.%f.results.tsv" e] (Msd.results w)
238 239
    ] ;
    [
Philippe Veber's avatar
Philippe Veber committed
240 241 242
      match res with
      | `Pcoc w -> item ["raw_results"]  w
      | `Pcoc_gamma w -> item ["raw_results"] w
Carine Rey's avatar
Carine Rey committed
243
      | `Pcoc_C60 w -> item ["raw_results"] w
Philippe Veber's avatar
Philippe Veber committed
244
      | `Diffsel w -> item ["raw_results"] w
Carine Rey's avatar
Carine Rey committed
245 246
      | `Identical_LG w -> item ["raw_results"] w
      | `Identical_WAG w -> item ["raw_results"] w
247 248 249
      | `Topological_LG w -> item ["raw_results"] w
      | `Topological_WAG w -> item ["raw_results"] w
      | `Tdg09 w -> item ["raw_results"] w
250
      | `Multinomial w -> item ["raw_results"] w
251
      | `Msd (w, _) -> item ["raw_results"] w
252
    ] ;
253 254 255
    match res with
    | `Diffsel w -> [item ["chain_convergence_checking.html"] ((Diffsel.check_conv w) / selector ["out.html"])]
    | _ -> []
256
  ] |> List.concat
Philippe Veber's avatar
Philippe Veber committed
257 258 259
  |> Repo.shift det_meth_prefix
  |> Repo.shift "Detection_tools"

260
let repo_of_dataset_results_l ~dataset_results_l =
Carine Rey's avatar
Carine Rey committed
261
  List.map dataset_results_l ~f:(fun dataset_results ->
LANORE Vincent's avatar
LANORE Vincent committed
262 263 264
      let det_results_l = dataset_results.res_by_tools in
      let merged_results = dataset_results.merged_results in
      let plot_merge_results = dataset_results.plot_merged_results in
265 266
      let model_prefix = dataset_results.model_prefix in
      let tree_prefix = dataset_results.tree_prefix in
Carine Rey's avatar
Carine Rey committed
267
      let merged_results_item = Repo.item [tree_prefix ^"."^model_prefix^".merged_results.tsv"] merged_results in
268
      let plot_merged_results_item = Repo.item [tree_prefix ^"."^model_prefix^".plot_merged_results.svg"] plot_merge_results in
LANORE Vincent's avatar
LANORE Vincent committed
269 270 271 272 273 274 275 276
      let repo =
        merged_results_item ::
        plot_merged_results_item ::
        (List.map det_results_l ~f:repo_of_detection_result |> List.concat)
      in
      repo
      |> Repo.shift dataset_results.model_prefix
    )
Carine Rey's avatar
Carine Rey committed
277 278
  |> List.concat

279 280 281
let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
  let faa = dataset.dataset.faa in
  let fna = dataset.dataset.fna in
Carine Rey's avatar
Carine Rey committed
282
  let phy_n = Bppsuite.fa2phy ~fna in
Carine Rey's avatar
Carine Rey committed
283 284
  let tree_sc = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
  let tree_id = Tree_dataset.tree dataset.dataset.tree_dataset `Simulation in
285
  let diffsel_tree = Tree_dataset.diffsel_tree dataset.dataset.tree_dataset in
286
  let tree_conv = Tree_dataset.topological_tree dataset.dataset.tree_dataset in
287 288
  let w_every = if preview then 1 else 1 in
  let n_cycles = if preview then 10 else 2000 in
289
  let seed = Hashtbl.hash dataset.seed in
Philippe Veber's avatar
Philippe Veber committed
290
  match det_meth with
LANORE Vincent's avatar
LANORE Vincent committed
291 292
  | `Pcoc -> `Pcoc (Pcoc.pcoc ~catx_est:10 ~plot_complete:true ~gamma:false ~faa ~tree:tree_sc)
  | `Pcoc_gamma -> `Pcoc_gamma (Pcoc.pcoc ~catx_est:10 ~plot_complete: true ~gamma:true ~faa ~tree:tree_sc)
Carine Rey's avatar
Carine Rey committed
293
  | `Pcoc_C60 -> `Pcoc_C60 (Pcoc.pcoc ~catx_est:60 ~plot_complete: true ~gamma:false ~faa ~tree:tree_sc)
LANORE Vincent's avatar
LANORE Vincent committed
294
  | `Tdg09 -> `Tdg09 (Tamuri.tdg09 ~faa ~tree:tree_sc)
295
  | `Diffsel -> `Diffsel (Diffsel.diffsel ~phy_n ~tree:diffsel_tree ~w_every ~n_cycles ~id:1 ~seed ())
LANORE Vincent's avatar
LANORE Vincent committed
296 297 298 299
  | `Identical_LG -> `Identical_LG (Identical.identical ~faa ~tree_id ~tree_sc ~prot_model:"LG08")
  | `Identical_WAG -> `Identical_WAG (Identical.identical ~faa ~tree_id ~tree_sc ~prot_model:"WAG01")
  | `Topological_LG -> `Topological_LG (Topological.topological ~faa ~tree:tree_id ~tree_conv ~prot_model:"LG08")
  | `Topological_WAG -> `Topological_WAG (Topological.topological ~faa ~tree:tree_id ~tree_conv ~prot_model:"WAG01")
300
  | `Multinomial -> `Multinomial (Multinomial.multinomial ~faa ~tree_id ~tree_sc)
301
  | `Msd e -> `Msd (Msd.msd ~e ~faa ~tree_sc, e)
Carine Rey's avatar
Carine Rey committed
302 303


304
let derive_from_dataset ~dataset ~preview ~fast_mode=
Carine Rey's avatar
Carine Rey committed
305
  let det_meths = [
Carine Rey's avatar
Carine Rey committed
306
    ([
Carine Rey's avatar
Carine Rey committed
307 308 309 310
      `Identical_LG;
      `Topological_LG;
      `Multinomial;
      `Pcoc;
Carine Rey's avatar
Carine Rey committed
311
    ] @ List.map [0.05] (fun x -> `Msd x)) ;
LANORE Vincent's avatar
LANORE Vincent committed
312 313 314
    if preview then
      []
    else
Carine Rey's avatar
update  
Carine Rey committed
315
      [ `Tdg09;
316 317 318
        `Pcoc_gamma;
        `Identical_WAG;
        `Topological_WAG;
LANORE Vincent's avatar
LANORE Vincent committed
319 320
      ]
    ;
321
    if fast_mode then
LANORE Vincent's avatar
LANORE Vincent committed
322 323
      []
    else
Carine Rey's avatar
Carine Rey committed
324
      [`Diffsel; `Pcoc_C60]  ;
LANORE Vincent's avatar
LANORE Vincent committed
325 326
  ]
    |> List.concat in
327
  let res_by_tools = List.map det_meths ~f:(fun det_meth ->
LANORE Vincent's avatar
LANORE Vincent committed
328
      derive_from_det_meth ~det_meth ~dataset ~preview
329
    ) in
Carine Rey's avatar
Carine Rey committed
330 331
  let fna_infos = dataset.dataset.fna_infos in
  let merged_results = merge_results ~fna_infos ~res_by_tools () in
Carine Rey's avatar
Carine Rey committed
332
  let tsv = merged_results in
333 334
  let faa = dataset.dataset.faa in
  let tree = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
Carine Rey's avatar
Carine Rey committed
335
  let plot_all_sites = if dataset.is_real then false else true in
336
  let plot_merged_results = plot_merge_results ~plot_all_sites ~res_by_tools ~tsv ~faa ~tree () in
Carine Rey's avatar
Carine Rey committed
337 338
  let model_prefix = dataset.model_prefix in
  let tree_prefix = dataset.tree_prefix in
Carine Rey's avatar
Carine Rey committed
339
  {model_prefix; tree_prefix; dataset; res_by_tools ; merged_results ; plot_merged_results}
Carine Rey's avatar
Carine Rey committed
340

341
let derive_det ~dataset_l ~preview ~fast_mode =
Carine Rey's avatar
Carine Rey committed
342
  List.map dataset_l ~f:(fun dataset ->
343
      derive_from_dataset ~preview ~dataset ~fast_mode)
Carine Rey's avatar
Carine Rey committed
344

Carine Rey's avatar
Carine Rey committed
345
let derive_profile ?(indir = "") ?(ns = 0) ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile ~use_concat ~add_indels ~only_simu ~seed () =
LANORE Vincent's avatar
LANORE Vincent committed
346
  let trees = Array.to_list @@ Sys.readdir tree_dir in
Carine Rey's avatar
Carine Rey committed
347
  let repo_and_post_analyses_per_tree_simu = List.map trees ~f:(fun tree -> (*to keep together all models per tree*)
348 349 350
      let trees = [tree] in
      let tree_prefix = Filename.chop_extension tree in
      let dataset_l =
351
        derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test ~add_indels ~seed in
Carine Rey's avatar
Carine Rey committed
352
      let dataset_results_l =
353
        if only_simu then
Carine Rey's avatar
Carine Rey committed
354 355 356 357
          []
        else
          derive_det ~dataset_l ~preview ~fast_mode
      in
358
      let post_analyses_res = Post_analyses.post_analyses_res_of_dataset_results_l ~tree_prefix ~dataset_results_l in
359 360
      let repo_per_tree = [
        Dataset.repo dataset_l ~preview ;
Carine Rey's avatar
Carine Rey committed
361 362 363 364 365 366
        Repo.shift "Results_per_hypothesis" (repo_of_dataset_results_l ~dataset_results_l);
        Post_analyses.repo_of_post_analyses_res ~prefix:tree_prefix ~post_analyses_res;
      ]
        |> List.concat
        |> Repo.shift tree_prefix
        |> Repo.shift "Results_per_tree"
367
      in
368
      (repo_per_tree, post_analyses_res, dataset_results_l, dataset_l)
Carine Rey's avatar
Carine Rey committed
369
    )
370
  in
371 372 373 374 375 376
  let all_repo_per_tree_simu = List.concat_map repo_and_post_analyses_per_tree_simu ~f:(fun (r, _, _, _) -> r) in
  let all_post_analyses_per_tree = List.map repo_and_post_analyses_per_tree_simu ~f:(fun (_, p, _, _) -> p) in
  let all_dataset_results = List.concat_map repo_and_post_analyses_per_tree_simu ~f:(fun (_, _, dr, _) -> dr) in
  let simu_dataset_l = List.concat_map repo_and_post_analyses_per_tree_simu ~f:(fun (_, _, _, d) -> d) in
  let post_analyses_simu = Post_analyses.post_analyses_simu_of_simu_dataset_l ~simu_dataset_l in
  let repo_of_post_analyses_simu = Post_analyses.repo_of_post_analyses_simu ~post_analyses_simu in
377 378
  let profile_prefix = profile.profile_n in
  let repo_post_analyses_all_trees = Post_analyses.repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~all_post_analyses_per_tree ~profile_prefix in
Carine Rey's avatar
Carine Rey committed
379
  let repo_post_analyses_all_trees = [] in
380 381 382 383 384
  let repo_post_analyses_all_trees = if only_simu then
      []
    else
      repo_post_analyses_all_trees
  in
385 386 387 388 389 390 391 392
  let repo =
    repo_of_post_analyses_simu @ all_repo_per_tree_simu @ repo_post_analyses_all_trees
    |> Repo.shift profile_prefix
  in
  object
    method repo = repo
    method dataset_results = all_dataset_results
  end
393

394
let time_logger = Time_logger.create ()
395 396 397 398 399 400

let logger =
  Logger.tee [
    Console_logger.create () ;
    Dot_output.create "dag.dot" ; (*dot -Tpdf example/dag.dot -o dag.pdf*)
    Bistro_utils.Html_logger.create "report.html" ;
401
    time_logger#logger ;
402 403
  ]

Carine Rey's avatar
Carine Rey committed
404 405
let detection_main ~outdir ~indir ?(np = 2) ?(mem = 2) ~preview ~fast_mode ?(seed = Random.int Int.max_value) () =
  let dataset_l = parse_input_data ~seed indir in
406 407 408 409
  let dataset_results_l = derive_det ~dataset_l ~preview ~fast_mode in
  let repo = repo_of_dataset_results_l ~dataset_results_l in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo

410
let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview ~use_concat ~no_Ne ~no_HaPC ~add_indels ?(seed = Random.int Int.max_value) () =
411
  let nb_sites = if ns = 0 then (if preview then 20 else 50) else ns in
Carine Rey's avatar
Carine Rey committed
412
  let profile = Profile.profile_l_of_splitted_profile ~nb_cat:All ~nb_sites profile_fn ~seed:(Random.int Int.max_value) in
413
  let trees = Array.to_list @@ Sys.readdir tree_dir in
414 415
  let dataset_l = derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test:false ~add_indels ~seed in

416 417 418
  let repo = Dataset.repo dataset_l ~preview in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo

Carine Rey's avatar
Carine Rey committed
419
let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile_fn ~use_concat ~add_indels ~only_simu ?(seed = Random.int Int.max_value) () =
Carine Rey's avatar
Carine Rey committed
420
  printf "Global seed: %i\n" seed;
421
  Out_channel.write_all "global.seed" ~data:(string_of_int seed);
422
  (* simulated trees *)
Philippe Veber's avatar
Philippe Veber committed
423
  Random.init seed ;
424
  let nb_sites = if ns = 0 then (if preview then 20 else 50) else ns in
425
  let profile = Profile.profile_l_of_splitted_profile ~nb_cat:Unif_3 ~nb_sites profile_fn ~seed:(Random.int Int.max_value) in
Carine Rey's avatar
Carine Rey committed
426
  let sim_repo_l = derive_profile ~indir ~ns ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile ~use_concat ~add_indels ~only_simu ~seed () in
427
  (* real trees *)
Carine Rey's avatar
Carine Rey committed
428
  let indir_dataset_l = if indir = "" then [] else parse_input_data ~seed indir in
429 430
  let dataset_l = indir_dataset_l in
  let dataset_results_l =
431
    if only_simu then
432 433 434 435 436 437 438 439
      []
    else
      derive_det ~dataset_l ~preview ~fast_mode
  in
  let repo_real_trees = [
    Dataset.repo dataset_l ~preview ;
    repo_of_dataset_results_l ~dataset_results_l ;
  ] |> List.concat
Carine Rey's avatar
Carine Rey committed
440
  in
441 442
  let repo = (Repo.shift "Simulated_datasets" sim_repo_l#repo) @ (Repo.shift "Real_datasets" repo_real_trees) in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo ;
443
  time_logger#report sim_repo_l#dataset_results (Filename.concat outdir ("elapsed_time_" ^ (Unix.time () |> int_of_float |> string_of_int) ^ ".tsv"))
Philippe Veber's avatar
Philippe Veber committed
444

LANORE Vincent's avatar
LANORE Vincent committed
445
let simulation_command =
Philippe Veber's avatar
Philippe Veber committed
446 447 448 449 450
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run simulation pipeline"
    [%map_open
      let outdir =
LANORE Vincent's avatar
LANORE Vincent committed
451 452 453
        flag "--outdir" (required string) ~doc:"PATH Output directory"
      and preview =
        flag "--preview-mode" no_arg ~doc:" Preview mode"
Carine Rey's avatar
Carine Rey committed
454 455
      and no_Ne =
        flag "--no-ne" no_arg ~doc:" mode without hypothesis including different Ne"
Carine Rey's avatar
Carine Rey committed
456 457
      and no_HaPC =
        flag "--no-hapc" no_arg ~doc:" mode without ~HaPC hypothesis"
458 459
      and ns =
        flag "--ns" (optional int) ~doc:"INT Number of sites to simulate"
Philippe Veber's avatar
Philippe Veber committed
460 461 462 463
      and np =
        flag "--np" (optional int) ~doc:"INT Number of available processors"
      and mem =
        flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
464 465
      and use_concat =
        flag "--use-concat" no_arg ~doc:" Use concatenation H0+Ha_pcoc"
Carine Rey's avatar
Carine Rey committed
466 467
      and add_indels =
        flag "--add-indels" no_arg ~doc:" add indels in H*NeG5"
LANORE Vincent's avatar
LANORE Vincent committed
468 469
      and tree_dir =
        flag "--tree-dir" (required string) ~doc:"PATH Path to tree directory"
470 471
      and profile_fn =
        flag "--profile-fn" (required string) ~doc:"PATH Path to profile file"
Philippe Veber's avatar
Philippe Veber committed
472 473
      and seed =
        flag "--seed" (optional int) ~doc:"INT Global seed"
Philippe Veber's avatar
Philippe Veber committed
474
      in
475
      simulation_main ~outdir ?ns ?np ?mem ~no_Ne ~no_HaPC ~tree_dir ~profile_fn ~preview ~use_concat ~add_indels ?seed
LANORE Vincent's avatar
LANORE Vincent committed
476 477 478 479 480 481 482 483 484 485 486 487 488
    ]

let detection_command =
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run simulation pipeline"
    [%map_open
      let outdir =
        flag "--outdir" (required string) ~doc:"PATH Output directory"
      and indir =
        flag "--indir" (required string) ~doc:"PATH Input directory"
      and preview =
        flag "--preview-mode" no_arg ~doc:" Preview mode"
489 490
      and fast_mode =
        flag "--fast" no_arg ~doc:" 'Fast' mode without the most costly methods"
LANORE Vincent's avatar
LANORE Vincent committed
491 492 493 494
      and np =
        flag "--np" (optional int) ~doc:"INT Number of available processors"
      and mem =
        flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
Carine Rey's avatar
Carine Rey committed
495 496
      and seed =
        flag "--seed" (optional int) ~doc:"INT Global seed"
LANORE Vincent's avatar
LANORE Vincent committed
497
      in
Carine Rey's avatar
Carine Rey committed
498
      detection_main ~outdir ~indir ?np ?mem ~preview ~fast_mode ?seed
LANORE Vincent's avatar
LANORE Vincent committed
499 500
    ]

Philippe Veber's avatar
Philippe Veber committed
501
let validation_command =
502
  let _ = Random.self_init() in
Philippe Veber's avatar
Philippe Veber committed
503 504 505 506 507 508 509
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run simulation pipeline"
    [%map_open
      let outdir =
        flag "--outdir" (required string) ~doc:"PATH Output directory"
      and indir =
510
        flag "--indir" (optional string) ~doc:"PATH Input directory"
Philippe Veber's avatar
Philippe Veber committed
511 512
      and preview =
        flag "--preview-mode" no_arg ~doc:" Preview mode"
513 514
      and fast_mode =
        flag "--fast" no_arg ~doc:" 'Fast' mode without the most costly methods"
Carine Rey's avatar
Carine Rey committed
515 516
      and ne_test =
        flag "--ne-test" no_arg ~doc:" mode with hypothesis in test including different Ne"
Carine Rey's avatar
Carine Rey committed
517 518
      and no_Ne =
        flag "--no-ne" no_arg ~doc:" mode without hypothesis including different Ne"
Carine Rey's avatar
Carine Rey committed
519 520
      and no_HaPC =
        flag "--no-hapc" no_arg ~doc:" mode without ~HaPC hypothesis"
Carine Rey's avatar
Carine Rey committed
521 522
      and only_simu =
        flag "--only-simu" no_arg ~doc:" mode only simulation"
523 524
      and use_concat =
        flag "--use-concat" no_arg ~doc:" Use concatenation H0+Ha_pcoc"
Carine Rey's avatar
Carine Rey committed
525 526
      and add_indels =
        flag "--add-indels" no_arg ~doc:" add indels in H*NeG5"
527
      and ns =
528
        flag "--ns" (optional int) ~doc:"INT Number of sites to simulate (WARNING: will be multiplicated per 3)"
Philippe Veber's avatar
Philippe Veber committed
529 530 531 532 533 534 535 536
      and np =
        flag "--np" (optional int) ~doc:"INT Number of available processors"
      and mem =
        flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
      and tree_dir =
        flag "--tree-dir" (required string) ~doc:"PATH Path to tree directory"
      and profile_fn =
        flag "--profile-fn" (required string) ~doc:"PATH Path to profile file"
Philippe Veber's avatar
Philippe Veber committed
537 538
      and seed =
        flag "--seed" (optional int) ~doc:"INT Global seed"
Philippe Veber's avatar
Philippe Veber committed
539
      in
Carine Rey's avatar
Carine Rey committed
540
      validation_main ~outdir ?indir ?ns ?np ?mem ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile_fn ~use_concat ~only_simu ~add_indels ?seed
Philippe Veber's avatar
Philippe Veber committed
541
    ]