pipeline.ml 5.72 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2
open Bistro_utils
LANORE Vincent's avatar
LANORE Vincent committed
3
open Bistro.EDSL
Carine Rey's avatar
typing  
Carine Rey committed
4
open Bistro.Std
5 6
open File_formats

7
(** [ready_dataset_of_raw_dataset raw_dataset] extends a raw dataset into a
    ready one: [input_tree] and [fna] are carried over unchanged,
    [tree_dataset] is obtained with [Tree_dataset.prepare input_tree] and
    [faa] with [Bppsuite.fna2faa ~fna]. *)
let ready_dataset_of_raw_dataset raw_dataset =
  let input_tree = raw_dataset.input_tree and fna = raw_dataset.fna in
  let tree_dataset = Tree_dataset.prepare input_tree in
  let faa = Bppsuite.fna2faa ~fna in
  { input_tree; tree_dataset; fna; faa }
13

Carine Rey's avatar
Carine Rey committed
14 15 16 17 18
(** [raw_dataset_of_ready_dataset ready_dataset] keeps only the [input_tree]
    and [fna] fields of [ready_dataset]; the other fields are dropped. *)
let raw_dataset_of_ready_dataset ready_dataset =
  { input_tree = ready_dataset.input_tree; fna = ready_dataset.fna }

19
(** [repo_of_ready_dataset rd] lists the repository items exported for a
    ready dataset: the input tree, the node ids for [H0] and [Ha], the
    [`Detection] and [`Simulation] trees, the diffsel tree and the two
    sequence files. All items are shifted under ["ready_dataset"]. *)
let repo_of_ready_dataset (rd : ready_dataset) =
  let tree_dataset = rd.tree_dataset in
  let items =
    Repo.[
      item [ "input_tree.nhx" ] rd.input_tree;
      item [ "tree.H0.node_ids" ] (Tree_dataset.nodes tree_dataset H0);
      item [ "tree.Ha.node_ids" ] (Tree_dataset.nodes tree_dataset Ha);
      item [ "tree.only_convergent_tags.nhx" ]
        (Tree_dataset.tree tree_dataset `Detection);
      item [ "tree.only_node_ids.nhx" ]
        (Tree_dataset.tree tree_dataset `Simulation);
      item [ "tree.diffsel" ] (Tree_dataset.diffsel_tree tree_dataset);
      item [ "simulated_sequences.fna" ] rd.fna;
      item [ "simulated_sequences.faa" ] rd.faa;
    ]
  in
  Repo.shift "ready_dataset" items
31

32
(** [repo_of_raw_dataset raw_dataset] lists the repository items exported for
    a raw dataset (its input tree and its [fna] sequences), shifted under
    ["raw_dataset"]. *)
let repo_of_raw_dataset (raw_dataset : raw_dataset) =
  let items =
    Repo.[
      item [ "input_tree.nhx" ] raw_dataset.input_tree;
      item [ "simulated_sequences.fna" ] raw_dataset.fna;
    ]
  in
  Repo.shift "raw_dataset" items
38

Carine Rey's avatar
Carine Rey committed
39

Carine Rey's avatar
Carine Rey committed
40
(** [repo_of_dataset_l ~preview dataset_l] builds the repository items of
    every dataset in [dataset_l] and concatenates them.

    When [preview] is [true] the full ready dataset is exported; otherwise
    only its raw part (see [raw_dataset_of_ready_dataset]) is. Each dataset's
    items are shifted under its [model_prefix], then under its
    [tree_prefix]. *)
let repo_of_dataset_l ~preview dataset_l =
  let repo_of_dataset { model_prefix; tree_prefix; ready_dataset } =
    let items =
      if preview then repo_of_ready_dataset ready_dataset
      else repo_of_raw_dataset (raw_dataset_of_ready_dataset ready_dataset)
    in
    items |> Repo.shift model_prefix |> Repo.shift tree_prefix
  in
  List.concat_map dataset_l ~f:repo_of_dataset

(** [derive_from_model ~model ~tree ~tree_dataset ~tree_prefix ~preview]
    simulates one dataset for [model] on the [`Simulation] tree of
    [tree_dataset].

    - [model]: hypothesis used to select nodes ([Tree_dataset.nodes]), to
      build the bppseqgen configuration and to name the dataset
      ([model_prefix]).
    - [tree]: accepted for interface compatibility but not used; the tree fed
      to the simulation is always the one derived from [tree_dataset].
    - [tree_prefix]: stored as is in the resulting dataset.
    - [preview]: when [true], 10 sites are simulated instead of 100.

    Returns a dataset record with [model_prefix], [tree_prefix] and the
    simulated [ready_dataset]. *)
let derive_from_model ~model ~tree:_ ~tree_dataset ~tree_prefix ~preview =
  let nb_sites = if preview then 10 else 100 in
  let nodes = Tree_dataset.nodes tree_dataset model in
  let config = Convergence_hypothesis.bpp_config nodes model in
  let simulation_tree = Tree_dataset.tree tree_dataset `Simulation in
  let fna = Bppsuite.bppseqgen ~nb_sites ~tree:simulation_tree ~config in
  let faa = Bppsuite.fna2faa ~fna in
  (* NOTE(review): [input_tree] holds the simulation tree, not the tree given
     by the caller -- this matches the historical behaviour; confirm it is
     intended. *)
  let ready_dataset =
    { input_tree = simulation_tree; tree_dataset; fna; faa }
  in
  let model_prefix = Convergence_hypothesis.string_of_model model in
  { model_prefix; tree_prefix; ready_dataset }
Carine Rey's avatar
Carine Rey committed
66

67
(** [derive_from_tree ~tree_dir ~tree ~preview] derives one dataset per
    convergence hypothesis ([H0] and [Ha]) from the tree file named [tree]
    located in [tree_dir].

    The dataset's [tree_prefix] is the file name without its extension. A
    file name that has no extension is used unchanged instead of raising
    [Invalid_argument], so that a single oddly named entry in [tree_dir] does
    not abort the whole run. *)
let derive_from_tree ~tree_dir ~tree ~preview =
  let tree_prefix =
    match Filename.chop_extension tree with
    | prefix -> prefix
    | exception Invalid_argument _ -> tree
  in
  let tree = input (Filename.concat tree_dir tree) in
  let tree_dataset = Tree_dataset.prepare tree in
  let models = Convergence_hypothesis.[ H0; Ha ] in
  List.map models ~f:(fun model ->
      derive_from_model ~model ~tree ~tree_dataset ~tree_prefix ~preview)
75

Carine Rey's avatar
Carine Rey committed
76
(** [derive_sim ~tree_dir ~trees ~preview] applies [derive_from_tree] to each
    file name of [trees] and returns all resulting datasets in a single
    list. *)
let derive_sim ~tree_dir ~trees ~preview =
  List.concat_map trees ~f:(fun tree ->
      derive_from_tree ~tree_dir ~tree ~preview)

Carine Rey's avatar
Carine Rey committed
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126

(** [repo_of_det_results_l ~det_results_l] builds the repository items of
    every detection result and concatenates them. Each result is exported
    under the name given by [string_of_det_meth], shifted under the
    [model_prefix] and then the [tree_prefix] of the dataset it was computed
    on. *)
let repo_of_det_results_l ~det_results_l =
  let repo_of_det_results det_results =
    let model_prefix = det_results.dataset.model_prefix in
    let tree_prefix = det_results.dataset.tree_prefix in
    let det_meth_prefix = string_of_det_meth det_results.det_meth in
    let open Convergence_detection in
    (* The whole result is exported; restricting it with
       [/ selector ["RUN*"]] is currently disabled. *)
    let result = det_results.det_result in
    Repo.[ item [ det_meth_prefix ] result ]
    |> Repo.shift model_prefix
    |> Repo.shift tree_prefix
  in
  List.concat_map det_results_l ~f:repo_of_det_results

(** [derive_from_det_meth ~det_meth ~dataset ~preview] runs the detection
    method [det_meth] on [dataset].

    [Pcoc] and [Pcoc_gamma] call [Convergence_detection.pcoc] (with [gamma]
    set to [false] and [true] respectively) on the [faa] sequences and the
    [`Detection] tree. [Diffsel] calls [Convergence_detection.diffsel] on the
    [fna] sequences converted by [Bppsuite.fa2phy] and the diffsel tree; in
    preview mode it is given [w_every = 1] and [n_cycles = 100] instead of
    [10] and [1000].

    Returns a record holding the method, its result and the dataset. *)
let derive_from_det_meth ~det_meth ~dataset ~preview =
  let open Convergence_detection in
  let ready = dataset.ready_dataset in
  let faa = ready.faa in
  let fna = ready.fna in
  let phy_n = Bppsuite.fa2phy ~fna in
  let pcoc_tree = Tree_dataset.tree ready.tree_dataset `Detection in
  let diffsel_tree = Tree_dataset.diffsel_tree ready.tree_dataset in
  let w_every, n_cycles = if preview then (1, 100) else (10, 1000) in
  let run_pcoc ~gamma =
    Convergence_detection.pcoc ~gamma ~faa ~tree:pcoc_tree
  in
  let det_result =
    match det_meth with
    | Pcoc -> run_pcoc ~gamma:false
    | Pcoc_gamma -> run_pcoc ~gamma:true
    | Diffsel ->
      Convergence_detection.diffsel ~phy_n ~tree:diffsel_tree ~w_every
        ~n_cycles
  in
  { det_meth; det_result; dataset }




(** [derive_from_dataset ~dataset ~preview] runs each detection method
    ([Pcoc], [Pcoc_gamma], [Diffsel], in that order) on [dataset] and returns
    one result per method. *)
let derive_from_dataset ~dataset ~preview =
  [ Pcoc; Pcoc_gamma; Diffsel ]
  |> List.map ~f:(fun det_meth ->
      derive_from_det_meth ~det_meth ~dataset ~preview)

(** [derive_det ~dataset_l ~preview] applies [derive_from_dataset] to every
    dataset of [dataset_l] and returns all detection results in a single
    list. *)
let derive_det ~dataset_l ~preview =
  List.concat_map dataset_l ~f:(fun dataset ->
      derive_from_dataset ~dataset ~preview)

LANORE Vincent's avatar
LANORE Vincent committed
127
(** [main ~outdir ?np ?mem ~tree_dir ~preview ()] derives the simulated
    datasets and the detection results for every entry of [tree_dir], then
    builds the resulting repository in [outdir] with [Repo.build].

    - [np]: passed to [Repo.build] as [~np] (default [2]).
    - [mem]: passed to [Repo.build] as [`GB mem] (default [2]).
    - [preview]: forwarded to every derivation step.

    Progress is logged to the console and a ["dag.dot"] file is produced
    through [Dot_output.create]. *)
let main ~outdir ?(np = 2) ?(mem = 2) ~tree_dir ~preview () =
  let logger =
    Logger.tee [
      Console_logger.create () ;
      (* Render with: dot -Tpdf example/dag.dot -o dag.pdf *)
      Dot_output.create "dag.dot" ;
    ]
  in
  let trees = Array.to_list @@ Sys.readdir tree_dir in
  let dataset_l = derive_sim ~tree_dir ~trees ~preview in
  let det_results_l = derive_det ~dataset_l ~preview in
  let repo =
    List.concat [
      repo_of_dataset_l ~preview dataset_l ;
      (* The argument is labelled in the definition: pass it with its label
         rather than relying on label omission in a total application. *)
      repo_of_det_results_l ~det_results_l ;
    ]
  in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
Philippe Veber's avatar
Philippe Veber committed
143 144 145 146 147 148 149

(** Command-line entry point of the simulation pipeline. It parses
    [--outdir], [--preview-mode], [--np], [--mem] and [--tree-dir] and hands
    them to [main]; [--np] and [--mem] are optional and fall back to [main]'s
    defaults when absent. *)
let command =
  let open Command.Let_syntax in
  let run =
    let%map_open outdir =
      flag "--outdir" (required string) ~doc:"PATH Output directory"
    and preview = flag "--preview-mode" no_arg ~doc:" Preview mode"
    and np =
      flag "--np" (optional int) ~doc:"INT Number of available processors"
    and mem =
      flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
    and tree_dir =
      flag "--tree-dir" (required string) ~doc:"PATH Path to tree directory"
    in
    main ~outdir ?np ?mem ~tree_dir ~preview
  in
  Command.basic ~summary:"Run simulation pipeline" run