pipeline.ml 3.49 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2
open Bistro_utils
LANORE Vincent's avatar
LANORE Vincent committed
3
open Bistro.EDSL
Carine Rey's avatar
typing  
Carine Rey committed
4
open Bistro.Std
5 6 7
open File_formats


Carine Rey's avatar
typing  
Carine Rey committed
8 9
(** [parse_input_tree ~tree] describes a workflow, labelled
    ["utils.parse_input_tree"], that runs the Python script
    [../etc/utils/bin/parse_input_tree.py] with [-t] set to [tree] (declared
    as a dependency) and [-o] set to the workflow's destination.

    NOTE(review): the script path is relative; presumably it is resolved
    against the directory the pipeline is launched from -- confirm. *)
let parse_input_tree ~tree : parsed_input_tree directory workflow =
  (* A Docker environment is left disabled here:
     let env = docker_image ~account:"carinerey" ~name:"ete3:3.0.0b35" () in
     and the command would then receive ~env. *)
  let script = string "../etc/utils/bin/parse_input_tree.py" in
  let arguments = [ script; opt "-t" dep tree; opt "-o" ident dest ] in
  let run_script = cmd "python" arguments in
  workflow ~descr:"utils.parse_input_tree" [ run_script ]

(** [select_out parsed_tree t] selects, inside the [parsed_tree] directory
    workflow, the file associated with the tag [t]. *)
let select_out parsed_tree t =
  (* Only the file name depends on the tag; the selector is built once. *)
  let file_name =
    match t with
    | Nodes_H0 -> "tree.H0.node_ids"
    | Nodes_Ha -> "tree.Ha.node_ids"
    | Tree4detect -> "tree.only_convergent_tags.nhx"
    | Tree4simu -> "tree.only_node_ids.nhx"
    | Tree_diffsel -> "tree.diffsel"
  in
  parsed_tree / selector [ file_name ]
Philippe Veber's avatar
Philippe Veber committed
24

25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
(** [ready_dataset_of_raw_dataset raw_dataset] builds a ready dataset from a
    raw one: the input tree goes through [parse_input_tree], the nucleotide
    sequences through [Bppsuite.fna2faa], and the original [fna] is kept
    as is. *)
let ready_dataset_of_raw_dataset { input_tree; fna } =
  let parsed_tree = parse_input_tree ~tree:input_tree in
  let faa = Bppsuite.fna2faa ~fna in
  { parsed_tree; fna; faa }

(** [repo_of_ready_dataset ready_dataset] lists the repository items published
    for a ready dataset, all under the [ready_dataset/] prefix: the five files
    selected from the parsed tree directory, then the [fna] and [faa]
    sequence files. *)
let repo_of_ready_dataset ready_dataset =
  let parsed_tree = ready_dataset.parsed_tree in
  (* Go through [select_out] so that the file names inside the parsed-tree
     directory are spelled in one place only and cannot drift apart. *)
  let tree_output tag = select_out parsed_tree tag in
  [
    Repo.item [ "ready_dataset/tree.H0.node_ids" ] (tree_output Nodes_H0);
    Repo.item [ "ready_dataset/tree.Ha.node_ids" ] (tree_output Nodes_Ha);
    Repo.item
      [ "ready_dataset/tree.only_convergent_tags.nhx" ]
      (tree_output Tree4detect);
    Repo.item
      [ "ready_dataset/tree.only_node_ids.nhx" ]
      (tree_output Tree4simu);
    Repo.item [ "ready_dataset/tree.diffsel" ] (tree_output Tree_diffsel);
    Repo.item [ "ready_dataset/simulated_sequences.fna" ] ready_dataset.fna;
    Repo.item [ "ready_dataset/simulated_sequences.faa" ] ready_dataset.faa;
  ]

(** [repo_of_raw_dataset raw_dataset] lists the repository items published
    for a raw dataset, under the [raw_dataset/] prefix: the input tree and
    the [fna] sequence file. *)
let repo_of_raw_dataset raw_dataset =
  let tree_item =
    Repo.item [ "raw_dataset/input_tree.nhx" ] raw_dataset.input_tree
  in
  let fna_item =
    Repo.item [ "raw_dataset/simulated_sequences.fna" ] raw_dataset.fna
  in
  [ tree_item; fna_item ]

LANORE Vincent's avatar
LANORE Vincent committed
49 50 51
(** [derive_from_tree ~tree_dir ~tree ~preview] builds the repository for the
    tree file [tree] found in [tree_dir].

    The tree is parsed with [parse_input_tree]; sequences are generated by
    [Bppsuite.bppseqgen] from the [Tree4simu] output (10 sites when [preview]
    is set, 100 otherwise) and converted with [Bppsuite.fna2faa].

    NOTE(review): the ready dataset is published only when [preview] is set;
    otherwise only the raw dataset is, and [faa] / the ready dataset go
    unused. Confirm this branch choice is intended. *)
let derive_from_tree ~tree_dir ~tree ~preview =
  let tree_path = Filename.concat tree_dir tree in
  let input_tree = input tree_path in
  let nb_sites = if preview then 10 else 100 in
  let parsed_tree = parse_input_tree ~tree:input_tree in
  let simulation_tree = select_out parsed_tree Tree4simu in
  let fna = Bppsuite.bppseqgen ~nb_sites ~tree:simulation_tree in
  let faa = Bppsuite.fna2faa ~fna in
  let raw_dataset = { input_tree; fna } in
  let ready_dataset = { parsed_tree; fna; faa } in
  match preview with
  | true -> repo_of_ready_dataset ready_dataset
  | false -> repo_of_raw_dataset raw_dataset
Philippe Veber's avatar
Philippe Veber committed
61

LANORE Vincent's avatar
LANORE Vincent committed
62 63 64 65 66 67 68 69 70 71
(** [derive ~tree_dir ~trees ~preview] builds one repository per entry of
    [trees] with {!derive_from_tree}, applies [Repo.shift id] to it, and
    concatenates the results in the order of [trees].

    [id] is the file name without its extension. A name that has no
    extension is used unchanged, so that a stray entry in the tree directory
    no longer aborts the whole run with [Invalid_argument]. *)
let derive ~tree_dir ~trees ~preview =
  List.concat_map trees ~f:(fun tree ->
      let id =
        (* [Filename.chop_extension] raises when there is nothing to chop. *)
        match Filename.chop_extension tree with
        | stem -> stem
        | exception Invalid_argument _ -> tree
      in
      Repo.shift id (derive_from_tree ~tree_dir ~tree ~preview))

(** [main ~outdir ?np ?mem ~tree_dir ~preview ()] lists the entries of
    [tree_dir], derives the repository for them with {!derive}, and builds it
    into [outdir] with [Repo.build].

    @param np passed to [Repo.build] as [~np]; defaults to 2.
    @param mem memory in GB, passed to [Repo.build] as [`GB mem]; defaults
    to 2. *)
let main ~outdir ?(np = 2) ?(mem = 2) ~tree_dir ~preview () =
  let tree_files = Sys.readdir tree_dir |> Array.to_list in
  let repository = derive ~tree_dir ~trees:tree_files ~preview in
  let memory = `GB mem in
  Repo.build ~outdir ~np ~mem:memory repository
Philippe Veber's avatar
Philippe Veber committed
72 73 74 75 76 77 78

(** Command-line entry point: parses the flags declared below and hands them
    to {!main}. [--outdir] and [--tree-dir] are required; [--np] and [--mem]
    are optional, so [main]'s own defaults apply when they are omitted. *)
let command =
  let open Command.Let_syntax in
  let params =
    let%map_open outdir =
      flag "--outdir" (required string) ~doc:"PATH Output directory"
    and preview = flag "--preview-mode" no_arg ~doc:" Preview mode"
    and np =
      flag "--np" (optional int) ~doc:"INT Number of available processors"
    and mem =
      flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
    and tree_dir =
      flag "--tree-dir" (required string) ~doc:"PATH Path to tree directory"
    in
    (* Left partially applied: [Command.basic] supplies the final [()]. *)
    main ~outdir ?np ?mem ~tree_dir ~preview
  in
  Command.basic ~summary:"Run simulation pipeline" params