(* ready_dataset.ml *)
open Bistro.Std
open Bistro_utils
open File_formats

(** A dataset bundling an input tree, the tree dataset derived from it, and
    the sequences in both nucleotide and amino-acid FASTA form. Values are
    built by [of_raw] in this file. *)
type t = {
  input_tree: nhx workflow ; (* workflow yielding the input tree (nhx) *)
  tree_dataset : [`tree_dataset] directory workflow ; (* directory workflow; [of_raw] obtains it from [Tree_dataset.prepare] *)
  fna: nucleotide_fasta workflow ; (* workflow yielding the nucleotide FASTA *)
  faa: aminoacid_fasta workflow ; (* workflow yielding the amino-acid FASTA; [of_raw] obtains it from [Bppsuite.fna2faa] *)
}


(** [of_raw raw_dataset] builds a ready dataset from a raw one.

    The input tree and the nucleotide FASTA are carried over unchanged.
    The tree dataset is obtained with [Tree_dataset.prepare] on the input
    tree, and the amino-acid FASTA with [Bppsuite.fna2faa] on the
    nucleotide FASTA. *)
let of_raw (raw_dataset : Raw_dataset.t) =
  (* Derived workflows are bound first, in the same order as before. *)
  let tree_dataset = Tree_dataset.prepare raw_dataset.input_tree in
  let faa = Bppsuite.fna2faa ~fna:raw_dataset.fna in
  {
    input_tree = raw_dataset.input_tree ;
    tree_dataset ;
    fna = raw_dataset.fna ;
    faa ;
  }

(** [repo rd] lists the workflows of [rd] as repository items, each paired
    with the path it should be stored under, then passes the list through
    [Repo.shift "ready_dataset"] (presumably nesting every item under that
    directory; confirm against [Bistro_utils.Repo]).

    The items cover the input tree, the files extracted from the tree
    dataset through the [Tree_dataset] accessors, and the nucleotide and
    amino-acid sequence files. *)
let repo rd =
  Repo.[
    item ["input_tree.nhx"] rd.input_tree ;
    item ["tree.H0.node_ids" ] (Tree_dataset.nodes rd.tree_dataset H0) ;
    item ["tree.Ha.node_ids" ] (Tree_dataset.nodes rd.tree_dataset Ha) ;
    item ["tree.only_convergent_tags.nhx" ] (Tree_dataset.tree rd.tree_dataset `Detection) ;
    item ["tree.only_node_ids.nhx" ] (Tree_dataset.tree rd.tree_dataset `Simulation) ;
    item ["tree.diffsel" ] (Tree_dataset.diffsel_tree rd.tree_dataset) ;
    item ["tree.convergent_topology" ] (Tree_dataset.topological_tree rd.tree_dataset) ;
    item ["simulated_sequences.fna"] rd.fna ;
    item ["simulated_sequences.faa"] rd.faa ;
  ]
  |> Repo.shift "ready_dataset"

(** [to_raw rd] converts a ready dataset back to a [Raw_dataset.t], keeping
    only the input tree and the nucleotide FASTA. The derived fields
    ([tree_dataset], [faa]) are dropped. *)
let to_raw { input_tree ; fna ; _ } =
  (* The trailing [_] makes it explicit that the other fields are ignored
     on purpose, which avoids the "labels not bound" warning (warning 9). *)
  { Raw_dataset.input_tree ; fna }