identical.ml 3.48 KB
Newer Older
Carine Rey's avatar
Carine Rey committed
1 2 3 4 5 6 7 8 9 10 11
open Core
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
open File_formats

(** [assign k v] builds a template made of the literal key [k] and the
    template value [v], combined through [seq] with [=] as separator. *)
let assign k v =
  let key = string k in
  seq ~sep:"=" [ key ; v ]

(** [conf_file_bppml ~tree ~faa ~out ~config] builds the template of the
    option file handed to bppml.

    Each entry is produced with [assign]; the entries are combined with
    [seq] using a newline separator.

    - [tree]: workflow whose result is referenced as [input.tree.file].
    - [faa]: workflow whose result is referenced as [input.sequence.file].
    - [out]: template bound to the [OUT] entry; the output paths below are
      written relative to [$(OUT)] (presumably expanded by the Bio++
      option parser -- to be confirmed against the bppsuite manual).
    - [config]: extra entries appended after the fixed ones. *)
let conf_file_bppml ~tree ~faa ~out ~config =
  let sequence_entries = [
    assign "OUT" out ;
    assign "input.sequence.file" (dep faa) ;
    assign "alphabet" (string "Protein") ;
    assign "input.sequence.remove_stop_codons" (string "no") ;
    assign "input.sequence.sites_to_use" (string "all") ;
    assign "input.sequence.remove_saturated_sites" (string "yes") ;
  ] in
  let tree_entries = [
    assign "input.tree.file" (dep tree) ;
    assign "init.tree" (string "user") ;
    assign "input.tree.format" (string "Nhx") ;
    assign "optimization.topology" (string "false") ;
  ] in
  let output_entries = [
    assign "output.tree.file" (string "$(OUT)/tree.nhx") ;
    assign "output.tree.format" (string "Nhx") ;
    assign "output.infos" (string "$(OUT)/infos.tsv") ;
    assign "output.estimates" (string "$(OUT)/estimates.tsv") ;
  ] in
  (* Caller-supplied entries come last, after every fixed entry. *)
  let entries =
    List.concat [ sequence_entries ; tree_entries ; output_entries ; config ]
  in
  seq ~sep:"\n" entries

(** [bppml ?descr ~faa ~tree ~config] is a workflow that runs [bppml]
    inside the [carinerey/bppsuite] Docker image (tag [07192018]).

    The workflow creates [dest], dumps the option file built by
    [conf_file_bppml] into [dest/config_bppml.bpp] (through [cat] with its
    standard output redirected), then calls [bppml] with a [param=] entry
    pointing at that file.

    - [descr]: suffix appended to the workflow description
      (defaults to the empty string).
    - [faa], [tree], [config]: forwarded to [conf_file_bppml]. *)
let bppml ?(descr="") ~faa ~tree ~config : _ workflow =
  let image = docker_image ~account:"carinerey" ~name:"bppsuite" ~tag:"07192018" () in
  let param_file = dest // "config_bppml.bpp" in
  (* The OUT entry of the option file is the workflow destination itself. *)
  let settings = conf_file_bppml ~tree ~faa ~out:(ident dest) ~config in
  let write_settings = cmd "cat" ~stdout:param_file [ file_dump settings ] in
  let run_bppml = cmd "bppml" [ assign "param" param_file ] in
  workflow ~descr:("bppsuite.bppml" ^ descr) [
    docker image (and_list [ mkdir_p dest ; write_settings ; run_bppml ])
  ]
51

Carine Rey's avatar
Carine Rey committed
52 53
(** [conf_file_bppancestor ~tree ~faa ~out ~config] builds the template of
    the option file handed to bppancestor.

    Each entry is produced with [assign]; the entries are combined with
    [seq] using a newline separator.

    - [tree]: workflow whose result is referenced as [input.tree.file].
    - [faa]: workflow whose result is referenced as [input.sequence.file].
    - [out]: template bound to the [OUT] entry; the output paths below are
      written relative to [$(OUT)] (presumably expanded by the Bio++
      option parser -- to be confirmed against the bppsuite manual).
    - [config]: extra entries appended after the fixed ones. *)
let conf_file_bppancestor ~tree ~faa ~out ~config =
  let input_entries = [
    assign "OUT" out ;
    assign "input.sequence.file" (dep faa) ;
    assign "alphabet" (string "Protein") ;
    assign "input.tree.file" (dep tree) ;
    assign "init.tree" (string "user") ;
    assign "input.tree.format" (string "Nhx") ;
    assign "optimization.topology" (string "false") ;
  ] in
  let reconstruction_entries = [
    assign "output.sequence.file" (string "$(OUT)/output_anc.fa") ;
    assign "asr.add_extant" (string "true") ;
    assign "asr.probabilities" (string "true") ;
  ] in
  let report_entries = [
    assign "output.sites.file" (string "$(OUT)/sites.tsv") ;
    assign "output.nodes.file" (string "$(OUT)/nodes.tsv") ;
  ] in
  (* Caller-supplied entries come last, after every fixed entry. *)
  let entries =
    List.concat [ input_entries ; reconstruction_entries ; report_entries ; config ]
  in
  seq ~sep:"\n" entries

(** [bppancestor ?descr ~faa ~tree ~config] is a workflow that runs
    [bppancestor] inside the [carinerey/bppsuite] Docker image
    (tag [07192018]).

    The workflow creates [dest], dumps the option file built by
    [conf_file_bppancestor] into [dest/config_bppancestor.bpp] (through
    [cat] with its standard output redirected), then calls [bppancestor]
    with a [param=] entry pointing at that file.

    - [descr]: suffix appended to the workflow description
      (defaults to the empty string).
    - [faa], [tree], [config]: forwarded to [conf_file_bppancestor]. *)
let bppancestor ?(descr="") ~faa ~tree ~config : _ workflow =
  let image = docker_image ~account:"carinerey" ~name:"bppsuite" ~tag:"07192018" () in
  let param_file = dest // "config_bppancestor.bpp" in
  (* The OUT entry of the option file is the workflow destination itself. *)
  let settings = conf_file_bppancestor ~tree ~faa ~out:(ident dest) ~config in
  let write_settings = cmd "cat" ~stdout:param_file [ file_dump settings ] in
  let run_bppancestor = cmd "bppancestor" [ assign "param" param_file ] in
  workflow ~descr:("bppsuite.bppancestor" ^ descr) [
    docker image (and_list [ mkdir_p dest ; write_settings ; run_bppancestor ])
  ]

(** [identical ~tree_id ~tree_sc ~faa ~prot_model] is a workflow that runs
    the [Scripts.calc_identical] Python script in the [carinerey/pcoc]
    Docker image (tag [07022018]).

    It first runs [bppancestor] on [tree_id] and [faa] with a single extra
    option-file entry, [model=prot_model], and selects [sites.tsv] from
    the result. The script is then called with:
    - [-t]: [tree_sc]
    - [-a]: [faa]
    - [-p]: the selected [sites.tsv]
    - [-o] / [-o2]: [dest/out1.tsv] and [dest/out2.tsv]

    The workflow description is [identical.] followed by [prot_model]. *)
let identical ~(tree_id:_ workflow) ~(tree_sc:_ workflow) ~(faa:aminoacid_fasta workflow) ~prot_model : [`identical] directory workflow =
  let image = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"07022018" () in
  let site_probabilities =
    let model_entry = [ assign "model" (string prot_model) ] in
    bppancestor ~descr:"" ~tree:tree_id ~faa ~config:model_entry
    / selector [ "sites.tsv" ]
  in
  let script_args = [
    file_dump (string Scripts.calc_identical) ;
    opt "-t" dep tree_sc ;
    opt "-a" dep faa ;
    opt "-p" dep site_probabilities ;
    opt "-o" ident (dest // "out1.tsv") ;
    opt "-o2" ident (dest // "out2.tsv") ;
  ] in
  workflow ~descr:("identical." ^ prot_model) [
    mkdir dest ;
    cmd "python" ~env:image script_args ;
  ]

(** [results run_identical] selects the [out1.tsv] file inside the
    directory produced by [run_identical]. *)
let results run_identical : text_file workflow =
  let first_table = selector [ "out1.tsv" ] in
  run_identical / first_table