(* topological.ml *)
open Core
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
open File_formats

(** [assign key value] is the template [key=value]: the literal string [key],
    an ["="] separator, then the template [value]. *)
let assign key value =
  [ string key ; value ] |> seq ~sep:"="

(** [conf_file_bppml ~tree ~faa ~out ~config] builds the text of a bppml
    option file as a template made of one [key=value] assignment per line.

    - [tree] is passed through [dep] and written as [input.tree.file]
      (declared with format [Nhx]).
    - [faa] is passed through [dep] and written as [input.sequence.file]
      (declared with alphabet [Protein]).
    - [out] is the template bound to the [OUT] variable; the output paths
      below are spelled relative to it as [$(OUT)/...].
    - [config] is a list of extra assignments appended after the fixed ones. *)
let conf_file_bppml ~tree ~faa ~out ~config =
  let fixed_options =
    [
      assign "OUT" out ;

      (* Input sequences. *)
      assign "input.sequence.file" (dep faa) ;
      assign "alphabet" (string "Protein") ;
      assign "input.sequence.remove_stop_codons" (string "no") ;
      assign "input.sequence.sites_to_use" (string "all") ;
      assign "input.sequence.remove_saturated_sites" (string "yes") ;

      (* Input tree. *)
      assign "input.tree.file" (dep tree) ;
      assign "init.tree" (string "user") ;
      assign "input.tree.format" (string "Nhx") ;

      assign "optimization.topology" (string "false") ;

      (* Output files, all located under $(OUT). *)
      assign "output.tree.file" (string "$(OUT)/tree.nhx") ;
      assign "output.tree.format" (string "Nhx") ;
      assign "output.infos" (string "$(OUT)/infos.tsv") ;
      assign "output.estimates" (string "$(OUT)/estimates.tsv") ;
    ]
  in
  seq ~sep:"\n" (fixed_options @ config)

(** [bppml ?descr ~faa ~tree ~config] is a workflow that, inside the
    [Env.env_bppsuite] docker environment:

    + creates the destination directory [dest];
    + dumps the option file produced by [conf_file_bppml] and writes it to
      [dest/config_bppml.bpp] (via [cat] with redirected stdout);
    + runs [bppml param=dest/config_bppml.bpp].

    [descr] is appended to the workflow description ["bppsuite.bppml"] and
    defaults to the empty string. The [OUT] variable of the option file is
    bound to [dest].

    NOTE(review): [?descr] is not followed by a positional parameter, so
    OCaml cannot erase it when it is omitted (warning 16); the callers
    visible in this file always pass [~descr] explicitly. *)
let bppml ?(descr="") ~faa ~tree ~config : _ workflow =
  let env = Env.env_bppsuite in
  let config_f = dest // "config_bppml.bpp" in
  let out = ident dest in
  let config_contents = conf_file_bppml ~tree ~faa ~out ~config in
  workflow ~descr:("bppsuite.bppml" ^ descr) [
    docker env (
      and_list [
        mkdir_p dest ;
        (* Materialise the generated option file at [config_f]. *)
        cmd "cat" ~stdout:config_f [ file_dump config_contents ] ;
        cmd "bppml" [
          assign "param" config_f ;
        ] ;
      ]
    )
  ]

(** [topological ~tree ~tree_conv ~faa ~prot_model] runs [bppml] twice on the
    same alignment [faa] with the option [model=prot_model]: once with [tree]
    and once with [tree_conv] (description suffix [".conv"]).

    The resulting workflow then:
    - creates [dest];
    - copies each run's [infos.tsv] to [dest/estimates.bppml_out.tsv] and
      [dest/estimates.bppml_out_conv.tsv];
    - copies each run's [config_bppml.bpp] to [dest/estimates.bppml_config]
      and [dest/estimates.bppml_config_conv];
    - runs the [Scripts.calc_topological] script with [python] in the
      [Env.env_pcoc] environment, passing both [infos.tsv] files
      ([-bppml_non_conv], [-bppml_conv]) and [-o dest/out.tsv]. *)
let topological ~(tree:_ workflow) ~(tree_conv:_ workflow) ~(faa:aminoacid_fasta workflow) ~prot_model : [`topological] directory workflow =
  let config = [assign "model" (string prot_model)] in
  let run_bppml = bppml ~descr:"" ~tree ~config ~faa in
  let run_bppml_conv = bppml ~descr:".conv" ~tree:tree_conv ~config ~faa in
  let bppml_config = run_bppml / selector ["config_bppml.bpp"] in
  let bppml_config_conv = run_bppml_conv / selector ["config_bppml.bpp"] in
  let bppml_out = run_bppml / selector ["infos.tsv"] in
  let bppml_out_conv = run_bppml_conv / selector ["infos.tsv"] in
  let out = dest // "out.tsv" in
  let env = Env.env_pcoc in
  workflow ~descr:("topological.parse_" ^ prot_model) [
    (* [mkdir_p] for consistency with [bppml]; it also tolerates an
       already-existing destination directory. *)
    mkdir_p dest ;
    cmd "cp" [dep bppml_out ; dest // "estimates.bppml_out.tsv" ] ;
    cmd "cp" [dep bppml_out_conv ; dest // "estimates.bppml_out_conv.tsv" ] ;
    cmd "cp" [dep bppml_config ; dest // "estimates.bppml_config" ] ;
    cmd "cp" [dep bppml_config_conv ; dest // "estimates.bppml_config_conv" ] ;
    cmd "python" ~env [
      file_dump (string Scripts.calc_topological) ;
      opt "-bppml_non_conv" dep bppml_out ;
      opt "-bppml_conv" dep bppml_out_conv ;
      opt "-o" ident out ;
    ] ;
  ]

(** [results run_topological] selects the [out.tsv] file inside the directory
    produced by [run_topological]. *)
let results run_topological : text_file workflow =
  let out_tsv = selector [ "out.tsv" ] in
  run_topological / out_tsv