bppsuite.ml 4.4 KB
Newer Older
LANORE Vincent's avatar
LANORE Vincent committed
1
open Core
2 3 4
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
Carine Rey's avatar
Carine Rey committed
5
open File_formats
LANORE Vincent's avatar
LANORE Vincent committed
6 7


Carine Rey's avatar
Carine Rey committed
8

Carine Rey's avatar
Carine Rey committed
9
let env = docker_image ~account:"carinerey" ~name:"bppsuite" ~tag:"07052018" ()
LANORE Vincent's avatar
LANORE Vincent committed
10 11 12 13

let assign k v =
  seq ~sep:"=" [ string k ; v ]

Carine Rey's avatar
Carine Rey committed
14 15 16 17 18 19 20 21 22 23
let bash_script args code =
  let prelude =
    args
    |> List.map ~f:(fun (k, v) ->
        assign k v
      )
    |> seq ~sep:"\n"
  in
  seq ~sep:"\n" [ prelude ; string code ]

24

Carine Rey's avatar
Carine Rey committed
25 26

let conf_file_bppseqgen ~tree ~out ~nb_sites ~config =
27 28 29
  seq ~sep:"\n" (
    [
      assign "input.tree.file" (dep tree) ;
30
      assign "output.sequence.file" out ;
31 32 33 34
      assign "number_of_sites" (int nb_sites) ;
    ]
    @ config
  )
LANORE Vincent's avatar
LANORE Vincent committed
35

Carine Rey's avatar
Carine Rey committed
36
let bppseqgen ?(descr="") ~nb_sites ~tree ~config : nucleotide_fasta workflow =
37 38 39 40 41 42
  let config_f = dest // "config.bpp" in
  let out = dest // "seq.fa" in
  workflow ~descr:("bppsuite.bppseqgen" ^ descr) [
    docker env (
      and_list [
        mkdir_p dest;
Carine Rey's avatar
Carine Rey committed
43
        cmd "cat" ~stdout:config_f [(file_dump (conf_file_bppseqgen ~tree ~out ~nb_sites ~config))];
Carine Rey's avatar
Carine Rey committed
44
        cmd "bppseqgen" [
45 46 47 48 49
         assign "param"  config_f;
         ]
      ]
    )
  ] / selector ["seq.fa"]
Carine Rey's avatar
Carine Rey committed
50 51


Carine Rey's avatar
Carine Rey committed
52

53
let conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~ne_c ~config ~nb_sites_per_profile =
Carine Rey's avatar
Carine Rey committed
54 55 56
  seq ~sep:"\n" (
    [
      assign "input.tree.file" (dep tree) ;
57 58 59 60 61
      assign "PROFILE_F" (dep profile_f) ;
      assign "number_of_sites" (int nb_sites_per_profile) ;
      assign "NE_1"            (int 1     ) ;
      assign "NE_C"            (float ne_c) ;
      assign "NE_T"            (float ne_c) ;
Carine Rey's avatar
Carine Rey committed
62 63 64 65 66 67
    ]
    @ config
  )



68
let bppseqgen_multi_profiles_script ~config ~nb_combis  ~out ~profile_f =
Carine Rey's avatar
Carine Rey committed
69 70 71 72 73 74 75 76 77 78 79
  let vars = [
    "FINAL_OUT", ident out ;
    "PARAM", config ;
    "PROFILE_F", dep profile_f ;
    "NB_COMBI_PROFILES", int nb_combis ;
  ]
  in
  bash_script vars {|

  start_i=1
  end_i=$NB_COMBI_PROFILES
80
  NB_COLS=`head -n 1 $PROFILE_F | awk '{print NF}'`
81

Carine Rey's avatar
Carine Rey committed
82 83 84 85 86 87
  for ((i=start_i; i<=end_i; i++))
  do
   echo "i: $i"
   shuf -i1-$NB_COLS -n2 -o random_num
   COL_M1=`head -n 1 random_num`
   COL_M2=`tail -n 1 random_num`
88
   bppseqgen param=$PARAM i=$i COL_M1=$COL_M1 COL_M2=$COL_M2 output.sequence.file=out_int_"$i".fa
Carine Rey's avatar
Carine Rey committed
89 90 91 92 93 94 95 96
  done

  # horizontal concatenation of fasta
  catfasta2phyml.pl -f out_int_* > $FINAL_OUT

|}


97
let bppseqgen_multi_profiles ?(descr="") ~profile_f ~nb_sites ~tree ~config ~ne_c : nucleotide_fasta workflow =
Carine Rey's avatar
Carine Rey committed
98 99 100 101 102 103 104 105 106 107
  let nb_sites_per_profile = if nb_sites > 100 then 2 else 1 in
  let nb_combis = Pervasives.(nb_sites / nb_sites_per_profile) in
  let config_f = dest // "config.bpp" in
  let out = dest // "seq.fa" in
  workflow ~descr:("bppsuite.bppseqgen" ^ descr) [
    docker env (
      and_list [
        mkdir_p dest;
        mkdir_p tmp;
        cd tmp;
108 109
        cmd "cat" ~stdout:config_f [(file_dump (conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~config ~ne_c ~nb_sites_per_profile))];
        cmd "bash" [(file_dump (bppseqgen_multi_profiles_script ~config:config_f ~nb_combis ~out ~profile_f))];
Carine Rey's avatar
Carine Rey committed
110 111 112 113 114
      ]
    )
  ] / selector ["seq.fa"]


Carine Rey's avatar
Carine Rey committed
115
let conf_file_bppseqman_fna2faa ~fna =
Carine Rey's avatar
Carine Rey committed
116 117 118 119 120
  seq ~sep:"\n" [
    assign "input.sequence.file" (dep fna) ;
    assign "output.sequence.file" dest ;
    string {|alphabet=Codon(letter=DNA)
             genetic_code = Standard
121 122
             input.sequence.remove_stop_codons = no
             input.sequence.sites_to_use = all
Carine Rey's avatar
Carine Rey committed
123 124 125 126 127 128 129 130
             input.alignment = true
             sequence.manip = Translate
           |}
  ]

let fna2faa ~(fna:nucleotide_fasta workflow) : aminoacid_fasta workflow =
  workflow ~descr:"bppsuite.fna2faa" [
    cmd "bppseqman" ~env [
Carine Rey's avatar
Carine Rey committed
131 132 133 134 135 136 137 138 139 140
      assign "param" (file_dump (conf_file_bppseqman_fna2faa ~fna)) ;
    ]
  ]

let conf_file_bppseqman_fa2phy ~fna =
  seq ~sep:"\n" [
    assign "input.sequence.file" (dep fna) ;
    assign "output.sequence.file" dest ;
    assign "output.sequence.format" (string "Phylip") ;
    string {| input.alignment = true
141 142
             input.sequence.remove_stop_codons = no
             input.sequence.sites_to_use = all
Carine Rey's avatar
Carine Rey committed
143 144 145 146 147 148 149 150
             sequence.manip =
           |}
  ]

let fa2phy ~(fna: nucleotide_fasta workflow) : nucleotide_phylip workflow =
  workflow ~descr:"bppsuite.fa2phy" [
    cmd "bppseqman" ~env [
      assign "param" (file_dump (conf_file_bppseqman_fa2phy ~fna)) ;
LANORE Vincent's avatar
LANORE Vincent committed
151 152
    ]
  ]
Carine Rey's avatar
Carine Rey committed
153 154 155 156 157 158 159 160 161


let paste_fna  ~(fna_l: nucleotide_fasta workflow list) : nucleotide_fasta workflow =
  workflow ~descr:"bppsuite.catfasta" [
    cmd "catfasta2phyml.pl" ~stdout:dest ~env (List.concat [
      [string "-f" ] ;
      List.map fna_l ~f:(fun fna -> dep fna) ;
    ])
  ]