bppsuite.ml 4.66 KB
Newer Older
LANORE Vincent's avatar
LANORE Vincent committed
1
open Core
2 3 4
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
Carine Rey's avatar
Carine Rey committed
5
open File_formats
LANORE Vincent's avatar
LANORE Vincent committed
6

Carine Rey's avatar
Carine Rey committed
7
(** Abstract tag type with no constructors. In this file it only appears as
    the type parameter of the [directory workflow] returned by the
    [bppseqgen_multi_profiles] function, to distinguish that output directory
    from other directories at the type level. *)
type bppseqgen_multi_profiles
Carine Rey's avatar
Carine Rey committed
8

9
(** Docker image ([carinerey/bppsuite], tag [07192018]) in which the commands
    built in this file are run. *)
let env =
  docker_image
    ~name:"bppsuite"
    ~account:"carinerey"
    ~tag:"07192018"
    ()
LANORE Vincent's avatar
LANORE Vincent committed
10 11 12 13

(** [assign key value] builds the template fragment [key=value]: the literal
    string [key], an equals sign, then the template [value]. *)
let assign key value =
  let lhs = string key in
  seq ~sep:"=" [ lhs ; value ]

Carine Rey's avatar
Carine Rey committed
14 15 16 17 18 19 20 21 22 23 24
(** [bash_script args code] builds a script template made of one [name=value]
    line per pair in [args] (in list order), followed by a newline and the
    literal text [code]. Values are emitted as-is, without shell quoting. *)
let bash_script args code =
  let assignments =
    List.map args ~f:(fun (name, value) -> assign name value)
  in
  let header = seq ~sep:"\n" assignments in
  let body = string code in
  seq ~sep:"\n" [ header ; body ]

(** Template of the parameter file given to [bppseqgen].

    It starts with three generated assignments (input tree, output sequence
    file, number of sites), followed by the caller-supplied templates in
    [config], one entry per line.

    @param tree workflow producing the input tree file
    @param out template for the path of the output sequence file
    @param nb_sites value written for [number_of_sites]
    @param config extra template lines appended after the generated ones *)
let conf_file_bppseqgen ~tree ~out ~nb_sites ~config =
  let tree_line = assign "input.tree.file" (dep tree) in
  let out_line = assign "output.sequence.file" out in
  let sites_line = assign "number_of_sites" (int nb_sites) in
  let all_lines = List.concat [ [ tree_line ; out_line ; sites_line ] ; config ] in
  seq ~sep:"\n" all_lines
LANORE Vincent's avatar
LANORE Vincent committed
33

Carine Rey's avatar
Carine Rey committed
34
(** Workflow running [bppseqgen] inside the [env] docker image.

    Steps, chained with [and_list]:
    - create the destination directory;
    - write the parameter file built by [conf_file_bppseqgen] to
      [config.bpp] in the destination (through [cat] redirected to it);
    - run [bppseqgen param=<that file>], whose configured output is
      [seq.fa] in the destination.

    The returned workflow selects [seq.fa] inside the destination directory.

    @param descr suffix appended to the workflow description (default: empty)
    @param nb_sites forwarded to [conf_file_bppseqgen]
    @param tree workflow producing the input tree
    @param config extra parameter-file lines *)
let bppseqgen ?(descr="") ~nb_sites ~tree ~config : nucleotide_fasta workflow =
  let fasta_out = dest // "seq.fa" in
  let param_file = dest // "config.bpp" in
  let label = "bppsuite.bppseqgen" ^ descr in
  let param_dump =
    file_dump (conf_file_bppseqgen ~tree ~out:fasta_out ~nb_sites ~config)
  in
  let steps = [
    mkdir_p dest ;
    cmd "cat" ~stdout:param_file [ param_dump ] ;
    cmd "bppseqgen" [ assign "param" param_file ] ;
  ]
  in
  workflow ~descr:label [ docker env (and_list steps) ]
  / selector [ "seq.fa" ]
Carine Rey's avatar
Carine Rey committed
48

49
(** Template of the parameter file used by the multi-profile simulation.

    Emits, one per line and in this order: [input.tree.file], [PROFILE_F],
    [number_of_sites], [NE_1] (from [ne_a]), [NE_C] and [NE_T] (both from
    [ne_c]), then the caller-supplied templates in [config].

    NOTE(review): the meaning of the [NE_*] and [PROFILE_F] variables is
    defined by the bpp configuration passed in [config], which is not visible
    here. *)
let conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~ne_c ~ne_a ~config ~nb_sites_per_profile =
  let settings = [
    "input.tree.file", dep tree ;
    "PROFILE_F",       dep profile_f ;
    "number_of_sites", int nb_sites_per_profile ;
    "NE_1",            float ne_a ;
    "NE_C",            float ne_c ;
    "NE_T",            float ne_c ;
  ]
  in
  let generated = List.map settings ~f:(fun (key, value) -> assign key value) in
  seq ~sep:"\n" (generated @ config)

62
(** Bash script template driving one [bppseqgen] run per line of [profile_c].

    The script defines three shell variables ([FINAL_OUT], [PARAM],
    [PROFILE_C]) and then, for each line of the profile file:
    - increments a counter [i] (starting at 1 for the first line);
    - takes the first two space-separated fields of the line as [COL_M1] and
      [COL_M2] (the line is deliberately expanded unquoted so that runs of
      blanks are collapsed before [cut] splits on single spaces);
    - runs [bppseqgen] with the parameter file and those values, writing
      [out_int_<i>.fa] in the current directory.

    Finally it copies the profile file to [<FINAL_OUT>.info] and concatenates
    the per-line fasta files with [catfasta2phyml.pl] into [FINAL_OUT].

    The per-line files are handed to [catfasta2phyml.pl] as an explicit list,
    in the order they were produced. A glob such as [out_int_*] is expanded
    in lexicographic order (1, 10, 11, ..., 2, ...), which would make the
    concatenated sites disagree with the line order of the [.info] file as
    soon as there are ten or more profiles. NOTE(review): this assumes
    [catfasta2phyml.pl] concatenates in argument order - confirm.

    The read loop also processes a last line that lacks a trailing newline.

    @param config template for the path of the bpp parameter file
    @param out template for the path of the final concatenated fasta
    @param profile_c workflow producing the profile file read line by line *)
let bppseqgen_multi_profiles_script ~config  ~out ~profile_c =
  let vars = [
    "FINAL_OUT", ident out ;
    "PARAM", config ;
    "PROFILE_C", dep profile_c ;
  ]
  in
  bash_script vars {|

  i=0
  out_files=()
  while read -r line || [ -n "$line" ]
  do
    echo "i: $i"
    ((i++))
    COL_M1=$(echo $line | cut -f 1 -d " ")
    COL_M2=$(echo $line | cut -f 2 -d " ")
    bppseqgen param="$PARAM" i=$i COL_M1=$COL_M1 COL_M2=$COL_M2 output.sequence.file=out_int_"$i".fa
    out_files+=(out_int_"$i".fa)
  done < "$PROFILE_C"

  cp "$PROFILE_C" "$FINAL_OUT".info

  # horizontal concatenation of fasta, in the same order as the profile lines
  catfasta2phyml.pl -f "${out_files[@]}" > "$FINAL_OUT"

|}


90
(** Workflow simulating sequences profile by profile, inside the [env]
    docker image.

    Steps, chained with [and_list]:
    - create the destination and temporary directories and move into the
      temporary one (the per-profile intermediate files are written there);
    - write the parameter file built by
      [conf_file_bppseqgen_multi_profiles] to [config.bpp] in the
      destination;
    - run, with [bash], the script built by
      [bppseqgen_multi_profiles_script], which produces [seq.fa] and
      [seq.fa.info] in the destination.

    Each [bppseqgen] run is configured with [number_of_sites = 1].

    @param descr suffix appended to the workflow description (default: empty)
    @param profile_f workflow whose path is written as [PROFILE_F]
    @param profile_c workflow producing the file read line by line by the
      script (one [bppseqgen] run per line)
    @param nb_sites kept for interface compatibility and ignored: it was
      previously only used to compute a value that was itself never used.
      The number of simulated sites is driven by the number of lines of
      [profile_c].
    @param tree workflow producing the input tree
    @param config extra parameter-file lines
    @param ne_c value written as [NE_C] and [NE_T]
    @param ne_a value written as [NE_1] *)
let bppseqgen_multi_profiles ?(descr="") ~profile_f ~profile_c ~nb_sites:(_ : int) ~tree ~config ~ne_c ~ne_a : bppseqgen_multi_profiles directory workflow =
  let nb_sites_per_profile = 1 in
  let config_f = dest // "config.bpp" in
  let out = dest // "seq.fa" in
  workflow ~descr:("bppsuite.bppseqgen" ^ descr) [
    docker env (
      and_list [
        mkdir_p dest;
        mkdir_p tmp;
        cd tmp;
        cmd "cat" ~stdout:config_f [(file_dump (conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~config ~ne_c ~ne_a ~nb_sites_per_profile))];
        cmd "bash" [(file_dump (bppseqgen_multi_profiles_script ~config:config_f  ~out  ~profile_c))];
      ]
    )
  ]

(** Selects the concatenated fasta file [seq.fa] inside the directory
    produced by a multi-profile run. *)
let bppseqgen_multi_profiles_get_fa run_bppseqgen_multi_profiles : nucleotide_fasta workflow =
  let fasta = selector [ "seq.fa" ] in
  run_bppseqgen_multi_profiles / fasta
Carine Rey's avatar
Carine Rey committed
109

Carine Rey's avatar
Carine Rey committed
110
(** Selects the [seq.fa.info] text file inside the directory produced by a
    multi-profile run. *)
let bppseqgen_multi_profiles_get_info run_bppseqgen_multi_profiles : text_file workflow =
  let info = selector [ "seq.fa.info" ] in
  run_bppseqgen_multi_profiles / info
Carine Rey's avatar
Carine Rey committed
112

Carine Rey's avatar
Carine Rey committed
113
(** Template of the [bppseqman] parameter file used to translate a
    nucleotide alignment: input is [fna], output is the workflow
    destination, and a fixed block of options sets a codon alphabet, the
    standard genetic code and [sequence.manip = Translate]. *)
let conf_file_bppseqman_fna2faa ~fna =
  let input_line = assign "input.sequence.file" (dep fna) in
  let output_line = assign "output.sequence.file" dest in
  let fixed_options =
    string {|alphabet=Codon(letter=DNA)
             genetic_code = Standard
             input.sequence.remove_stop_codons = no
             input.sequence.sites_to_use = all
             input.alignment = true
             sequence.manip = Translate
           |}
  in
  seq ~sep:"\n" [ input_line ; output_line ; fixed_options ]

(** Workflow converting a nucleotide fasta into an amino-acid fasta by
    running [bppseqman] (in [env]) with the parameter file built by
    [conf_file_bppseqman_fna2faa]. *)
let fna2faa ~(fna:nucleotide_fasta workflow) : aminoacid_fasta workflow =
  let param_file = file_dump (conf_file_bppseqman_fna2faa ~fna) in
  let translate = cmd "bppseqman" ~env [ assign "param" param_file ] in
  workflow ~descr:"bppsuite.fna2faa" [ translate ]

(** Template of the [bppseqman] parameter file used to rewrite an alignment
    in extended interleaved Phylip format: input is [fna], output is the
    workflow destination, and [sequence.manip] is left empty. *)
let conf_file_bppseqman_fa2phy ~fna =
  let input_line = assign "input.sequence.file" (dep fna) in
  let output_line = assign "output.sequence.file" dest in
  let format_line =
    assign "output.sequence.format" (string "Phylip(order=interleaved, type=extended)")
  in
  let fixed_options =
    string {| input.alignment = true
              input.sequence.remove_stop_codons = no
              input.sequence.sites_to_use = all
              sequence.manip =
           |}
  in
  seq ~sep:"\n" [ input_line ; output_line ; format_line ; fixed_options ]

(** Workflow converting a nucleotide fasta into a Phylip file by running
    [bppseqman] (in [env]) with the parameter file built by
    [conf_file_bppseqman_fa2phy]. *)
let fa2phy ~(fna: nucleotide_fasta workflow) : nucleotide_phylip workflow =
  let param_file = file_dump (conf_file_bppseqman_fa2phy ~fna) in
  let convert = cmd "bppseqman" ~env [ assign "param" param_file ] in
  workflow ~descr:"bppsuite.fa2phy_interleaved" [ convert ]
Carine Rey's avatar
Carine Rey committed
151 152 153 154 155


(** Workflow running [catfasta2phyml.pl -f] (in [env]) on every fasta of
    [fna_l], in list order, with standard output redirected to the workflow
    destination. *)
let paste_fna  ~(fna_l: nucleotide_fasta workflow list) : nucleotide_fasta workflow =
  let inputs = List.map fna_l ~f:(fun alignment -> dep alignment) in
  let arguments = string "-f" :: inputs in
  workflow ~descr:"bppsuite.catfasta" [
    cmd "catfasta2phyml.pl" ~stdout:dest ~env arguments
  ]