bppsuite.ml 4.77 KB
Newer Older
LANORE Vincent's avatar
LANORE Vincent committed
1
open Core
2 3 4
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
Carine Rey's avatar
Carine Rey committed
5
open File_formats
LANORE Vincent's avatar
LANORE Vincent committed
6

Carine Rey's avatar
Carine Rey committed
7
type bppseqgen_multi_profiles
Carine Rey's avatar
Carine Rey committed
8

9
let env = docker_image ~account:"carinerey" ~name:"bppsuite" ~tag:"07192018" ()
LANORE Vincent's avatar
LANORE Vincent committed
10 11 12 13

let assign k v =
  seq ~sep:"=" [ string k ; v ]

Carine Rey's avatar
Carine Rey committed
14 15 16 17 18 19 20 21 22 23 24
let bash_script args code =
  let prelude =
    args
    |> List.map ~f:(fun (k, v) ->
        assign k v
      )
    |> seq ~sep:"\n"
  in
  seq ~sep:"\n" [ prelude ; string code ]

let conf_file_bppseqgen ~tree ~out ~nb_sites ~config =
25 26 27
  seq ~sep:"\n" (
    [
      assign "input.tree.file" (dep tree) ;
28
      assign "output.sequence.file" out ;
29 30 31 32
      assign "number_of_sites" (int nb_sites) ;
    ]
    @ config
  )
LANORE Vincent's avatar
LANORE Vincent committed
33

Carine Rey's avatar
Carine Rey committed
34
let bppseqgen ?(descr="") ~nb_sites ~tree ~config : nucleotide_fasta workflow =
35 36 37 38 39 40
  let config_f = dest // "config.bpp" in
  let out = dest // "seq.fa" in
  workflow ~descr:("bppsuite.bppseqgen" ^ descr) [
    docker env (
      and_list [
        mkdir_p dest;
Carine Rey's avatar
Carine Rey committed
41
        cmd "cat" ~stdout:config_f [(file_dump (conf_file_bppseqgen ~tree ~out ~nb_sites ~config))];
Carine Rey's avatar
Carine Rey committed
42
        cmd "bppseqgen" [
43 44
          assign "param"  config_f;
        ]
45 46 47
      ]
    )
  ] / selector ["seq.fa"]
Carine Rey's avatar
Carine Rey committed
48

49
let conf_file_bppseqgen_multi_profiles ~tree ~profile_w ~ne_c ~ne_a ~config ~nb_sites_per_profile =
Carine Rey's avatar
Carine Rey committed
50 51 52
  seq ~sep:"\n" (
    [
      assign "input.tree.file" (dep tree) ;
53
      assign "PROFILE_F" (dep profile_w) ;
54
      assign "number_of_sites" (int nb_sites_per_profile) ;
55
      assign "NE_1"            (float ne_a) ;
56 57
      assign "NE_C"            (float ne_c) ;
      assign "NE_T"            (float ne_c) ;
Carine Rey's avatar
Carine Rey committed
58 59 60 61
    ]
    @ config
  )

62
let bppseqgen_multi_profiles_script ~config ~nb_combis  ~out ~profile_w =
Carine Rey's avatar
Carine Rey committed
63 64 65
  let vars = [
    "FINAL_OUT", ident out ;
    "PARAM", config ;
66
    "PROFILE_F", dep profile_w ;
Carine Rey's avatar
Carine Rey committed
67 68 69 70 71 72 73
    "NB_COMBI_PROFILES", int nb_combis ;
  ]
  in
  bash_script vars {|

  start_i=1
  end_i=$NB_COMBI_PROFILES
74
  NB_COLS=`head -n 1 $PROFILE_F | awk '{print NF}'`
75

76
  rm -f $FINAL_OUT.info
Carine Rey's avatar
Carine Rey committed
77

Carine Rey's avatar
Carine Rey committed
78 79 80 81 82 83
  for ((i=start_i; i<=end_i; i++))
  do
   echo "i: $i"
   shuf -i1-$NB_COLS -n2 -o random_num
   COL_M1=`head -n 1 random_num`
   COL_M2=`tail -n 1 random_num`
Carine Rey's avatar
Carine Rey committed
84
   echo -e $COL_M1'\t'$COL_M2 >> $FINAL_OUT.info
85
   bppseqgen param=$PARAM i=$i COL_M1=$COL_M1 COL_M2=$COL_M2 output.sequence.file=out_int_"$i".fa
Carine Rey's avatar
Carine Rey committed
86 87 88 89 90 91 92 93
  done

  # horizontal concatenation of fasta
  catfasta2phyml.pl -f out_int_* > $FINAL_OUT

|}


94
let bppseqgen_multi_profiles ?(descr="") ~profile_w ~nb_sites ~tree ~config ~ne_c ~ne_a : bppseqgen_multi_profiles directory workflow =
Carine Rey's avatar
Carine Rey committed
95
  let nb_sites_per_profile = 1 in
Carine Rey's avatar
Carine Rey committed
96 97 98 99 100 101 102 103 104
  let nb_combis = Pervasives.(nb_sites / nb_sites_per_profile) in
  let config_f = dest // "config.bpp" in
  let out = dest // "seq.fa" in
  workflow ~descr:("bppsuite.bppseqgen" ^ descr) [
    docker env (
      and_list [
        mkdir_p dest;
        mkdir_p tmp;
        cd tmp;
105 106
        cmd "cat" ~stdout:config_f [(file_dump (conf_file_bppseqgen_multi_profiles ~tree ~profile_w ~config ~ne_c ~ne_a ~nb_sites_per_profile))];
        cmd "bash" [(file_dump (bppseqgen_multi_profiles_script ~config:config_f ~nb_combis ~out ~profile_w))];
Carine Rey's avatar
Carine Rey committed
107 108
      ]
    )
Carine Rey's avatar
Carine Rey committed
109 110 111 112
  ]

let bppseqgen_multi_profiles_get_fa run_bppseqgen_multi_profiles : nucleotide_fasta workflow =
  run_bppseqgen_multi_profiles / selector ["seq.fa"]
Carine Rey's avatar
Carine Rey committed
113

Carine Rey's avatar
Carine Rey committed
114
let bppseqgen_multi_profiles_get_info run_bppseqgen_multi_profiles : text_file workflow =
Carine Rey's avatar
Carine Rey committed
115
  run_bppseqgen_multi_profiles / selector ["seq.fa.info"]
Carine Rey's avatar
Carine Rey committed
116

Carine Rey's avatar
Carine Rey committed
117
let conf_file_bppseqman_fna2faa ~fna =
Carine Rey's avatar
Carine Rey committed
118 119 120 121 122
  seq ~sep:"\n" [
    assign "input.sequence.file" (dep fna) ;
    assign "output.sequence.file" dest ;
    string {|alphabet=Codon(letter=DNA)
             genetic_code = Standard
123 124
             input.sequence.remove_stop_codons = no
             input.sequence.sites_to_use = all
Carine Rey's avatar
Carine Rey committed
125 126 127 128 129 130 131 132
             input.alignment = true
             sequence.manip = Translate
           |}
  ]

let fna2faa ~(fna:nucleotide_fasta workflow) : aminoacid_fasta workflow =
  workflow ~descr:"bppsuite.fna2faa" [
    cmd "bppseqman" ~env [
Carine Rey's avatar
Carine Rey committed
133 134 135 136 137 138 139 140 141 142
      assign "param" (file_dump (conf_file_bppseqman_fna2faa ~fna)) ;
    ]
  ]

let conf_file_bppseqman_fa2phy ~fna =
  seq ~sep:"\n" [
    assign "input.sequence.file" (dep fna) ;
    assign "output.sequence.file" dest ;
    assign "output.sequence.format" (string "Phylip") ;
    string {| input.alignment = true
143 144 145
              input.sequence.remove_stop_codons = no
              input.sequence.sites_to_use = all
              sequence.manip =
Carine Rey's avatar
Carine Rey committed
146 147 148 149 150 151 152
           |}
  ]

let fa2phy ~(fna: nucleotide_fasta workflow) : nucleotide_phylip workflow =
  workflow ~descr:"bppsuite.fa2phy" [
    cmd "bppseqman" ~env [
      assign "param" (file_dump (conf_file_bppseqman_fa2phy ~fna)) ;
LANORE Vincent's avatar
LANORE Vincent committed
153 154
    ]
  ]
Carine Rey's avatar
Carine Rey committed
155 156 157 158 159


let paste_fna  ~(fna_l: nucleotide_fasta workflow list) : nucleotide_fasta workflow =
  workflow ~descr:"bppsuite.catfasta" [
    cmd "catfasta2phyml.pl" ~stdout:dest ~env (List.concat [
160 161 162
        [string "-f" ] ;
        List.map fna_l ~f:(fun fna -> dep fna) ;
      ])
Carine Rey's avatar
Carine Rey committed
163
  ]