diffsel.ml 5.11 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1 2 3 4 5
open Core_kernel
open Bistro.Std
open Bistro.EDSL
open File_formats

6 7 8 9 10 11 12 13 14 15 16 17 18 19
let assign k v =
  seq ~sep:"=" [ string k ; v ]

let bash_script args code =
  let prelude =
    args
    |> List.map ~f:(fun (k, v) ->
        assign k v
      )
    |> seq ~sep:"\n"
  in
  seq ~sep:"\n" [ prelude ; string code ]


Carine Rey's avatar
Carine Rey committed
20
let diffsel_add_iterations_script ~chainname ~ali ~tree ~seed =
21 22
  let vars = [
    "CHAIN", chainname ;
23 24
    "ALI", ali ;
    "TREE", tree ;
Carine Rey's avatar
Carine Rey committed
25
    "SEED", int seed ;
26 27 28 29 30 31 32 33
  ]
  in
  bash_script vars {|

#!/bin/bash
set -e

continue=true
Carine Rey's avatar
Carine Rey committed
34
i=0
35

36 37
#while $continue 
#do
Carine Rey's avatar
Carine Rey committed
38 39
i=$((i+1))
echo i=$i
40
ls
41
# check convergence
42
Rscript -e "rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", params=list(set_trace1=\"../tmp/myrun_tmp.trace\"),output_file=\"../dest/output_"$i".html\")"
43 44 45 46 47 48 49 50
new_max=`tail -n 1 new_iterations.txt | cut -f 1`
continue=`tail -n 1 new_iterations.txt | cut -f 2`
end_it=`tail -n 1 new_iterations.txt | cut -f 3`

echo new_max=$new_max
echo continue=$continue
echo end_it=$end_it

51 52 53 54 55 56 57 58 59
#if $continue
#then
#    echo "diffsel $CHAIN $new_max"
#    /diffsel/_build/diffsel $CHAIN $new_max
#fi
#
#done
#
#cat new_iterations.txt > $CHAIN.iterations
60

Carine Rey's avatar
Carine Rey committed
61
/diffsel/_build/diffsel -t $TREE -d $ALI -ncond 2 -x 1 $end_it -seed $SEED $CHAIN 
62 63 64 65

|}


Carine Rey's avatar
Carine Rey committed
66
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) ~(id: int) ?(descr = "") ?seed () : [`diffsel] directory workflow =
67
  let env = Env.env_diffsel in
Philippe Veber's avatar
Philippe Veber committed
68 69 70 71
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dest_tree = dest // "myrun.tree" in
  let dest_ali = dest // "myrun.ali" in
72
  let chainname_tmp = tmp // "myrun_tmp" in
Philippe Veber's avatar
Philippe Veber committed
73
  let chainname = dest // "myrun" in
Carine Rey's avatar
Carine Rey committed
74 75 76 77
  let seed = match seed with
    | Some s -> s
    | None -> (Random.int Int.max_value)
  in
78
  let n_cycles = if (n_cycles > 200) then 20 else n_cycles in
79
  let script_r = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
Philippe Veber's avatar
Philippe Veber committed
80
  (*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
Carine Rey's avatar
Carine Rey committed
81
  workflow ~descr:("convergence_detection.run_diffsel." ^(string_of_int id) ^ "." ^ descr)  [
Philippe Veber's avatar
Philippe Veber committed
82 83 84
    docker env (
      and_list [
        mkdir_p dest;
85
        cd tmp;
Carine Rey's avatar
Carine Rey committed
86
        cmd "echo" [string "Run chain:"; int id];
87
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script_r] ;
Philippe Veber's avatar
Philippe Veber committed
88 89 90 91 92
        cmd "cp" [dep phy_n; dest_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep tree; dest_tree]; (* required dep to link the file in the env *)
        cmd "cp" [dep phy_n; tmp_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep tree; tmp_tree]; (* required dep to link the file in the env *)
        cmd "/diffsel/_build/diffsel" [
93 94 95 96
          opt "-t" ident tmp_tree;
          opt "-d" ident tmp_ali ;
          opt "-ncond"  int 2 ;
          opt "-x" seq [ int w_every; string " "; int n_cycles];
Carine Rey's avatar
Carine Rey committed
97
          opt "-seed" int seed ;
98
          ident chainname_tmp ;
Philippe Veber's avatar
Philippe Veber committed
99
        ];
Carine Rey's avatar
Carine Rey committed
100
        cmd "bash" [(file_dump (diffsel_add_iterations_script ~chainname ~ali:tmp_ali ~tree:tmp_tree ~seed))];
Philippe Veber's avatar
Philippe Veber committed
101 102 103 104
      ]
    )
  ]

105
let check_conv run_diffsel : text_file directory workflow =
106
  let env = Env.env_r in
107 108 109
  let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
  let trace = run_diffsel / selector["myrun.trace"] in
  let out = dest // "out.html" in
110
  let nb_new_iterations = dest // "new_iterations.txt" in
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
  workflow ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" [
    docker env (
      and_list [
        mkdir_p tmp ;
        mkdir_p dest ;
        cd tmp ;
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script] ;
        cmd "Rscript" [
          string "-e" ;
          string {|"rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\",|} ;
          string {|params=list(set_trace1=\"|} ;
          dep trace ;
          string {|\"))"|};
        ] ;
        cmd "cp" [string "DiffselMCMCConvergenceAnalysis.html" ; ident out] ;
        cmd "cp" [string "new_iterations.txt" ; ident nb_new_iterations]
127
      ]
128 129 130
    )
  ]

Philippe Veber's avatar
Philippe Veber committed
131
let selector run_diffsel : text_file workflow =
132
  let env = Env.env_diffsel in
Philippe Veber's avatar
Philippe Veber committed
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "diffsel_analyze_result.py" in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dep_tree = (dep run_diffsel) // "myrun.tree" in
  let dep_ali = (dep run_diffsel) // "myrun.ali" in
  let chainname = (dep run_diffsel) // "myrun" in
  let out = dest in
  workflow ~descr:"convergence_detection.parse_diffsel" [
    docker env (
      and_list [
        mkdir_p tmp ;
        cd tmp ;

        cmd "cp" [dep_ali; tmp_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep_tree; tmp_tree]; (* required dep to link the file in the env *)

        (*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
        cmd "cp"  [ file_dump (string Scripts.diffsel_script_utils) ; package] ;
        cmd "cp" [ file_dump (string Scripts.diffsel_analyze_result) ; script] ;

        cmd "python" [
          string "diffsel_analyze_result.py" ;
          opt "-r" string "/diffsel/_build/readdiffsel" ;
          opt "-o" ident out ;
          ident chainname ;
        ]
      ]
    )
  ]