diffsel.ml 4.89 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1 2 3 4
open Core_kernel
open Bistro.Std
open Bistro.EDSL
open File_formats
5
open Utils
6

Carine Rey's avatar
Carine Rey committed
7
let diffsel_add_iterations_script ~chainname ~ali ~tree ~seed =
8 9
  let vars = [
    "CHAIN", chainname ;
10 11
    "ALI", ali ;
    "TREE", tree ;
Carine Rey's avatar
Carine Rey committed
12
    "SEED", int seed ;
13 14 15 16 17 18 19 20
  ]
  in
  bash_script vars {|

#!/bin/bash
set -e

continue=true
Carine Rey's avatar
Carine Rey committed
21
i=0
22

23 24
#while $continue 
#do
Carine Rey's avatar
Carine Rey committed
25 26
i=$((i+1))
echo i=$i
27
ls
28
# check convergence
29
Rscript -e "rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", params=list(set_trace1=\"../tmp/myrun_tmp.trace\"),output_file=\"../dest/output_"$i".html\")"
30 31 32 33 34 35 36 37
new_max=`tail -n 1 new_iterations.txt | cut -f 1`
continue=`tail -n 1 new_iterations.txt | cut -f 2`
end_it=`tail -n 1 new_iterations.txt | cut -f 3`

echo new_max=$new_max
echo continue=$continue
echo end_it=$end_it

38 39 40 41 42 43 44 45 46
#if $continue
#then
#    echo "diffsel $CHAIN $new_max"
#    /diffsel/_build/diffsel $CHAIN $new_max
#fi
#
#done
#
#cat new_iterations.txt > $CHAIN.iterations
47

Carine Rey's avatar
Carine Rey committed
48
/diffsel/_build/diffsel -t $TREE -d $ALI -ncond 2 -x 1 $end_it -seed $SEED $CHAIN 
49 50 51 52

|}


Carine Rey's avatar
Carine Rey committed
53
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) ~(id: int) ?(descr = "") ?seed () : [`diffsel] directory workflow =
54
  let env = Env.env_diffsel in
Philippe Veber's avatar
Philippe Veber committed
55 56 57 58
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dest_tree = dest // "myrun.tree" in
  let dest_ali = dest // "myrun.ali" in
59
  let chainname_tmp = tmp // "myrun_tmp" in
Philippe Veber's avatar
Philippe Veber committed
60
  let chainname = dest // "myrun" in
Carine Rey's avatar
Carine Rey committed
61 62 63 64
  let seed = match seed with
    | Some s -> s
    | None -> (Random.int Int.max_value)
  in
65
  let n_cycles = if (n_cycles > 200) then 20 else n_cycles in
66
  let script_r = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
Philippe Veber's avatar
Philippe Veber committed
67
  (*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
Carine Rey's avatar
Carine Rey committed
68
  workflow ~descr:("convergence_detection.run_diffsel." ^(string_of_int id) ^ "." ^ descr)  [
Philippe Veber's avatar
Philippe Veber committed
69 70 71
    docker env (
      and_list [
        mkdir_p dest;
72
        cd tmp;
Carine Rey's avatar
Carine Rey committed
73
        cmd "echo" [string "Run chain:"; int id];
74
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script_r] ;
Philippe Veber's avatar
Philippe Veber committed
75 76 77 78 79
        cmd "cp" [dep phy_n; dest_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep tree; dest_tree]; (* required dep to link the file in the env *)
        cmd "cp" [dep phy_n; tmp_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep tree; tmp_tree]; (* required dep to link the file in the env *)
        cmd "/diffsel/_build/diffsel" [
80 81 82 83
          opt "-t" ident tmp_tree;
          opt "-d" ident tmp_ali ;
          opt "-ncond"  int 2 ;
          opt "-x" seq [ int w_every; string " "; int n_cycles];
Carine Rey's avatar
Carine Rey committed
84
          opt "-seed" int seed ;
85
          ident chainname_tmp ;
Philippe Veber's avatar
Philippe Veber committed
86
        ];
Carine Rey's avatar
Carine Rey committed
87
        cmd "bash" [(file_dump (diffsel_add_iterations_script ~chainname ~ali:tmp_ali ~tree:tmp_tree ~seed))];
Philippe Veber's avatar
Philippe Veber committed
88 89 90 91
      ]
    )
  ]

92
let check_conv run_diffsel : text_file directory workflow =
93
  let env = Env.env_r in
94 95 96
  let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
  let trace = run_diffsel / selector["myrun.trace"] in
  let out = dest // "out.html" in
97
  let nb_new_iterations = dest // "new_iterations.txt" in
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
  workflow ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" [
    docker env (
      and_list [
        mkdir_p tmp ;
        mkdir_p dest ;
        cd tmp ;
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script] ;
        cmd "Rscript" [
          string "-e" ;
          string {|"rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\",|} ;
          string {|params=list(set_trace1=\"|} ;
          dep trace ;
          string {|\"))"|};
        ] ;
        cmd "cp" [string "DiffselMCMCConvergenceAnalysis.html" ; ident out] ;
        cmd "cp" [string "new_iterations.txt" ; ident nb_new_iterations]
114
      ]
115 116 117
    )
  ]

Philippe Veber's avatar
Philippe Veber committed
118
let selector run_diffsel : text_file workflow =
119
  let env = Env.env_diffsel in
Philippe Veber's avatar
Philippe Veber committed
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "diffsel_analyze_result.py" in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dep_tree = (dep run_diffsel) // "myrun.tree" in
  let dep_ali = (dep run_diffsel) // "myrun.ali" in
  let chainname = (dep run_diffsel) // "myrun" in
  let out = dest in
  workflow ~descr:"convergence_detection.parse_diffsel" [
    docker env (
      and_list [
        mkdir_p tmp ;
        cd tmp ;

        cmd "cp" [dep_ali; tmp_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep_tree; tmp_tree]; (* required dep to link the file in the env *)

        (*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
        cmd "cp"  [ file_dump (string Scripts.diffsel_script_utils) ; package] ;
        cmd "cp" [ file_dump (string Scripts.diffsel_analyze_result) ; script] ;

        cmd "python" [
          string "diffsel_analyze_result.py" ;
          opt "-r" string "/diffsel/_build/readdiffsel" ;
          opt "-o" ident out ;
          ident chainname ;
        ]
      ]
    )
  ]