diffsel.ml 4.95 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1 2 3 4 5
open Core_kernel
open Bistro.Std
open Bistro.EDSL
open File_formats

6 7 8 9 10 11 12 13 14 15 16 17 18 19
let assign k v =
  seq ~sep:"=" [ string k ; v ]

let bash_script args code =
  let prelude =
    args
    |> List.map ~f:(fun (k, v) ->
        assign k v
      )
    |> seq ~sep:"\n"
  in
  seq ~sep:"\n" [ prelude ; string code ]


20
let diffsel_add_iterations_script ~chainname ~ali ~tree =
21 22
  let vars = [
    "CHAIN", chainname ;
23 24
    "ALI", ali ;
    "TREE", tree ;
25 26 27 28 29 30 31 32
  ]
  in
  bash_script vars {|

#!/bin/bash
set -e

continue=true
Carine Rey's avatar
Carine Rey committed
33
i=0
34

35 36
#while $continue 
#do
Carine Rey's avatar
Carine Rey committed
37 38
i=$((i+1))
echo i=$i
39
ls
40
# check convergence
41
Rscript -e "rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", params=list(set_trace1=\"../tmp/myrun_tmp.trace\"),output_file=\"../dest/output_"$i".html\")"
42 43 44 45 46 47 48 49
new_max=`tail -n 1 new_iterations.txt | cut -f 1`
continue=`tail -n 1 new_iterations.txt | cut -f 2`
end_it=`tail -n 1 new_iterations.txt | cut -f 3`

echo new_max=$new_max
echo continue=$continue
echo end_it=$end_it

50 51 52 53 54 55 56 57 58
#if $continue
#then
#    echo "diffsel $CHAIN $new_max"
#    /diffsel/_build/diffsel $CHAIN $new_max
#fi
#
#done
#
#cat new_iterations.txt > $CHAIN.iterations
59

60
/diffsel/_build/diffsel -t $TREE -d $ALI -ncond 2 -x 1 $end_it $CHAIN
61 62 63 64

|}


65 66
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) ~(id: int) ?seed () : [`diffsel] directory workflow =
  let env = Env.env_diffsel in
Philippe Veber's avatar
Philippe Veber committed
67 68 69 70
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dest_tree = dest // "myrun.tree" in
  let dest_ali = dest // "myrun.ali" in
71
  let chainname_tmp = tmp // "myrun_tmp" in
Philippe Veber's avatar
Philippe Veber committed
72
  let chainname = dest // "myrun" in
73
  let n_cycles = if (n_cycles > 200) then 20 else n_cycles in
74
  let script_r = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
Philippe Veber's avatar
Philippe Veber committed
75
  (*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
76
  workflow ~descr:("convergence_detection.run_diffsel." ^(string_of_int id))  [
Philippe Veber's avatar
Philippe Veber committed
77 78 79
    docker env (
      and_list [
        mkdir_p dest;
80
        cd tmp;
Carine Rey's avatar
Carine Rey committed
81
        cmd "echo" [string "Run chain:"; int id];
82
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script_r] ;
Philippe Veber's avatar
Philippe Veber committed
83 84 85 86 87
        cmd "cp" [dep phy_n; dest_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep tree; dest_tree]; (* required dep to link the file in the env *)
        cmd "cp" [dep phy_n; tmp_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep tree; tmp_tree]; (* required dep to link the file in the env *)
        cmd "/diffsel/_build/diffsel" [
88 89 90 91
          opt "-t" ident tmp_tree;
          opt "-d" ident tmp_ali ;
          opt "-ncond"  int 2 ;
          opt "-x" seq [ int w_every; string " "; int n_cycles];
Philippe Veber's avatar
Philippe Veber committed
92
          option (opt "-seed" int) seed ;
93
          ident chainname_tmp ;
Philippe Veber's avatar
Philippe Veber committed
94
        ];
95
        cmd "bash" [(file_dump (diffsel_add_iterations_script ~chainname ~ali:tmp_ali ~tree:tmp_tree ))];
Philippe Veber's avatar
Philippe Veber committed
96 97 98 99
      ]
    )
  ]

100
let check_conv run_diffsel : text_file directory workflow =
101
  let env = Env.env_r in
102 103 104
  let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
  let trace = run_diffsel / selector["myrun.trace"] in
  let out = dest // "out.html" in
105
  let nb_new_iterations = dest // "new_iterations.txt" in
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
  workflow ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" [
    docker env (
      and_list [
        mkdir_p tmp ;
        mkdir_p dest ;
        cd tmp ;
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script] ;
        cmd "Rscript" [
          string "-e" ;
          string {|"rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\",|} ;
          string {|params=list(set_trace1=\"|} ;
          dep trace ;
          string {|\"))"|};
        ] ;
        cmd "cp" [string "DiffselMCMCConvergenceAnalysis.html" ; ident out] ;
        cmd "cp" [string "new_iterations.txt" ; ident nb_new_iterations]
122
      ]
123 124 125
    )
  ]

Philippe Veber's avatar
Philippe Veber committed
126
let selector run_diffsel : text_file workflow =
127
  let env = Env.env_diffsel in
Philippe Veber's avatar
Philippe Veber committed
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "diffsel_analyze_result.py" in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dep_tree = (dep run_diffsel) // "myrun.tree" in
  let dep_ali = (dep run_diffsel) // "myrun.ali" in
  let chainname = (dep run_diffsel) // "myrun" in
  let out = dest in
  workflow ~descr:"convergence_detection.parse_diffsel" [
    docker env (
      and_list [
        mkdir_p tmp ;
        cd tmp ;

        cmd "cp" [dep_ali; tmp_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep_tree; tmp_tree]; (* required dep to link the file in the env *)

        (*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
        cmd "cp"  [ file_dump (string Scripts.diffsel_script_utils) ; package] ;
        cmd "cp" [ file_dump (string Scripts.diffsel_analyze_result) ; script] ;

        cmd "python" [
          string "diffsel_analyze_result.py" ;
          opt "-r" string "/diffsel/_build/readdiffsel" ;
          opt "-o" ident out ;
          ident chainname ;
        ]
      ]
    )
  ]