diffsel.ml 4.8 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1 2 3 4 5
open Core_kernel
open Bistro.Std
open Bistro.EDSL
open File_formats

6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
(** [assign k v] renders the assignment [k=v]: the variable name [k] (a plain
    OCaml string) and the template [v], joined by an equals sign. *)
let assign k v =
  let name = string k in
  seq ~sep:"=" [ name ; v ]

(** [bash_script args code] builds a script template made of one [name=value]
    line per [(name, value)] pair of [args] (in list order), followed on a new
    line by the literal text [code]. *)
let bash_script args code =
  let assignments =
    List.map args ~f:(fun (name, value) -> assign name value)
  in
  let prelude = seq ~sep:"\n" assignments in
  seq ~sep:"\n" [ prelude ; string code ]


(** [diffsel_add_iterations_script ~chainname] is a bash script template that
    defines the shell variable CHAIN as [chainname] and then loops:

    - render DiffselMCMCConvergenceAnalysis.Rmd on ../dest/myrun.trace;
    - read the last line of new_iterations.txt and take its fields 1, 2 and 3
      as new_max, continue and end_it;
    - when continue holds, run diffsel again with CHAIN and new_max.

    The loop runs for as long as the continue variable evaluates to a
    succeeding command. Once it ends, new_iterations.txt is copied to
    CHAIN.iterations. *)
let diffsel_add_iterations_script ~chainname =
  let code = {|

#!/bin/bash
set -e

continue=true

while $continue 
do

# check convergence
Rscript -e "rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", params=list(set_trace1=\"../dest/myrun.trace\"))"
new_max=`tail -n 1 new_iterations.txt | cut -f 1`
continue=`tail -n 1 new_iterations.txt | cut -f 2`
end_it=`tail -n 1 new_iterations.txt | cut -f 3`

echo new_max=$new_max
echo continue=$continue
echo end_it=$end_it

if $continue
then
    echo "diffsel $CHAIN $new_max"
    /diffsel/_build/diffsel $CHAIN $new_max
fi

done

cat new_iterations.txt > $CHAIN.iterations

|}
  in
  bash_script [ "CHAIN", chainname ] code


58 59
(** [diffsel ~phy_n ~tree ~w_every ~n_cycles ~id ~tag] builds the workflow that
    runs one diffsel chain inside the [vlanore/diffsel] docker image.

    - [phy_n]: alignment, copied as [myrun.ali] into both [dest] and [tmp].
    - [tree]: tree file, copied as [myrun.tree] into both [dest] and [tmp].
    - [w_every], [n_cycles]: the two values given to the [-x] option of
      diffsel, in that order; [n_cycles] is capped at 200 for this first run.
    - [id]: chain number, echoed when the job starts and appended to the
      workflow description.
    - [tag]: docker image tag, also embedded in the workflow description.

    After the first run, the script produced by
    [diffsel_add_iterations_script] is executed with chain name [dest/myrun].
    The result is the [dest] directory. *)
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) ~(id: int) ~tag : [`diffsel] directory workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag () in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dest_tree = dest // "myrun.tree" in
  let dest_ali = dest // "myrun.ali" in
  let chainname = dest // "myrun" in
  (* The first run never exceeds 200 cycles; further iterations are requested
     by the follow-up bash script. *)
  let n_cycles = Int.min n_cycles 200 in
  let script_r = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
  let descr = Printf.sprintf "convergence_detection.run_diffsel.%s.%d" tag id in
  (* Example of a manual invocation:
     _build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun *)
  workflow ~descr [
    docker env (
      and_list [
        mkdir_p dest;
        (* Same guard as the sibling workflows of this file; it does nothing
           when the directory already exists. *)
        mkdir_p tmp;
        cd tmp;
        cmd "echo" [string "Run chain:"; int id];
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script_r] ;
        (* Original author note on the four copies below: going through [dep]
           is required to link the input files in the env. *)
        cmd "cp" [dep phy_n; dest_ali];
        cmd "cp" [dep tree; dest_tree];
        cmd "cp" [dep phy_n; tmp_ali];
        cmd "cp" [dep tree; tmp_tree];
        cmd "/diffsel/_build/diffsel" [
          opt "-t" ident tmp_tree;
          opt "-d" ident tmp_ali ;
          opt "-ncond"  int 2 ;
          opt "-x" seq [ int w_every; string " "; int n_cycles];
          ident chainname ;
        ];
        cmd "bash" [(file_dump (diffsel_add_iterations_script ~chainname ))];
      ]
    )
  ]

91
(** [check_conv run_diffsel] builds the workflow that renders
    DiffselMCMCConvergenceAnalysis.Rmd on the [myrun.trace] file of
    [run_diffsel], inside the [carinerey/r_basics] docker image.

    The result directory holds [out.html] (the rendered report) and
    [new_iterations.txt], both copied from [tmp] once rendering is done. *)
let check_conv run_diffsel : text_file directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"r_basics" ~tag:"07232018" () in
  let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
  (* [selector] is the one provided by the opened modules here: the function
     of the same name in this file is only defined further down. *)
  let trace = run_diffsel / selector["myrun.trace"] in
  let out = dest // "out.html" in
  let nb_new_iterations = dest // "new_iterations.txt" in
  (* The R expression is glued into ONE argument with [seq]. Passing the
     pieces as separate [cmd] arguments would put a separator on each side of
     the trace path, i.e. inside the quoted R string. *)
  let render_call =
    seq [
      string {|"rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", |} ;
      string {|params=list(set_trace1=\"|} ;
      dep trace ;
      string {|\"))"|} ;
    ]
  in
  workflow ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" [
    docker env (
      and_list [
        mkdir_p tmp ;
        mkdir_p dest ;
        cd tmp ;
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script] ;
        cmd "Rscript" [ string "-e" ; render_call ] ;
        cmd "cp" [string "DiffselMCMCConvergenceAnalysis.html" ; ident out] ;
        cmd "cp" [string "new_iterations.txt" ; ident nb_new_iterations]
      ]
    )
  ]

Philippe Veber's avatar
Philippe Veber committed
117
(** [selector run_diffsel] builds the workflow that post-processes a diffsel
    run inside the [vlanore/diffsel:master_2304] docker image.

    It copies [myrun.ali] and [myrun.tree] from the run directory into [tmp],
    dumps the two Python helper scripts there, and runs
    [diffsel_analyze_result.py] on the chain [myrun] of [run_diffsel], with
    the output written to [dest].

    From this point on in the file, the name [selector] refers to this
    function rather than to any [selector] brought in by the opened modules. *)
let selector run_diffsel : text_file workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"master_2304" () in
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "diffsel_analyze_result.py" in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  (* Everything read from the previous step lives under its result directory. *)
  let run_dir = dep run_diffsel in
  let dep_tree = run_dir // "myrun.tree" in
  let dep_ali = run_dir // "myrun.ali" in
  let chainname = run_dir // "myrun" in
  workflow ~descr:"convergence_detection.parse_diffsel" [
    docker env (
      and_list [
        mkdir_p tmp ;
        cd tmp ;

        (* Original author note on the two copies below: going through [dep]
           is required to link the files in the env. *)
        cmd "cp" [dep_ali; tmp_ali];
        cmd "cp" [dep_tree; tmp_tree];

        (* Usage: python diffsel_analyze_result.py [-r /path/to/readdiffsel]
           [-o output_file] chainname *)
        cmd "cp"  [ file_dump (string Scripts.diffsel_script_utils) ; package] ;
        cmd "cp" [ file_dump (string Scripts.diffsel_analyze_result) ; script] ;

        cmd "python" [
          string "diffsel_analyze_result.py" ;
          opt "-r" string "/diffsel/_build/readdiffsel" ;
          opt "-o" ident dest ;
          ident chainname ;
        ]
      ]
    )
  ]