diffsel.ml 4.79 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1
open Core_kernel
Philippe Veber's avatar
Philippe Veber committed
2 3
open Bistro
open Bistro.Shell_dsl
Philippe Veber's avatar
Philippe Veber committed
4
open File_formats
5
open Utils
6

Carine Rey's avatar
Carine Rey committed
7
(** [diffsel_add_iterations_script ~chainname ~ali ~tree ~seed] builds, through
    [bash_script], the follow-up script executed after the first diffsel chain.

    Four variables are handed to [bash_script]: [CHAIN], [ALI], [TREE] and
    [SEED_OPTION] (the [-seed] flag, present only when [seed] is given). How
    [bash_script] exposes them to the script body is defined outside this
    block.

    The script body renders [DiffselMCMCConvergenceAnalysis.Rmd] on the trace
    [../tmp/myrun_tmp.trace], reads the last line of [new_iterations.txt] and
    takes its first three [cut] fields as [new_max], [continue] and [end_it],
    then launches diffsel once with [-x 1 $end_it] on chain [$CHAIN]. The
    iteration loop is commented out, so this sequence runs a single time. *)
let diffsel_add_iterations_script ~chainname ~ali ~tree ~seed =
  let seed_option = option (opt "-seed" int) seed in
  let bindings =
    [ ("CHAIN", chainname)
    ; ("ALI", ali)
    ; ("TREE", tree)
    ; ("SEED_OPTION", seed_option)
    ]
  in
  bash_script bindings {|

#!/bin/bash
set -e

continue=true
i=0

#while $continue 
#do
i=$((i+1))
echo i=$i
ls
# check convergence
Rscript -e "rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", params=list(set_trace1=\"../tmp/myrun_tmp.trace\"),output_file=\"../dest/output_"$i".html\")"
new_max=`tail -n 1 new_iterations.txt | cut -f 1`
continue=`tail -n 1 new_iterations.txt | cut -f 2`
end_it=`tail -n 1 new_iterations.txt | cut -f 3`

echo new_max=$new_max
echo continue=$continue
echo end_it=$end_it

#if $continue
#then
#    echo "diffsel $CHAIN $new_max"
#    /diffsel/_build/diffsel $CHAIN $new_max
#fi
#
#done
#
#cat new_iterations.txt > $CHAIN.iterations

/diffsel/_build/diffsel -t $TREE -d $ALI -ncond 2 -x 1 $end_it ${SEED_OPTION} $CHAIN

|}


53
(** [diffsel ~phy_n ~tree ~w_every ~n_cycles ?descr ?seed ()] declares the
    shell workflow that runs diffsel inside the [Env.env_diffsel] container.

    Steps, in order: create [dest] and [tmp], move into [tmp], dump the
    convergence-analysis R Markdown script there, copy the alignment and the
    tree into both [dest] and [tmp], run a first chain named [tmp/myrun_tmp],
    then execute the script built by {!diffsel_add_iterations_script} with
    chain name [dest/myrun].

    @param phy_n alignment, copied as [myrun.ali]
    @param tree tree, copied as [myrun.tree]
    @param w_every first value passed after [-x] in the first run
    @param n_cycles second value passed after [-x] (subject to the clamp below)
    @param descr suffix appended to the workflow description (default: empty)
    @param seed when given, forwarded as [-seed] to both diffsel runs *)
let diffsel ~(phy_n:nucleotide_phylip pworkflow) ~(tree: _ pworkflow) ~(w_every:int) ~(n_cycles: int) ?(descr = "") ?seed () : [`diffsel] dworkflow =
  let env = Env.env_diffsel in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dest_tree = dest // "myrun.tree" in
  let dest_ali = dest // "myrun.ali" in
  let chainname_tmp = tmp // "myrun_tmp" in
  let chainname = dest // "myrun" in
  (* NOTE(review): any request above 200 cycles is replaced by 20 (not 200);
     confirm that this is the intended cap. *)
  let n_cycles = if (n_cycles > 200) then 20 else n_cycles in
  let script_r = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
  (*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
  Workflow.shell ~descr:("convergence_detection.run_diffsel." ^ descr)  [
    within_container env (
      and_list [
        mkdir_p dest;
        (* Create tmp before entering it, as check_conv and selector do. *)
        mkdir_p tmp;
        cd tmp;
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script_r] ;
        cmd "cp" [dep phy_n; dest_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep tree; dest_tree]; (* required dep to link the file in the env *)
        cmd "cp" [dep phy_n; tmp_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep tree; tmp_tree]; (* required dep to link the file in the env *)
        (* First chain, written under tmp. *)
        cmd "/diffsel/_build/diffsel" [
          opt "-t" ident tmp_tree;
          opt "-d" ident tmp_ali ;
          opt "-ncond"  int 2 ;
          opt "-x" seq [ int w_every; string " "; int n_cycles];
          option (opt "-seed" int) seed ;
          ident chainname_tmp ;
        ];
        (* Convergence check followed by the second chain, written under dest. *)
        cmd "bash" [(file_dump (diffsel_add_iterations_script ~chainname ~ali:tmp_ali ~tree:tmp_tree ~seed))];
      ]
    )
  ]

Philippe Veber's avatar
Philippe Veber committed
87
(** [check_conv run_diffsel] declares the shell workflow that renders the
    convergence-analysis R Markdown report for a diffsel run, inside the
    [Env.env_r] container.

    It selects [myrun.trace] from [run_diffsel], dumps the Rmd script into
    [tmp], renders it with [Rscript], then copies
    [DiffselMCMCConvergenceAnalysis.html] to [dest/out.html] and
    [new_iterations.txt] to [dest/new_iterations.txt].

    @param run_diffsel workflow whose output directory contains [myrun.trace] *)
let check_conv run_diffsel : directory pworkflow =
  let env = Env.env_r in
  let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
  let trace = Workflow.select run_diffsel ["myrun.trace"] in
  let out = dest // "out.html" in
  let nb_new_iterations = dest // "new_iterations.txt" in
  (* The R expression is assembled with [seq], which concatenates its pieces
     without a separator, so the trace path sits directly between the escaped
     quotes. Passing the pieces as separate [cmd] arguments would put a space
     on each side of the path inside the R string. *)
  let render_expr =
    seq [
      string {|"rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", |} ;
      string {|params=list(set_trace1=\"|} ;
      dep trace ;
      string {|\"))"|} ;
    ]
  in
  Workflow.shell ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" [
    within_container env (
      and_list [
        mkdir_p tmp ;
        mkdir_p dest ;
        cd tmp ;
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script] ;
        cmd "Rscript" [
          string "-e" ;
          render_expr ;
        ] ;
        cmd "cp" [string "DiffselMCMCConvergenceAnalysis.html" ; ident out] ;
        cmd "cp" [string "new_iterations.txt" ; ident nb_new_iterations]
      ]
    )
  ]

Philippe Veber's avatar
Philippe Veber committed
113
(** [selector run_diffsel] declares the shell workflow that post-processes a
    diffsel run with [diffsel_analyze_result.py], inside the
    [Env.env_diffsel] container.

    It copies [myrun.ali] and [myrun.tree] from the run directory into [tmp],
    dumps the two Python scripts there, and calls the analysis script with
    [-r /diffsel/_build/readdiffsel], [-o dest] and the chain name [myrun]
    located in the run directory.

    @param run_diffsel workflow whose output directory holds the [myrun] chain *)
let selector run_diffsel : text_file pworkflow =
  let env = Env.env_diffsel in
  let run_dir = dep run_diffsel in
  let in_tmp name = tmp // name in
  let from_run name = run_dir // name in
  (* Copy one file of the diffsel run into tmp under the same name. *)
  let stage name = cmd "cp" [from_run name; in_tmp name] in
  (* Write an embedded script into tmp under [target]. *)
  let dump_script source target =
    cmd "cp" [ file_dump (string source) ; in_tmp target ]
  in
  let prepare = [ mkdir_p tmp ; cd tmp ] in
  (* required dep to link the files in the env *)
  let inputs = [ stage "myrun.ali" ; stage "myrun.tree" ] in
  let scripts = [
    dump_script Scripts.diffsel_script_utils "diffsel_script_utils.py" ;
    dump_script Scripts.diffsel_analyze_result "diffsel_analyze_result.py" ;
  ] in
  (*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
  let analysis =
    cmd "python" [
      string "diffsel_analyze_result.py" ;
      opt "-r" string "/diffsel/_build/readdiffsel" ;
      opt "-o" ident dest ;
      ident (from_run "myrun") ;
    ]
  in
  Workflow.shell ~descr:"convergence_detection.parse_diffsel" [
    within_container env (
      and_list (prepare @ inputs @ scripts @ [ analysis ])
    )
  ]