Commit d8ca8294 authored by Philippe Veber
Browse files

first working run of diffseldsparse

parent f1d0bcae
open Core_kernel
open Bistro.Std
open Bistro.EDSL
open File_formats
open Utils
(* Docker image pveber/bayescode:latest. Workflows in this file that do not
   rebind [env] locally wrap their commands in it through [docker env]. *)
let env = docker_image ~account:"pveber" ~name:"bayescode" ~tag:"latest" ()
(* Builds, through [bash_script], the follow-up script executed after the
   first diffseldsparse run.

   The script is handed three named values: CHAIN ([chainname]), ALI ([ali])
   and TREE ([tree]).
   NOTE(review): how [bash_script] exposes them to the shell is defined in
   another module; presumably they become the shell variables of the same
   name used on the last line of the script -- confirm.

   What the script does, in order:
   - renders DiffselMCMCConvergenceAnalysis.Rmd with rmarkdown against
     ../tmp/myrun_tmp.trace, writing ../dest/output_<i>.html;
   - reads the last line of new_iterations.txt and takes its fields 1, 2
     and 3 as new_max, continue and end_it;
   - runs /bayescode/data/diffseldsparse again with -x 1 <end_it>.

   The while loop is commented out, so this sequence runs exactly once. *)
let diffseldsparse_add_iterations_script ~chainname ~ali ~tree =
  let script_body = {|
#!/bin/bash
set -e
continue=true
i=0
#while $continue
#do
i=$((i+1))
echo i=$i
ls
# check convergence
Rscript -e "rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", params=list(set_trace1=\"../tmp/myrun_tmp.trace\"),output_file=\"../dest/output_"$i".html\")"
new_max=`tail -n 1 new_iterations.txt | cut -f 1`
continue=`tail -n 1 new_iterations.txt | cut -f 2`
end_it=`tail -n 1 new_iterations.txt | cut -f 3`
echo new_max=$new_max
echo continue=$continue
echo end_it=$end_it
/bayescode/data/diffseldsparse -t $TREE -d $ALI -ncond 2 -x 1 $end_it $CHAIN
|}
  in
  let bindings = [ ("CHAIN", chainname) ; ("ALI", ali) ; ("TREE", tree) ] in
  bash_script bindings script_body
(* Workflow running diffseldsparse on [alignment] and [tree] inside the
   bayescode docker image.

   - [w_every] and [n_cycles] are the two values given to the -x option of
     the first run; [n_cycles] is capped at 200.
   - [descr] is appended to the workflow description.

   Steps: create [dest], move to [tmp], dump the convergence-analysis Rmd
   script there, copy the alignment and the tree into both [dest] and [tmp],
   run diffseldsparse on the [tmp] copies with chain name myrun_tmp, then
   execute the follow-up script from
   [diffseldsparse_add_iterations_script].

   NOTE(review): the follow-up script is given the chain name dest/myrun
   while the first run writes to tmp/myrun_tmp -- confirm this is intended.
   NOTE(review): unlike the sibling workflows, no [mkdir_p tmp] is issued
   before [cd tmp]. *)
let diffseldsparse ~(alignment:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) ?(descr = "") () : [`diffseldsparse] directory workflow =
  let n_cycles = Int.min n_cycles 200 in
  let in_tmp name = tmp // name in
  let in_dest name = dest // name in
  let copy src dst = cmd "cp" [ src ; dst ] in
  let work_tree = in_tmp "myrun.tree" in
  let work_ali = in_tmp "myrun.ali" in
  let first_run =
    cmd "/bayescode/data/diffseldsparse" [
      opt "-t" ident work_tree ;
      opt "-d" ident work_ali ;
      opt "-ncond" int 2 ;
      opt "-x" seq [ int w_every ; string " " ; int n_cycles ] ;
      (* a -seed option is not yet implemented *)
      ident (in_tmp "myrun_tmp") ;
    ]
  in
  let follow_up =
    cmd "bash" [
      file_dump
        (diffseldsparse_add_iterations_script
           ~chainname:(in_dest "myrun")
           ~ali:work_ali
           ~tree:work_tree)
    ]
  in
  workflow ~descr:("convergence_detection.run_diffseldsparse." ^ descr) [
    docker env (
      and_list [
        mkdir_p dest ;
        cd tmp ;
        copy
          (file_dump (string Scripts.diffselMCMCConvergenceAnalysis))
          (in_tmp "DiffselMCMCConvergenceAnalysis.Rmd") ;
        (* going through [dep] is required to link the files in the env *)
        copy (dep alignment) (in_dest "myrun.ali") ;
        copy (dep tree) (in_dest "myrun.tree") ;
        copy (dep alignment) work_ali ;
        copy (dep tree) work_tree ;
        first_run ;
        follow_up ;
      ]
    )
  ]
(* Workflow rendering the convergence report of a diffseldsparse run.

   [run_diffseldsparse] is the output directory of the diffseldsparse
   workflow; its myrun.trace file is given to the Rmd script as the
   set_trace1 parameter. The commands run in the [Env.env_r] docker image,
   which shadows the file-level [env] here.

   Steps: create [tmp] and [dest], move to [tmp], dump
   DiffselMCMCConvergenceAnalysis.Rmd there, render it with rmarkdown, then
   copy the rendered html to dest/out.html and new_iterations.txt to
   dest/new_iterations.txt.

   The R expression is assembled with [seq], which concatenates its elements
   without a separator. Passing the pieces as separate arguments of [cmd]
   would join them with spaces and thus put a space on each side of the
   trace path inside the R string literal, so the path would not resolve. *)
let check_conv run_diffseldsparse : text_file directory workflow =
  let env = Env.env_r in
  let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
  let trace = run_diffseldsparse / selector ["myrun.trace"] in
  let out = dest // "out.html" in
  let nb_new_iterations = dest // "new_iterations.txt" in
  (* single shell word: the double-quoted R call with the trace path inlined *)
  let render_expr =
    seq [
      string {|"rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", params=list(set_trace1=\"|} ;
      dep trace ;
      string {|\"))"|} ;
    ]
  in
  workflow ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" [
    docker env (
      and_list [
        mkdir_p tmp ;
        mkdir_p dest ;
        cd tmp ;
        cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script ] ;
        cmd "Rscript" [ string "-e" ; render_expr ] ;
        cmd "cp" [ string "DiffselMCMCConvergenceAnalysis.html" ; ident out ] ;
        cmd "cp" [ string "new_iterations.txt" ; ident nb_new_iterations ]
      ]
    )
  ]
(* Workflow post-processing a diffseldsparse run with
   diffsel_analyze_result.py.

   [run_diffseldsparse] is the output directory of the diffseldsparse
   workflow. Inside the bayescode docker image the workflow creates [tmp],
   moves into it, copies myrun.ali and myrun.tree from the run directory,
   dumps the two python scripts there, and calls

     python diffsel_analyze_result.py -r /bayescode/data/readdiffseldsparse
       -o <dest> <run directory>/myrun

   so the result file is the workflow destination itself. *)
let selector run_diffseldsparse : text_file workflow =
  let run_dir = dep run_diffseldsparse in
  let in_tmp name = tmp // name in
  let copy src dst = cmd "cp" [ src ; dst ] in
  (* script usage:
     python diffseldsparse_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
  let analyze =
    cmd "python" [
      string "diffsel_analyze_result.py" ;
      opt "-r" string "/bayescode/data/readdiffseldsparse" ;
      opt "-o" ident dest ;
      ident (run_dir // "myrun") ;
    ]
  in
  workflow ~descr:"convergence_detection.parse_diffseldsparse" [
    docker env (
      and_list [
        mkdir_p tmp ;
        cd tmp ;
        (* going through [dep] is required to link the files in the env *)
        copy (run_dir // "myrun.ali") (in_tmp "myrun.ali") ;
        copy (run_dir // "myrun.tree") (in_tmp "myrun.tree") ;
        copy (file_dump (string Scripts.diffsel_script_utils)) (in_tmp "diffsel_script_utils.py") ;
        copy (file_dump (string Scripts.diffsel_analyze_result)) (in_tmp "diffsel_analyze_result.py") ;
        analyze ;
      ]
    )
  ]
open Bistro.Std
open File_formats
(** [diffseldsparse ~alignment ~tree ~w_every ~n_cycles ?descr ()] is a
    workflow that runs the diffseldsparse program on a phylip nucleotide
    [alignment] and a [tree], and yields the directory holding the run
    files (named with the myrun prefix).

    [w_every] and [n_cycles] are passed as the two values of the -x option
    of the program (presumably the saving period and the number of cycles --
    confirm against the bayescode documentation); the implementation caps
    [n_cycles] at 200. [descr] (default: empty) is appended to the workflow
    description. *)
val diffseldsparse :
alignment:nucleotide_phylip workflow ->
tree : _ workflow ->
w_every : int ->
n_cycles: int ->
?descr:string ->
unit ->
[`diffseldsparse] directory workflow
(** [selector run] is a workflow that runs diffsel_analyze_result.py (with
    readdiffseldsparse) on the myrun chain found in the diffseldsparse output
    directory [run], and yields the resulting text file. *)
val selector :
[`diffseldsparse] directory workflow ->
text_file workflow
(** [check_conv run] is a workflow that renders the
    DiffselMCMCConvergenceAnalysis R markdown report on the myrun.trace file
    of the diffseldsparse output directory [run]. The resulting directory
    contains out.html and new_iterations.txt. *)
val check_conv :
[`diffseldsparse] directory workflow ->
text_file directory workflow
......@@ -14,6 +14,8 @@ and simulation = {
seed : int ;
}
(* Base seed (mutable, initially 42). It is dereferenced and combined with a
   string by [calc_fixed_seed] wherever a reproducible per-item seed is
   needed. *)
let seed = ref 42
(* [calc_fixed_seed ~str seed] derives a deterministic integer from a string
   and a base seed: the hash of [str] is added to [seed] and the sum is
   hashed again. Equal inputs always give the same result. *)
let calc_fixed_seed ~(str:string) (seed:int) : int =
  str
  |> Hashtbl.hash
  |> ( + ) seed
  |> Hashtbl.hash
......@@ -45,7 +47,7 @@ let profile sim =
~nb_cat:All
~nb_sites:sim.nb_sites
sim.profiles
~seed:(Random.int Int.max_value)
~seed:(calc_fixed_seed ~str:sim.profiles !seed)
let bppseqgen (Simulation sim as d) =
let model_prefix = Convergence_hypothesis.string_of_model sim.hypothesis in
......@@ -66,13 +68,13 @@ let nucleotide_alignment (Simulation sim as d) =
(* Nucleotide alignment of dataset [d], passed through [Bppsuite.fna2phy]. *)
let phylip_nucleotide_alignment d =
  let aln = nucleotide_alignment d in
  Bppsuite.fna2phy aln
(* let diffseldsparse d =
* Diffseldsparse.diffseldsparse
* ~alignment:(phylip_nucleotide_alignment d)
* ~tree:(tree d)
* ~w_every:1
* ~n_cycles:2
* () *)
(* diffseldsparse workflow for dataset [d]: uses its phylip nucleotide
   alignment and its tree, with w_every = 1 and n_cycles = 50. *)
let diffseldsparse d =
  let alignment = phylip_nucleotide_alignment d in
  let tree = tree d in
  Diffseldsparse.diffseldsparse ~alignment ~tree ~w_every:1 ~n_cycles:50 ()
let eval x =
Bistro_utils.Term.(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment