open Core_kernel
open Bistro
open Bistro.Shell_dsl
open File_formats
open Utils

(** [diffsel_add_iterations_script ~chainname ~ali ~tree ~seed] builds the
    bash script that is run after a first diffsel chain has been produced.

    The script renders [DiffselMCMCConvergenceAnalysis.Rmd] on
    [../tmp/myrun_tmp.trace], reads the first three fields of the last line
    of [new_iterations.txt] (echoed as [new_max], [continue] and [end_it]),
    and finally launches diffsel on [tree] and [ali] with [-x 1 $end_it],
    writing to the chain [chainname]. The iteration loop that used to wrap
    these steps is commented out, so they run exactly once.

    [seed], when present, is forwarded to diffsel as [-seed <int>].

    NOTE(review): [bash_script] comes from an opened module that is not
    visible here; presumably it exposes the [vars] bindings to the script
    under the given names -- confirm. *)
let diffsel_add_iterations_script ~chainname ~ali ~tree ~seed =
  let vars = [
    "CHAIN", chainname ;
    "ALI", ali ;
    "TREE", tree ;
    "SEED_OPTION", option (opt "-seed" int) seed ;
  ]
  in
  bash_script vars {|#!/bin/bash
set -e
continue=true
i=0
#while $continue
#do
i=$((i+1))
echo i=$i
ls
# check convergence
Rscript -e "rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", params=list(set_trace1=\"../tmp/myrun_tmp.trace\"),output_file=\"../dest/output_"$i".html\")"
new_max=`tail -n 1 new_iterations.txt | cut -f 1`
continue=`tail -n 1 new_iterations.txt | cut -f 2`
end_it=`tail -n 1 new_iterations.txt | cut -f 3`
echo new_max=$new_max
echo continue=$continue
echo end_it=$end_it
#if $continue
#then
#  echo "diffsel $CHAIN $new_max"
#  /diffsel/_build/diffsel $CHAIN $new_max
#fi
#
#done
#
#cat new_iterations.txt > $CHAIN.iterations
/diffsel/_build/diffsel -t $TREE -d $ALI -ncond 2 -x 1 $end_it ${SEED_OPTION} $CHAIN
|}

(** [diffsel ~phy_n ~tree ~w_every ~n_cycles ?descr ?seed ()] is the workflow
    that runs diffsel on the alignment [phy_n] and the tree [tree].

    Steps, all executed from the [tmp] directory:
    - the convergence-analysis Rmd script is dumped into [tmp];
    - the alignment and the tree are copied into both [dest] and [tmp] as
      [myrun.ali] and [myrun.tree];
    - a first chain, [tmp/myrun_tmp], is run with [-ncond 2] and
      [-x w_every n_cycles];
    - the script built by [diffsel_add_iterations_script] is run, which
      launches the chain [dest/myrun].

    @param w_every first value passed to the [-x] option of the first chain.
    @param n_cycles second value passed to [-x], after the substitution
      described in the body.
    @param descr suffix appended to the workflow description.
    @param seed optional seed, forwarded as [-seed] to both diffsel runs. *)
let diffsel
    ~(phy_n:nucleotide_phylip file)
    ~(tree: _ file)
    ~(w_every:int)
    ~(n_cycles: int)
    ?(descr = "")
    ?seed
    ()
  : [`diffsel] directory =
  let env = Env.env_diffsel in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dest_tree = dest // "myrun.tree" in
  let dest_ali = dest // "myrun.ali" in
  let chainname_tmp = tmp // "myrun_tmp" in
  let chainname = dest // "myrun" in
  (* NOTE(review): any value above 200 is replaced by 20 (it is not capped
     at 200) -- confirm that this is the intended length of the first run. *)
  let n_cycles = if (n_cycles > 200) then 20 else n_cycles in
  let script_r = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
  (*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
  Workflow.shell ~descr:("convergence_detection.run_diffsel." ^ descr) ~img:env [
    and_list [
      mkdir_p dest;
      cd tmp;
      cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script_r] ;
      cmd "cp" [dep phy_n; dest_ali]; (* required dep to link the file in the env *)
      cmd "cp" [dep tree; dest_tree]; (* required dep to link the file in the env *)
      cmd "cp" [dep phy_n; tmp_ali]; (* required dep to link the file in the env *)
      cmd "cp" [dep tree; tmp_tree]; (* required dep to link the file in the env *)
      cmd "/diffsel/_build/diffsel" [
        opt "-t" ident tmp_tree;
        opt "-d" ident tmp_ali ;
        opt "-ncond" int 2 ;
        (* [seq] adds no separator by itself, hence the explicit space. *)
        opt "-x" seq [ int w_every; string " "; int n_cycles];
        option (opt "-seed" int) seed ;
        ident chainname_tmp ;
      ];
      cmd "bash" [(file_dump (diffsel_add_iterations_script ~chainname ~ali:tmp_ali ~tree:tmp_tree ~seed))];
    ]
  ]

(** [check_conv run_diffsel] renders the convergence-analysis Rmd report on
    the [myrun.trace] file selected from the [run_diffsel] directory.

    The Rmd script is dumped into [tmp] and rendered there; the resulting
    [DiffselMCMCConvergenceAnalysis.html] is copied to [dest/out.html] and
    [new_iterations.txt] to [dest/new_iterations.txt].

    NOTE(review): [new_iterations.txt] is presumably written by the Rmd
    script while it is rendered -- confirm against the script. *)
let check_conv run_diffsel : [`diffsel_check_conv] directory =
  let env = Env.env_r in
  let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
  let trace = Workflow.select run_diffsel ["myrun.trace"] in
  let out = dest // "out.html" in
  let nb_new_iterations = dest // "new_iterations.txt" in
  Workflow.shell ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" ~img:env [
    and_list [
      mkdir_p tmp ;
      mkdir_p dest ;
      cd tmp ;
      cmd "cp" [ file_dump (string Scripts.diffselMCMCConvergenceAnalysis) ; script] ;
      cmd "Rscript" [
        string "-e" ;
        (* The R expression has to be one single shell word. [cmd] separates
           its arguments with spaces, so passing the pieces as distinct
           arguments would surround the trace path with spaces inside the
           R string literal. [seq] glues them together without separator. *)
        seq ~sep:"" [
          string {|"rmarkdown::render(\"DiffselMCMCConvergenceAnalysis.Rmd\", params=list(set_trace1=\"|} ;
          dep trace ;
          string {|\"))"|} ;
        ] ;
      ] ;
      cmd "cp" [string "DiffselMCMCConvergenceAnalysis.html" ; ident out] ;
      cmd "cp" [string "new_iterations.txt" ; ident nb_new_iterations]
    ]
  ]

(** [selector run_diffsel] post-processes the chain [myrun] found in the
    [run_diffsel] directory.

    The alignment and tree stored next to the chain are copied into [tmp],
    the two Python helper scripts are dumped there, and
    [diffsel_analyze_result.py] is run with [/diffsel/_build/readdiffsel] as
    reader ([-r]) and [dest] as output file ([-o]). *)
let selector run_diffsel : cpt file =
  let env = Env.env_diffsel in
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "diffsel_analyze_result.py" in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dep_tree = (dep run_diffsel) // "myrun.tree" in
  let dep_ali = (dep run_diffsel) // "myrun.ali" in
  let chainname = (dep run_diffsel) // "myrun" in
  let out = dest in
  Workflow.shell ~descr:"convergence_detection.parse_diffsel" ~img:env [
    and_list [
      mkdir_p tmp ;
      cd tmp ;
      cmd "cp" [dep_ali; tmp_ali]; (* required dep to link the file in the env *)
      cmd "cp" [dep_tree; tmp_tree]; (* required dep to link the file in the env *)
      (*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
      cmd "cp" [ file_dump (string Scripts.diffsel_script_utils) ; package] ;
      cmd "cp" [ file_dump (string Scripts.diffsel_analyze_result) ; script] ;
      cmd "python" [
        string "diffsel_analyze_result.py" ;
        opt "-r" string "/diffsel/_build/readdiffsel" ;
        opt "-o" ident out ;
        ident chainname ;
      ]
    ]
  ]