Commit 32f11ea6 authored by Philippe Veber
Browse files

update wrt bistro

parent 383de3a6
open Core
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
open Bistro
open Bistro.Shell_dsl
open File_formats
open Utils
let env = Env.env_bppsuite
let img = Env.env_bppsuite
let conf_file_bppseqgen ~tree ~out ~nb_sites ~config =
let _conf_file_bppseqgen ~tree ~out ~nb_sites ~config =
seq ~sep:"\n" (
[
assign "input.tree.file" (dep tree) ;
......@@ -128,8 +127,8 @@ rate_distribution=Constant()
let ne_g = Convergence_hypothesis.neg_of_model hypothesis in
let ne_c = Convergence_hypothesis.nec_of_model hypothesis in
let ne_a = ne_g in
workflow ~descr:("bppsuite.bppseqgen" ^ descr) [
docker env (
Workflow.shell ~descr:("bppsuite.bppseqgen" ^ descr) [
within_container img (
and_list [
mkdir_p dest;
mkdir_p tmp;
......@@ -143,11 +142,11 @@ rate_distribution=Constant()
)
]
let alignment run_bppseqgen_multi_profiles : nucleotide_fasta workflow =
run_bppseqgen_multi_profiles / selector ["seq.fa"]
let alignment run_bppseqgen_multi_profiles : nucleotide_fasta pworkflow =
Workflow.select run_bppseqgen_multi_profiles ["seq.fa"]
let info run_bppseqgen_multi_profiles : text_file workflow =
run_bppseqgen_multi_profiles / selector ["seq.fa.info"]
let info run_bppseqgen_multi_profiles : text_file pworkflow =
Workflow.select run_bppseqgen_multi_profiles ["seq.fa.info"]
end
......@@ -167,9 +166,9 @@ let conf_file_bppseqman_fna2faa ~fna =
|}
]
(** [fna2faa ~fna] builds a shell workflow described as
    ["bppsuite.fna2faa"] that runs [bppseqman] with [param] set to a dumped
    configuration produced by [conf_file_bppseqman_fna2faa ~fna]. The input
    is typed as a nucleotide FASTA and the result as an amino-acid FASTA.
    NOTE(review): this view shows both the former header
    ([workflow] / [~env]) and the updated one ([Workflow.shell] / [~img]). *)
let fna2faa ~(fna:nucleotide_fasta workflow) : aminoacid_fasta workflow =
workflow ~descr:"bppsuite.fna2faa" [
cmd "bppseqman" ~env [
let fna2faa ~(fna:nucleotide_fasta pworkflow) : aminoacid_fasta pworkflow =
Workflow.shell ~descr:"bppsuite.fna2faa" [
cmd "bppseqman" ~img [
assign "param" (file_dump (conf_file_bppseqman_fna2faa ~fna)) ;
]
]
......@@ -198,24 +197,24 @@ let conf_file_bppseqman_faa2phy ~faa =
|}
]
(** [fna2phy ~fna] builds a shell workflow described as
    ["bppsuite.fna2phy_interleaved"] that runs [bppseqman] with [param] set
    to a dumped configuration produced by [conf_file_bppseqman_fna2phy ~fna].
    The input is typed as a nucleotide FASTA and the result as a nucleotide
    PHYLIP file.
    NOTE(review): this view shows both the former header
    ([workflow] / [~env]) and the updated one ([Workflow.shell] / [~img]). *)
let fna2phy ~(fna: nucleotide_fasta workflow) : nucleotide_phylip workflow =
workflow ~descr:"bppsuite.fna2phy_interleaved" [
cmd "bppseqman" ~env [
let fna2phy ~(fna: nucleotide_fasta pworkflow) : nucleotide_phylip pworkflow =
Workflow.shell ~descr:"bppsuite.fna2phy_interleaved" [
cmd "bppseqman" ~img [
assign "param" (file_dump (conf_file_bppseqman_fna2phy ~fna)) ;
]
]
(** [faa2phy ~faa] builds a shell workflow described as
    ["bppsuite.faa2phy_interleaved"] that runs [bppseqman] with [param] set
    to a dumped configuration produced by [conf_file_bppseqman_faa2phy ~faa].
    The input is typed as an amino-acid FASTA and the result as an
    amino-acid PHYLIP file.
    NOTE(review): this view shows both the former header
    ([workflow] / [~env]) and the updated one ([Workflow.shell] / [~img]). *)
let faa2phy ~(faa: aminoacid_fasta workflow) : aminoacid_phylip workflow =
workflow ~descr:"bppsuite.faa2phy_interleaved" [
cmd "bppseqman" ~env [
let faa2phy ~(faa: aminoacid_fasta pworkflow) : aminoacid_phylip pworkflow =
Workflow.shell ~descr:"bppsuite.faa2phy_interleaved" [
cmd "bppseqman" ~img [
assign "param" (file_dump (conf_file_bppseqman_faa2phy ~faa)) ;
]
]
let paste_fna ~(fna_l: nucleotide_fasta workflow list) : nucleotide_fasta workflow =
workflow ~descr:"bppsuite.catfasta" [
cmd "catfasta2phyml.pl" ~stdout:dest ~env (List.concat [
let paste_fna ~(fna_l: nucleotide_fasta pworkflow list) : nucleotide_fasta pworkflow =
Workflow.shell ~descr:"bppsuite.catfasta" [
cmd "catfasta2phyml.pl" ~stdout:dest ~img (List.concat [
[string "-f" ] ;
List.map fna_l ~f:(fun fna -> dep fna) ;
])
......
open Bistro.Std
open Bistro_bioinfo.Std
open Bistro
open File_formats
module Bppseqgen : sig
val multi_profiles :
?descr : string ->
profile_f: text_file workflow ->
profile_c: text_file workflow ->
tree_dataset:[`tree_dataset] directory workflow ->
profile_f: text_file pworkflow ->
profile_c: text_file pworkflow ->
tree_dataset:[`tree_dataset] dworkflow ->
hypothesis:Convergence_hypothesis.t ->
seed:int ->
[`bppseqgen] directory workflow
[`bppseqgen] dworkflow
val alignment :
[`bppseqgen] directory workflow ->
nucleotide_fasta workflow
[`bppseqgen] dworkflow ->
nucleotide_fasta pworkflow
val info :
[`bppseqgen] directory workflow ->
text_file workflow
[`bppseqgen] dworkflow ->
text_file pworkflow
end
val fna2faa :
fna:nucleotide_fasta workflow ->
aminoacid_fasta workflow
fna:nucleotide_fasta pworkflow ->
aminoacid_fasta pworkflow
val fna2phy :
fna: nucleotide_fasta workflow ->
nucleotide_phylip workflow
fna: nucleotide_fasta pworkflow ->
nucleotide_phylip pworkflow
val faa2phy :
faa: aminoacid_fasta workflow ->
aminoacid_phylip workflow
faa: aminoacid_fasta pworkflow ->
aminoacid_phylip pworkflow
val paste_fna:
fna_l: nucleotide_fasta workflow list ->
nucleotide_fasta workflow
fna_l: nucleotide_fasta pworkflow list ->
nucleotide_fasta pworkflow
open Core
open Bistro.EDSL
open Bistro.Std
open File_formats
open Bistro_bioinfo.Std
open Defs
open Bistro.Shell_dsl
open Bistro
type result = [
| `Pcoc of [`pcoc] directory workflow
| `Pcoc_gamma of [`pcoc] directory workflow
| `Pcoc_C60 of [`pcoc] directory workflow
| `Diffsel of [`diffsel] directory workflow
| `Identical_LG of [`identical] directory workflow
| `Identical_WAG of [`identical] directory workflow
| `Topological_LG of [`topological] directory workflow
| `Topological_WAG of [`topological] directory workflow
| `Tdg09 of [`tdg09] directory workflow
| `Multinomial of [`multinomial] directory workflow
| `Msd of [`msd] directory workflow * float
| `Pcoc of [`pcoc] dworkflow
| `Pcoc_gamma of [`pcoc] dworkflow
| `Pcoc_C60 of [`pcoc] dworkflow
| `Diffsel of [`diffsel] dworkflow
| `Identical_LG of [`identical] dworkflow
| `Identical_WAG of [`identical] dworkflow
| `Topological_LG of [`topological] dworkflow
| `Topological_WAG of [`topological] dworkflow
| `Tdg09 of [`tdg09] dworkflow
| `Multinomial of [`multinomial] dworkflow
| `Msd of [`msd] dworkflow * float
]
let meth_string_of_result = function
......@@ -37,12 +34,11 @@ type dataset_res = {
tree_prefix : string ;
dataset : Dataset.t ;
res_by_tools: result list ;
merged_results : text_file workflow ;
plot_merged_results : svg workflow ;
merged_results : text_file pworkflow ;
plot_merged_results : svg pworkflow ;
}
let merge_results ?fna_infos ~res_by_tools () : text_file workflow =
let env = Env.env_py in
let merge_results ?fna_infos ~res_by_tools () : text_file pworkflow =
let fna_infos = match fna_infos with
| Some (sw) -> sw
| _ -> None
......@@ -59,7 +55,7 @@ let merge_results ?fna_infos ~res_by_tools () : text_file workflow =
| `Topological_WAG d -> Topological.results d
| `Tdg09 d -> Tamuri.results d
| `Multinomial d -> Multinomial.results d
| `Msd (d, e) -> Msd.results d
| `Msd (d, _) -> Msd.results d
in
let opt = match res with
| `Pcoc _ -> string "--pcoc"
......@@ -72,13 +68,13 @@ let merge_results ?fna_infos ~res_by_tools () : text_file workflow =
| `Topological_WAG _ -> string "--topological_WAG"
| `Tdg09 _ -> string "--tdg09"
| `Multinomial _ -> string "--multinomial"
| `Msd (w, e) -> string (sprintf "--msd %f" e)
| `Msd (_, e) -> string (sprintf "--msd %f" e)
in
seq ~sep:" " [opt; dep w]
)
in
workflow ~descr:"convergence_detection.merge_results" [
cmd "python" ~env [
Workflow.shell ~descr:"convergence_detection.merge_results" [
cmd "python" ~img:Env.env_py [
file_dump (string Scripts.merge_det_results) ;
opt "-o" ident dest ;
seq ~sep:" " command ;
......@@ -86,8 +82,8 @@ let merge_results ?fna_infos ~res_by_tools () : text_file workflow =
] ;
]
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg workflow =
let env = Env.env_pcoc in
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg pworkflow =
let img = Pcoc.img in
(* use of pcoc env due to its working X server to draw the plot with ete3 *)
let meths = List.map res_by_tools ~f:(fun res ->
let opt = match res with
......@@ -132,28 +128,31 @@ let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~t
let package_plot_data = tmp // "plot_data.py" in
let script_plot_convergent_sites = tmp // "plot_convergent_sites.py" in
let out = dest // "results.svg" in
workflow ~descr:"convergence_detection.plot_results" [
docker env (
and_list [
mkdir_p tmp ;
let inner =
Workflow.shell ~descr:"convergence_detection.plot_results" [
within_container img (
and_list [
mkdir_p tmp ;
mkdir_p dest ;
cd tmp ;
cd tmp ;
cmd "cp" [ file_dump (string Scripts.diffsel_script_utils) ; package_diffsel_script_utils ] ;
cmd "cp" [ file_dump (string Scripts.plot_data) ; package_plot_data] ;
cmd "cp" [ file_dump (string Scripts.plot_convergent_sites); script_plot_convergent_sites ] ;
cmd "cp" [ file_dump (string Scripts.diffsel_script_utils) ; package_diffsel_script_utils ] ;
cmd "cp" [ file_dump (string Scripts.plot_data) ; package_plot_data] ;
cmd "cp" [ file_dump (string Scripts.plot_convergent_sites); script_plot_convergent_sites ] ;
cmd "python" [
string "plot_convergent_sites.py" ;
opt "-msa" dep faa ;
opt "-tsv" dep tsv ;
opt "-tree" dep tree ;
opt "-out" ident out ;
opt "-meth" ident meths ;
option (opt "-t" ident) meths_t ;
option (opt "--t_tsv" dep) t_choices ;
flag string "--all_sites" plot_all_sites ;
cmd "python" [
string "plot_convergent_sites.py" ;
opt "-msa" dep faa ;
opt "-tsv" dep tsv ;
opt "-tree" dep tree ;
opt "-out" ident out ;
opt "-meth" ident meths ;
option (opt "-t" ident) meths_t ;
option (opt "--t_tsv" dep) t_choices ;
flag string "--all_sites" plot_all_sites ;
]
]
]
)
] / selector ["results.svg"]
)
]
in
Workflow.select inner ["results.svg"]
open Core
open Bistro.EDSL
open Bistro.Std
open Bistro_bioinfo.Std
open Bistro
open File_formats
open Defs
type result = [
| `Pcoc of [`pcoc] directory workflow
| `Pcoc_gamma of [`pcoc] directory workflow
| `Pcoc_C60 of [`pcoc] directory workflow
| `Diffsel of [`diffsel] directory workflow
| `Identical_LG of [`identical] directory workflow
| `Identical_WAG of [`identical] directory workflow
| `Topological_LG of [`topological] directory workflow
| `Topological_WAG of [`topological] directory workflow
| `Tdg09 of [`tdg09] directory workflow
| `Multinomial of [`multinomial] directory workflow
| `Msd of [`msd] directory workflow * float
| `Pcoc of [`pcoc] dworkflow
| `Pcoc_gamma of [`pcoc] dworkflow
| `Pcoc_C60 of [`pcoc] dworkflow
| `Diffsel of [`diffsel] dworkflow
| `Identical_LG of [`identical] dworkflow
| `Identical_WAG of [`identical] dworkflow
| `Topological_LG of [`topological] dworkflow
| `Topological_WAG of [`topological] dworkflow
| `Tdg09 of [`tdg09] dworkflow
| `Multinomial of [`multinomial] dworkflow
| `Msd of [`msd] dworkflow * float
]
val meth_string_of_result : result -> string
......@@ -26,22 +22,22 @@ type dataset_res = {
tree_prefix : string ;
dataset : Dataset.t ;
res_by_tools: result list ;
merged_results : text_file workflow ;
plot_merged_results : svg workflow
merged_results : text_file pworkflow ;
plot_merged_results : svg pworkflow
}
val merge_results :
?fna_infos : text_file workflow option ->
?fna_infos : text_file pworkflow option ->
res_by_tools : result list ->
unit ->
text_file workflow
text_file pworkflow
val plot_merge_results :
? t_choices : text_file workflow ->
? t_choices : text_file pworkflow ->
plot_all_sites: bool ->
res_by_tools : result list ->
tree:nhx workflow ->
faa:aminoacid_fasta workflow ->
tsv:text_file workflow ->
tree:nhx pworkflow ->
faa:aminoacid_fasta pworkflow ->
tsv:text_file pworkflow ->
unit ->
svg workflow
svg pworkflow
open Core
open Bistro.EDSL
open Bistro.Std
open Bistro.Shell_dsl
type nes_spec =
| Fixed of float
......@@ -33,7 +32,7 @@ let nec_of_model m = match m with
| Fixed c | Variable (_, c) -> c
(** [h0_hapc_forall nes_list] maps every [nes] of [nes_list] to the two
    hypotheses [H0 nes] and [HaPC nes], then flattens the per-element pairs
    into a single list, preserving the order of [nes_list].
    NOTE(review): the two body lines below differ only by the [~f:] label on
    the function passed to [List.map]. *)
let h0_hapc_forall nes_list =
List.map nes_list (fun nes -> [H0 nes; HaPC nes]) |> List.concat
List.map nes_list ~f:(fun nes -> [H0 nes; HaPC nes]) |> List.concat
(** [assign key value] renders the template fragment [key=value]: the
    literal string [key], an ["="] separator, then the template [value]. *)
let assign key value =
  let lhs = string key in
  seq ~sep:"=" [ lhs ; value ]
......@@ -9,7 +9,7 @@ type t = {
seed : int ;
}
let repo ~preview dataset_l =
let repo dataset_l =
List.map dataset_l ~f:(fun dataset ->
let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in
......
open Core
open Lwt.Infix
open Bistro
open Bistro_engine
(** [with_workflow w ~f] evaluates [w] and hands the outcome to [f].

    A scheduler is created with [~np:8] and [~mem:(`GB 8)] on top of the
    database initialised at ["_bistro"]; the evaluation of [w] is registered
    with [eval_exn], the scheduler is started, and the call blocks in
    [Lwt_main.run] until the evaluation thread resolves. The value returned
    is [f] applied to that result. *)
let with_workflow w ~f =
  let open Scheduler in
  let scheduler = create ~np:8 ~mem:(`GB 8) (Db.init_exn "_bistro") in
  let evaluation = eval_exn scheduler w in
  start scheduler ;
  f (Lwt_main.run evaluation)
(** [path w] evaluates the workflow [w] and returns the value obtained for
    it, which [less] formats with [%s], so it is a string; presumably the
    path of the built target (TODO confirm against [Workflow.eval_path]).
    NOTE(review): this view shows two bodies: the former one, which runs
    [pureW w] through [Bistro_utils.Term] and unwraps [Path p], and the
    updated one, which evaluates [Workflow.eval_path w] via [with_workflow]
    with the identity function. *)
let path w =
let open Bistro_utils.Term in
let Path p = run (pureW w) in
p
with_workflow (Workflow.eval_path w) ~f:(fun x -> x)
(** [less w] evaluates [w] (through [path]) and opens the resulting path in
    the [less] pager.

    The path is shell-quoted before being spliced into the command line, so
    a path containing spaces or shell metacharacters is passed to [less] as
    a single argument instead of breaking or altering the command. The exit
    status of the pager is deliberately discarded. *)
let less w =
  Sys.command (sprintf "less %s" (Filename.quote (path w)))
  |> ignore
(** [workflow_of_template t] builds a shell workflow made of a single [cp]
    command that copies a dump of the template [t] ([file_dump t]) to the
    workflow destination [dest].
    NOTE(review): this view shows both the former opening lines
    ([Bistro.EDSL] / [workflow]) and the updated ones
    ([Bistro.Shell_dsl] / [Workflow.shell]). *)
let workflow_of_template t =
let open Bistro.EDSL in
workflow [
let open Bistro.Shell_dsl in
Workflow.shell [
cmd "cp" [ file_dump t ; dest ]
]
......
open File_formats
open Bistro.Std
type parsed_input_tree
type output_parse_input_tree =
......
open Core_kernel
open Bistro.Std
open Bistro.EDSL
open Bistro
open Bistro.Shell_dsl
open File_formats
open Utils
......@@ -50,7 +50,7 @@ echo end_it=$end_it
|}
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) ~(id: int) ?(descr = "") ?seed () : [`diffsel] directory workflow =
let diffsel ~(phy_n:nucleotide_phylip pworkflow) ~(tree: _ pworkflow) ~(w_every:int) ~(n_cycles: int) ~(id: int) ?(descr = "") ?seed () : [`diffsel] dworkflow =
let env = Env.env_diffsel in
let tmp_tree = tmp // "myrun.tree" in
let tmp_ali = tmp // "myrun.ali" in
......@@ -65,8 +65,8 @@ let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:in
let n_cycles = if (n_cycles > 200) then 20 else n_cycles in
let script_r = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
(*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
workflow ~descr:("convergence_detection.run_diffsel." ^(string_of_int id) ^ "." ^ descr) [
docker env (
Workflow.shell ~descr:("convergence_detection.run_diffsel." ^(string_of_int id) ^ "." ^ descr) [
within_container env (
and_list [
mkdir_p dest;
cd tmp;
......@@ -89,14 +89,14 @@ let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:in
)
]
let check_conv run_diffsel : text_file directory workflow =
let check_conv run_diffsel : directory pworkflow =
let env = Env.env_r in
let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
let trace = run_diffsel / selector["myrun.trace"] in
let trace = Workflow.select run_diffsel ["myrun.trace"] in
let out = dest // "out.html" in
let nb_new_iterations = dest // "new_iterations.txt" in
workflow ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" [
docker env (
Workflow.shell ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" [
within_container env (
and_list [
mkdir_p tmp ;
mkdir_p dest ;
......@@ -115,7 +115,7 @@ let check_conv run_diffsel : text_file directory workflow =
)
]
let selector run_diffsel : text_file workflow =
let selector run_diffsel : text_file pworkflow =
let env = Env.env_diffsel in
let package = tmp // "diffsel_script_utils.py" in
let script = tmp // "diffsel_analyze_result.py" in
......@@ -125,8 +125,8 @@ let selector run_diffsel : text_file workflow =
let dep_ali = (dep run_diffsel) // "myrun.ali" in
let chainname = (dep run_diffsel) // "myrun" in
let out = dest in
workflow ~descr:"convergence_detection.parse_diffsel" [
docker env (
Workflow.shell ~descr:"convergence_detection.parse_diffsel" [
within_container env (
and_list [
mkdir_p tmp ;
cd tmp ;
......
open Bistro.Std
open Bistro
open File_formats
val diffsel :
phy_n : nucleotide_phylip workflow ->
tree : _ workflow ->
w_every : int ->
n_cycles: int ->
id: int ->
phy_n:nucleotide_phylip pworkflow ->
tree:_ pworkflow ->
w_every:int ->
n_cycles:int ->
id:int ->
?descr:string ->
?seed:int ->
unit ->
[`diffsel] directory workflow
[`diffsel] dworkflow
val selector :
[`diffsel] directory workflow ->
text_file workflow
[`diffsel] dworkflow ->
text_file pworkflow
val check_conv :
[`diffsel] directory workflow ->
text_file directory workflow
[`diffsel] dworkflow ->
directory pworkflow
open Core_kernel
open Bistro.Std
open Bistro.EDSL
open Bistro
open Bistro.Shell_dsl
open File_formats
open Utils
let env = docker_image ~account:"pveber" ~name:"bayescode" ~tag:"latest" ()
let img = [ docker_image ~account:"pveber" ~name:"bayescode" ~tag:"latest" () ]
let diffseldsparse_add_iterations_script ~chainname ~ali ~tree =
let vars = [
......@@ -41,7 +41,7 @@ echo end_it=$end_it
|}
let diffseldsparse ~(alignment:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) ?(descr = "") () : [`diffseldsparse] directory workflow =
let diffseldsparse ~(alignment:nucleotide_phylip pworkflow) ~(tree: _ pworkflow) ~(w_every:int) ~(n_cycles: int) ?(descr = "") () : [`diffseldsparse] dworkflow =
let tmp_tree = tmp // "myrun.tree" in
let tmp_ali = tmp // "myrun.ali" in
let dest_tree = dest // "myrun.tree" in
......@@ -50,8 +50,8 @@ let diffseldsparse ~(alignment:nucleotide_phylip workflow) ~(tree: _ workflow) ~
let chainname = dest // "myrun" in
let n_cycles = if (n_cycles > 200) then 200 else n_cycles in
let script_r = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
workflow ~descr:("convergence_detection.run_diffseldsparse." ^ descr) [
docker env (
Workflow.shell ~descr:("convergence_detection.run_diffseldsparse." ^ descr) [
within_container img (
and_list [
mkdir_p dest;
cd tmp;
......@@ -73,14 +73,13 @@ let diffseldsparse ~(alignment:nucleotide_phylip workflow) ~(tree: _ workflow) ~
)
]
let check_conv run_diffseldsparse : text_file directory workflow =
let env = Env.env_r in
let check_conv run_diffseldsparse : directory pworkflow =
let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
let trace = run_diffseldsparse / selector["myrun.trace"] in
let trace = Workflow.select run_diffseldsparse ["myrun.trace"] in
let out = dest // "out.html" in
let nb_new_iterations = dest // "new_iterations.txt" in
workflow ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" [
docker env (
Workflow.shell ~descr:"convergence_detection.DiffselMCMCConvergenceAnalysis" [
within_container Env.env_r (
and_list [
mkdir_p tmp ;
mkdir_p dest ;
......@@ -99,7 +98,7 @@ let check_conv run_diffseldsparse : text_file directory workflow =
)
]
let selector run_diffseldsparse : text_file workflow =
let selector run_diffseldsparse : text_file pworkflow =
let package = tmp // "diffsel_script_utils.py" in
let script = tmp // "diffsel_analyze_result.py" in
let tmp_tree = tmp // "myrun.tree" in
......@@ -108,8 +107,8 @@ let selector run_diffseldsparse : text_file workflow =
let dep_ali = (dep run_diffseldsparse) // "myrun.ali" in
let chainname = (dep run_diffseldsparse) // "myrun" in
let out = dest in
workflow ~descr:"convergence_detection.parse_diffseldsparse" [
docker env (
Workflow.shell ~descr:"convergence_detection.parse_diffseldsparse" [
within_container img (
and_list [
mkdir_p tmp ;
cd tmp ;
......
open Bistro.Std
open Bistro
open File_formats
val diffseldsparse :
alignment:nucleotide_phylip workflow ->
tree : _ workflow ->
w_every : int ->
n_cycles: int ->
alignment:nucleotide_phylip pworkflow ->
tree:_ pworkflow ->
w_every:int ->
n_cycles:int ->
?descr:string ->
unit ->