Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 2a8d34e1 authored by Carine Rey
Browse files

add type profile + split profile_fn in 3 classes at the beginning of the pipeline

parent 525b1d4f
......@@ -46,11 +46,11 @@ let bppseqgen ?(descr="") ~nb_sites ~tree ~config : nucleotide_fasta workflow =
)
] / selector ["seq.fa"]
let conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~ne_c ~ne_a ~config ~nb_sites_per_profile =
let conf_file_bppseqgen_multi_profiles ~tree ~profile_w ~ne_c ~ne_a ~config ~nb_sites_per_profile =
seq ~sep:"\n" (
[
assign "input.tree.file" (dep tree) ;
assign "PROFILE_F" (dep profile_f) ;
assign "PROFILE_F" (dep profile_w) ;
assign "number_of_sites" (int nb_sites_per_profile) ;
assign "NE_1" (float ne_a) ;
assign "NE_C" (float ne_c) ;
......@@ -59,11 +59,11 @@ let conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~ne_c ~ne_a ~config ~nb_
@ config
)
let bppseqgen_multi_profiles_script ~config ~nb_combis ~out ~profile_f =
let bppseqgen_multi_profiles_script ~config ~nb_combis ~out ~profile_w =
let vars = [
"FINAL_OUT", ident out ;
"PARAM", config ;
"PROFILE_F", dep profile_f ;
"PROFILE_F", dep profile_w ;
"NB_COMBI_PROFILES", int nb_combis ;
]
in
......@@ -91,7 +91,7 @@ let bppseqgen_multi_profiles_script ~config ~nb_combis ~out ~profile_f =
|}
let bppseqgen_multi_profiles ?(descr="") ~profile_f ~nb_sites ~tree ~config ~ne_c ~ne_a : bppseqgen_multi_profiles directory workflow =
let bppseqgen_multi_profiles ?(descr="") ~profile_w ~nb_sites ~tree ~config ~ne_c ~ne_a : bppseqgen_multi_profiles directory workflow =
let nb_sites_per_profile = 1 in
let nb_combis = Pervasives.(nb_sites / nb_sites_per_profile) in
let config_f = dest // "config.bpp" in
......@@ -102,8 +102,8 @@ let bppseqgen_multi_profiles ?(descr="") ~profile_f ~nb_sites ~tree ~config ~ne_
mkdir_p dest;
mkdir_p tmp;
cd tmp;
cmd "cat" ~stdout:config_f [(file_dump (conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~config ~ne_c ~ne_a ~nb_sites_per_profile))];
cmd "bash" [(file_dump (bppseqgen_multi_profiles_script ~config:config_f ~nb_combis ~out ~profile_f))];
cmd "cat" ~stdout:config_f [(file_dump (conf_file_bppseqgen_multi_profiles ~tree ~profile_w ~config ~ne_c ~ne_a ~nb_sites_per_profile))];
cmd "bash" [(file_dump (bppseqgen_multi_profiles_script ~config:config_f ~nb_combis ~out ~profile_w))];
]
)
]
......
......@@ -13,7 +13,7 @@ val bppseqgen :
val bppseqgen_multi_profiles :
?descr : string ->
profile_f: _ workflow ->
profile_w: _ workflow ->
nb_sites:int ->
tree:nhx workflow ->
config:Bistro.Template.t list ->
......
......@@ -42,7 +42,7 @@ type dataset_res = {
}
let merge_results ~res_by_tools : text_file workflow =
let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07182018" () in
let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07252018" () in
let command = List.map res_by_tools ~f:(fun res ->
let w = match res with
| `Pcoc d -> Pcoc.results d
......
......@@ -5,6 +5,7 @@ open Bistro.Std
open File_formats
open Defs
open Convergence_detection
open Profile
let parse_input_data indir =
let datasets = Array.to_list @@ Sys.readdir indir in
......@@ -44,7 +45,7 @@ let parse_input_data indir =
)
|> List.concat
let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~preview ~ns =
let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns =
let model_prefix = Convergence_hypothesis.string_of_model model in
let nb_sites = if ns = 0 then (if preview then 20 else 50) else ns in
let nodes = Tree_dataset.nodes tree_dataset model in
......@@ -73,7 +74,8 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~
| H0_SmallNeInBigNe -> 10.
| _ -> 1.
in
let run_fna = Bppsuite.bppseqgen_multi_profiles ~descr ~nb_sites ~tree ~config:config_p ~profile_f ~ne_c ~ne_a in
let profile_w = profile.profile_w in
let run_fna = Bppsuite.bppseqgen_multi_profiles ~descr ~nb_sites ~tree ~config:config_p ~profile_w ~ne_c ~ne_a in
let fna = Bppsuite.bppseqgen_multi_profiles_get_fa run_fna in
let fna_infos = Some (Bppsuite.bppseqgen_multi_profiles_get_info run_fna) in
......@@ -81,7 +83,7 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~
let ready_dataset = { Ready_dataset.input_tree = input_tree ; tree_dataset ; fna; faa; fna_infos} in
{ Dataset.model_prefix; is_real= false; tree_prefix; dataset = ready_dataset }
let derive_from_tree ~tree_dir ~tree ~profile_f ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test =
let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test =
let tree_prefix = Filename.chop_extension tree in
let input_tree = input (Filename.concat tree_dir tree) in
let tree_dataset = Tree_dataset.prepare input_tree in
......@@ -122,18 +124,17 @@ let derive_from_tree ~tree_dir ~tree ~profile_f ~preview ~use_concat ~ns ~no_Ne
] |> List.concat
in
let dataset_per_hypo = List.map models ~f:(fun model ->
derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~preview ~ns
derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns
) in
let ready_dataset_H0 = (derive_from_model ~model:H0 ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~preview ~ns).dataset in
let ready_dataset_HaPCOC = (derive_from_model ~model:HaPCOC ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~preview ~ns).dataset in
let ready_dataset_H0 = (derive_from_model ~model:H0 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns).dataset in
let ready_dataset_HaPCOC = (derive_from_model ~model:HaPCOC ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns).dataset in
let concat_H0Ha = {Dataset.model_prefix="H0+HaPCOC"; tree_prefix; is_real = false; dataset = Ready_dataset.paste ready_dataset_H0 ready_dataset_HaPCOC} in
let dataset_concat_hypos = if use_concat then [concat_H0Ha;] else [] in
List.concat [ dataset_per_hypo ; dataset_concat_hypos ]
let derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test =
let profile_f = input profile_fn in
let derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test =
List.map trees ~f:(fun tree ->
derive_from_tree ~tree_dir ~tree ~profile_f ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test)
derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test)
|> List.concat
......@@ -257,36 +258,16 @@ let derive_det ~dataset_l ~preview ~fast_mode =
List.map dataset_l ~f:(fun dataset ->
derive_from_dataset ~preview ~dataset ~fast_mode)
let logger =
Logger.tee [
Console_logger.create () ;
Dot_output.create "dag.dot" ; (*dot -Tpdf example/dag.dot -o dag.pdf*)
Bistro_utils.Html_logger.create "report.html" ;
]
let detection_main ~outdir ~indir ?(np = 2) ?(mem = 2) ~preview ~fast_mode () =
let dataset_l = parse_input_data indir in
let dataset_results_l = derive_det ~dataset_l ~preview ~fast_mode in
let repo = repo_of_dataset_results_l ~dataset_results_l in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview ~use_concat ~no_Ne ~no_HaPC () =
let derive_profile ?(indir = "") ?(ns = 0) ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile ~use_concat ~only_simu () =
let trees = Array.to_list @@ Sys.readdir tree_dir in
let dataset_l = derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test:false in
let repo = Dataset.repo dataset_l ~preview in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile_fn ~use_concat ~only_simu () =
(* simulated trees *)
let trees = Array.to_list @@ Sys.readdir tree_dir in
let simu_dataset_l = derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test in
let simu_dataset_l = derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test in
let post_analyses_simu = Post_analyses.post_analyses_simu_of_simu_dataset_l ~simu_dataset_l in
let repo_of_post_analyses_simu = Post_analyses.repo_of_post_analyses_simu ~post_analyses_simu in
let repo_and_post_analyses_per_tree_simu = List.map trees ~f:(fun tree -> (*to keep together all models per tree*)
let trees = [tree] in
let tree_prefix = Filename.chop_extension tree in
let dataset_l =
derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test in
derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test in
let dataset_results_l =
if only_simu then
[]
......@@ -305,8 +286,41 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
in
let all_repo_per_tree_simu = List.map repo_and_post_analyses_per_tree_simu ~f:(fun (r,p) -> r) |> List.concat in
let all_post_analyses_per_tree = List.map repo_and_post_analyses_per_tree_simu ~f:(fun (r,p) -> p) in
let profile = Filename.chop_extension profile_fn in
let repo_post_analyses_all_trees = Post_analyses.repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~all_post_analyses_per_tree ~profile in
let profile_prefix = profile.profile_n in
let repo_post_analyses_all_trees = Post_analyses.repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~all_post_analyses_per_tree ~profile_prefix in
let repo = repo_of_post_analyses_simu @ all_repo_per_tree_simu @repo_post_analyses_all_trees in
Repo.shift profile_prefix repo
(* Logger passed as [~logger] to [Repo.build] by the [*_main] entry points of
   this file. [Logger.tee] combines three loggers: a console logger, a dot
   output created on "dag.dot" and an HTML logger created on "report.html". *)
let logger =
Logger.tee [
Console_logger.create () ;
Dot_output.create "dag.dot" ; (*dot -Tpdf example/dag.dot -o dag.pdf*)
Bistro_utils.Html_logger.create "report.html" ;
]
(* Entry point for running detection on existing input data.
   - [indir] is parsed with [parse_input_data] to obtain the dataset list.
   - [preview] and [fast_mode] are forwarded unchanged to [derive_det].
   - [outdir], [np] (default 2) and [mem] (default 2, passed as [`GB mem])
     are forwarded to [Repo.build] together with the shared [logger].
   The trailing unit argument allows the optional arguments to be omitted. *)
let detection_main ~outdir ~indir ?(np = 2) ?(mem = 2) ~preview ~fast_mode () =
let dataset_l = parse_input_data indir in
let dataset_results_l = derive_det ~dataset_l ~preview ~fast_mode in
let repo = repo_of_dataset_results_l ~dataset_results_l in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
(* Entry point for running the simulation part only.
   The profile file name [profile_fn] is wrapped into a [profile] record:
   [profile_w] is [input profile_fn] and [profile_n] is [profile_fn] with its
   extension removed. Every entry returned by [Sys.readdir tree_dir] is passed
   to [derive_sim] as a tree file name, with [ne_test] fixed to [false].
   The resulting datasets are turned into a repository with [Dataset.repo]
   and built under [outdir] with [Repo.build] ([np] and [mem] default to 2,
   [mem] being passed as [`GB mem]). *)
let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview ~use_concat ~no_Ne ~no_HaPC () =
let profile_w = input profile_fn in
(* NOTE(review): presumably raises if [profile_fn] has no extension, and keeps
   any directory component of the path - confirm against the intended use. *)
let profile_n = Filename.chop_extension profile_fn in
let profile = {profile_w; profile_n} in
let trees = Array.to_list @@ Sys.readdir tree_dir in
let dataset_l = derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test:false in
let repo = Dataset.repo dataset_l ~preview in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile_fn ~use_concat ~only_simu () =
(* simulated trees *)
let profile_l = Profile.profile_l_of_splitted_profile (Profile.split_profile (input profile_fn)) in
let sim_repo_l = List.map profile_l ~f:(fun profile ->
derive_profile ~indir ~ns ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile ~use_concat ~only_simu ()
) |> List.concat in
(* real trees *)
let indir_dataset_l = if indir = "" then [] else parse_input_data indir in
let dataset_l = indir_dataset_l in
......@@ -321,7 +335,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
repo_of_dataset_results_l ~dataset_results_l ;
] |> List.concat
in
let repo = repo_of_post_analyses_simu @ repo_real_trees @ all_repo_per_tree_simu @repo_post_analyses_all_trees in
let repo = sim_repo_l @ repo_real_trees in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let simulation_command =
......
......@@ -163,7 +163,7 @@ let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l =
let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile : sens_spe_t_choices_plot directory workflow =
let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix : sens_spe_t_choices_plot directory workflow =
let env = r_env in
let t_choices_dir = tmp // "t_choices_dir" in
let merged_results_dir = tmp // "merged_results_dir" in
......@@ -188,7 +188,7 @@ let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile : sens_spe_
file_dump (string Scripts.plot_sens_spe_all_trees) ;
opt "--input_dir" ident t_choices_dir;
opt "--input_dir2" ident merged_results_dir;
opt "--profil" string profile;
opt "--profil" string profile_prefix;
opt "--out " ident out;
];]
]
......@@ -196,7 +196,7 @@ let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile : sens_spe_
)
]
let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile ~all_post_analyses_per_tree =
let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile_prefix ~all_post_analyses_per_tree =
let t_choices_l = List.map all_post_analyses_per_tree ~f:(fun post_analyses_res ->
match post_analyses_res.t_choices with
| Some w -> [w]
......@@ -206,7 +206,7 @@ let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile ~all_pos
let dataset_results_l = List.map all_post_analyses_per_tree ~f:(fun post_analyses_res ->
post_analyses_res.dataset_results_l) |> List.concat
in
let sens_spe_t_choices_plot = plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile in
let sens_spe_t_choices_plot = plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix in
Repo.[
item ["sens_spe.pdf"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.pdf"]);
item ["all_t_choices.pdf"] (sens_spe_t_choices_plot / selector ["out.max_t_per_tree.pdf"]);
......
open Core_kernel
open Bistro.Std
open Bistro.EDSL
open File_formats
(** A profile input together with a name identifying it. *)
type profile = {
profile_w : text_file workflow; (** workflow yielding the profile text file *)
profile_n : string; (** name of the profile, e.g. "p0" *)
}
(** [split_profile profile_fn] builds a workflow that runs the embedded
    [generate_pairs.py] script on [profile_fn] inside the
    [carinerey/python_basics:07252018] docker image.

    Despite its name, [profile_fn] is used through [dep], i.e. it is a
    workflow and not a plain file name.

    The script is invoked from the scratch directory with
    [-o <dest>/profile]; its outputs are presumably written in [dest] using
    that prefix (NOTE(review): confirm against [generate_pairs.py]). *)
let split_profile profile_fn : text_file directory workflow =
  let image =
    docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07252018" ()
  in
  (* Dump an embedded script and copy it into the scratch directory under
     [file_name]. *)
  let install contents file_name =
    cmd "cp" [ file_dump (string contents) ; tmp // file_name ]
  in
  (*generate_pairs.py [options...] -o <output-prefix> <profiles-file> *)
  let run_generate_pairs =
    cmd "python" [
      string "generate_pairs.py" ;
      opt "-o" ident (dest // "profile") ;
      dep profile_fn ;
    ]
  in
  let steps = [
    mkdir_p tmp ;
    mkdir_p dest ;
    (* The script is called by its relative name, so run from [tmp]. *)
    cd tmp ;
    install Scripts.diffsel_script_utils "diffsel_script_utils.py" ;
    install Scripts.generate_pairs "generate_pairs.py" ;
    run_generate_pairs ;
  ] in
  workflow ~descr:"parse_profile.split_profile" [
    docker image (and_list steps)
  ]
(** [profile_l_of_splitted_profile splitted_profile] returns the three
    profiles found in the directory workflow [splitted_profile]: for each
    [i] in 0, 1, 2 the file [profile_<i>.tsv] is selected and named
    [p<i>]. The list is ordered by increasing [i]. *)
let profile_l_of_splitted_profile splitted_profile =
  List.map [0; 1; 2] ~f:(fun i ->
      let file_name = Printf.sprintf "profile_%d.tsv" i in
      { profile_w = splitted_profile / selector [file_name] ;
        profile_n = Printf.sprintf "p%d" i })
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment