Commit 1337511d authored by boussau
Browse files

Merged calc_multinomial.py by hand.

parents 8e6f0088 b45413d6
......@@ -22,6 +22,6 @@ RUN apt-get update && \
RUN pip install --upgrade pip
RUN pip install ete3==3.0.0b35
RUN pip install scipy==0.19.1
RUN pip install scipy==1.1.0
RUN pip install biopython==1.72
......@@ -4,7 +4,7 @@ set -e
IMAGE_NAME=python_basics
DOCKERFILE_DIR=.
TAG=07172018
TAG=07182018
REPO=carinerey/$IMAGE_NAME:$TAG
docker build -t $REPO -f ./Dockerfile $DOCKERFILE_DIR
......
......@@ -21,8 +21,6 @@ let bash_script args code =
in
seq ~sep:"\n" [ prelude ; string code ]
let conf_file_bppseqgen ~tree ~out ~nb_sites ~config =
seq ~sep:"\n" (
[
......@@ -42,14 +40,12 @@ let bppseqgen ?(descr="") ~nb_sites ~tree ~config : nucleotide_fasta workflow =
mkdir_p dest;
cmd "cat" ~stdout:config_f [(file_dump (conf_file_bppseqgen ~tree ~out ~nb_sites ~config))];
cmd "bppseqgen" [
assign "param" config_f;
]
assign "param" config_f;
]
]
)
] / selector ["seq.fa"]
let conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~ne_c ~config ~nb_sites_per_profile =
seq ~sep:"\n" (
[
......@@ -63,8 +59,6 @@ let conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~ne_c ~config ~nb_sites_
@ config
)
let bppseqgen_multi_profiles_script ~config ~nb_combis ~out ~profile_f =
let vars = [
"FINAL_OUT", ident out ;
......@@ -145,9 +139,9 @@ let conf_file_bppseqman_fa2phy ~fna =
assign "output.sequence.file" dest ;
assign "output.sequence.format" (string "Phylip") ;
string {| input.alignment = true
input.sequence.remove_stop_codons = no
input.sequence.sites_to_use = all
sequence.manip =
input.sequence.remove_stop_codons = no
input.sequence.sites_to_use = all
sequence.manip =
|}
]
......@@ -162,7 +156,7 @@ let fa2phy ~(fna: nucleotide_fasta workflow) : nucleotide_phylip workflow =
let paste_fna ~(fna_l: nucleotide_fasta workflow list) : nucleotide_fasta workflow =
workflow ~descr:"bppsuite.catfasta" [
cmd "catfasta2phyml.pl" ~stdout:dest ~env (List.concat [
[string "-f" ] ;
List.map fna_l ~f:(fun fna -> dep fna) ;
])
[string "-f" ] ;
List.map fna_l ~f:(fun fna -> dep fna) ;
])
]
......@@ -42,7 +42,7 @@ type dataset_res = {
}
let merge_results ~res_by_tools : text_file workflow =
let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07172018" () in
let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07182018" () in
let command = List.map res_by_tools ~f:(fun res ->
let w = match res with
| `Pcoc d -> Pcoc.results d
......@@ -75,49 +75,48 @@ let merge_results ~res_by_tools : text_file workflow =
in
workflow ~descr:"convergence_detection.merge_results" [
cmd "python" ~env [
file_dump (string Scripts.merge_det_results) ;
opt "-o" ident dest ;
seq ~sep:" " command ;
] ;
file_dump (string Scripts.merge_det_results) ;
opt "-o" ident dest ;
seq ~sep:" " command ;
] ;
]
let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv : svg workflow =
let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
(* use the pcoc env because it has a working X server, needed to draw plots with ete3 *)
let meths = List.map res_by_tools ~f:(fun res ->
let opt = match res with
| `Pcoc _ -> "PCOC,PC,OC"
| `Pcoc_gamma _ -> "PCOC_gamma,PC_gamma,OC_gamma,"
| `Pcoc_C60 _ -> "PCOC_C60,PC_C60,OC_C60,"
| `Diffsel _ -> "Diffsel_mean,Diffsel_max"
| `Diffsel_bis _ -> "Diffsel_bis_mean,Diffsel_bis_max"
| `Identical_LG _ -> "Identical_LG08"
| `Identical_WAG _ -> "Identical_WAG01"
| `Topological_LG _ -> "Topological_LG08"
| `Topological_WAG _ -> "Topological_WAG01"
| `Tdg09 _ -> "Tdg09_1-FDR,Tdg09_prob_post"
| `Multinomial _ -> ""
in
string opt
) |> seq ~sep:","
let opt = match res with
| `Pcoc _ -> "PCOC,PC,OC"
| `Pcoc_gamma _ -> "PCOC_gamma,PC_gamma,OC_gamma,"
| `Pcoc_C60 _ -> "PCOC_C60,PC_C60,OC_C60,"
| `Diffsel _ -> "Diffsel_mean,Diffsel_max"
| `Diffsel_bis _ -> "Diffsel_bis_mean,Diffsel_bis_max"
| `Identical_LG _ -> "Identical_LG08"
| `Identical_WAG _ -> "Identical_WAG01"
| `Topological_LG _ -> "Topological_LG08"
| `Topological_WAG _ -> "Topological_WAG01"
| `Tdg09 _ -> "Tdg09_1-FDR,Tdg09_prob_post"
| `Multinomial _ -> "Mutinomial_LRT"
in
string opt
) |> seq ~sep:","
in
let meths_t = List.map res_by_tools ~f:(fun res ->
let opt = match res with
| `Pcoc _ -> "PCOC:0.99,PC:0.99,OC:0.99"
| `Pcoc_gamma _ -> "PCOC_gamma:0.99,PC_gamma:0.99,OC_gamma:0.99"
| `Pcoc_C60 _ -> "PCOC_C60:0.99,PC_C60:0.99,OC_C60:0.99"
| `Diffsel _ -> "Diffsel_mean:0.11,Diffsel_max:0.9"
| `Diffsel_bis _ -> "Diffsel_bis_mean:0.11,Diffsel_bis_max:0.9"
| `Identical_LG _ -> "Identical_LG08:0.9"
| `Identical_WAG _ -> "Identical_WAG01:0.9"
| `Topological_LG _ -> "Topological_LG08:0.9"
| `Topological_WAG _ -> "Topological_WAG01:0.9"
| `Tdg09 _ -> "Tdg09_1-FDR:0.9,Tdg09_prob_post:0.9"
| `Multinomial _ -> ""
in
string opt
) |> seq ~sep:","
let opt = match res with
| `Pcoc _ -> "PCOC:0.99,PC:0.99,OC:0.99"
| `Pcoc_gamma _ -> "PCOC_gamma:0.99,PC_gamma:0.99,OC_gamma:0.99"
| `Pcoc_C60 _ -> "PCOC_C60:0.99,PC_C60:0.99,OC_C60:0.99"
| `Diffsel _ -> "Diffsel_mean:0.11,Diffsel_max:0.9"
| `Diffsel_bis _ -> "Diffsel_bis_mean:0.11,Diffsel_bis_max:0.9"
| `Identical_LG _ -> "Identical_LG08:0.9"
| `Identical_WAG _ -> "Identical_WAG01:0.9"
| `Topological_LG _ -> "Topological_LG08:0.9"
| `Topological_WAG _ -> "Topological_WAG01:0.9"
| `Tdg09 _ -> "Tdg09_1-FDR:0.9,Tdg09_prob_post:0.9"
| `Multinomial _ -> "Mutinomial_LRT:0.9"
in
string opt
) |> seq ~sep:","
in
let package_diffsel_script_utils = tmp // "diffsel_script_utils.py" in
let package_plot_data = tmp // "plot_data.py" in
......
......@@ -28,16 +28,16 @@ type dataset_res = {
res_by_tools: result list ;
merged_results : text_file workflow ;
plot_merged_results : svg workflow
}
}
val merge_results :
res_by_tools : result list ->
text_file workflow
val plot_merge_results :
plot_all_sites: bool ->
res_by_tools : result list ->
tree:nhx workflow ->
faa:aminoacid_fasta workflow ->
tsv:text_file workflow ->
svg workflow
plot_all_sites: bool ->
res_by_tools : result list ->
tree:nhx workflow ->
faa:aminoacid_fasta workflow ->
tsv:text_file workflow ->
svg workflow
......@@ -37,57 +37,56 @@ nonhomogeneous = general
rate_distribution=Constant()
|}
let bpp_config_H0_F= seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
let bpp_config_HaPCOC_F = seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))" ] ;
seq [string "modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2)" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))" ] ;
seq [string "modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2)" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
let bpp_config_HaPC_F = seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))" ] ;
seq [string "modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))" ] ;
seq [string "modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
let bpp_config_H0_F_Ne = seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))" ] ;
seq [string "model2=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_C))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))" ] ;
seq [string "model2=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_C))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
let bpp_config_HaPCOC_F_Ne = seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))" ] ;
seq [string "modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2, Ns=$(NE_T))" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))" ] ;
seq [string "modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2, Ns=$(NE_T))" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
let bpp_config_HaPC_F_Ne = seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))" ] ;
seq [string "modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))" ] ;
seq [string "modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
let bpp_config_F nodes hyp = [
string bpp_config_base ;
insert nodes ;
match hyp with
| H0 -> bpp_config_H0_F
| HaPC -> bpp_config_HaPC_F
| HaPCOC -> bpp_config_HaPCOC_F
| H0_NeSmall -> bpp_config_H0_F_Ne
| HaPC_NeSmall -> bpp_config_HaPC_F_Ne
| HaPCOC_NeSmall -> bpp_config_HaPCOC_F_Ne
| H0_NeBig -> bpp_config_H0_F_Ne
| HaPC_NeBig -> bpp_config_HaPCOC_F_Ne
| HaPCOC_NeBig -> bpp_config_HaPCOC_F_Ne
;
match hyp with
| H0 -> bpp_config_H0_F
| HaPC -> bpp_config_HaPC_F
| HaPCOC -> bpp_config_HaPCOC_F
| H0_NeSmall -> bpp_config_H0_F_Ne
| HaPC_NeSmall -> bpp_config_HaPC_F_Ne
| HaPCOC_NeSmall -> bpp_config_HaPCOC_F_Ne
| H0_NeBig -> bpp_config_H0_F_Ne
| HaPC_NeBig -> bpp_config_HaPC_F_Ne
| HaPCOC_NeBig -> bpp_config_HaPCOC_F_Ne
;
]
......@@ -14,9 +14,9 @@ let repo ~preview dataset_l =
let tree_prefix = dataset.tree_prefix in
let repo_ready_data = Ready_dataset.repo dataset.dataset in
let repo_raw_data = if preview then Raw_dataset.repo ~prefix:model_prefix (Ready_dataset.to_raw dataset.dataset) else [] in
List.concat [
Repo.shift "simulated_data" (Repo.shift (tree_prefix ^"_"^model_prefix) repo_raw_data);
Repo.shift "simulated_data_debug" (Repo.shift tree_prefix (Repo.shift model_prefix repo_ready_data));
]
List.concat [
Repo.shift "simulated_data" (Repo.shift (tree_prefix ^"_"^model_prefix) repo_raw_data);
Repo.shift "simulated_data_debug" (Repo.shift tree_prefix (Repo.shift model_prefix repo_ready_data));
]
)
|> List.concat
......@@ -9,7 +9,3 @@ type output_parse_input_tree =
| Tree4detect
| Tree4simu
| Tree_diffsel
......@@ -21,11 +21,11 @@ let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:in
cmd "cp" [dep phy_n; tmp_ali]; (* required dep to link the file in the env *)
cmd "cp" [dep tree; tmp_tree]; (* required dep to link the file in the env *)
cmd "/diffsel/_build/diffsel" [
opt "-t" ident tmp_tree;
opt "-d" ident tmp_ali ;
opt "-ncond" int 2 ;
opt "-x" seq [ int w_every; string " "; int n_cycles];
ident chainname ;
opt "-t" ident tmp_tree;
opt "-d" ident tmp_ali ;
opt "-ncond" int 2 ;
opt "-x" seq [ int w_every; string " "; int n_cycles];
ident chainname ;
];
]
)
......
......@@ -11,5 +11,5 @@ val diffsel :
[`diffsel] directory workflow
val selector :
[`diffsel] directory workflow ->
text_file workflow
[`diffsel] directory workflow ->
text_file workflow
......@@ -25,4 +25,3 @@ class type nucleotide_phylip = object
inherit text_file
method format : [`Nucleotide]
end
......@@ -4,12 +4,9 @@ open Bistro.EDSL
open Bistro_bioinfo.Std
open File_formats
(** [assign key value] renders a [key=value] fragment: the literal [key]
    and the template [value], joined by an equals sign. *)
let assign key value =
  let parts = [ string key ; value ] in
  seq ~sep:"=" parts
let conf_file_bppml ~tree ~faa ~out ~config =
seq ~sep:"\n" (
[
......@@ -45,13 +42,12 @@ let bppml ?(descr="") ~faa ~tree ~config : _ workflow =
mkdir_p dest;
cmd "cat" ~stdout:config_f [(file_dump (conf_file_bppml ~tree ~faa ~out ~config ))];
cmd "bppml" [
assign "param" config_f;
]
assign "param" config_f;
]
]
)
]
let conf_file_bppancestor ~tree ~faa ~out ~config =
seq ~sep:"\n" (
[
......@@ -76,7 +72,6 @@ let conf_file_bppancestor ~tree ~faa ~out ~config =
@ config
)
let bppancestor ?(descr="") ~faa ~tree ~config : _ workflow =
let env = docker_image ~account:"carinerey" ~name:"bppsuite" ~tag:"07052018" () in
let config_f = dest // "config_bppancestor.bpp" in
......@@ -87,34 +82,30 @@ let bppancestor ?(descr="") ~faa ~tree ~config : _ workflow =
mkdir_p dest;
cmd "cat" ~stdout:config_f [(file_dump (conf_file_bppancestor ~tree ~faa ~out ~config))];
cmd "bppancestor" [
assign "param" config_f;
]
assign "param" config_f;
]
]
)
]
(** [identical ~tree_id ~tree_sc ~faa ~prot_model] builds the workflow whose
    description is [identical.] followed by [prot_model].

    The bpp configuration is the single assignment of [model] to
    [prot_model]. bppancestor is run on [tree_id] and [faa] with that
    configuration, and its [sites.tsv] output is handed to the
    [Scripts.calc_identical] python script together with [tree_sc] and
    [faa]. The script is given two output paths under [dest]:
    [out1.tsv] and [out2.tsv].

    NOTE(review): [run_bppml] is bound below but never referenced in this
    block; confirm whether the bppml run is still needed. *)
let identical ~(tree_id:_ workflow) ~(tree_sc:_ workflow) ~(faa:aminoacid_fasta workflow) ~prot_model : [`identical] directory workflow =
let config = [assign "model" (string prot_model)] in
let out1 = dest // "out1.tsv" in
let out2 = dest // "out2.tsv" in
let run_bppml = bppml ~descr:"" ~tree:tree_id ~config ~faa in
let run_bppancestor = bppancestor ~descr:"" ~tree:tree_id ~faa ~config in
(* per-site table selected from the bppancestor output directory *)
let proba = run_bppancestor / selector ["sites.tsv"] in
let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"07022018" () in
workflow ~descr:("identical."^prot_model) [
mkdir dest ;
cmd "python" ~env [
file_dump (string Scripts.calc_identical) ;
opt "-t" dep tree_sc;
opt "-a" dep faa;
opt "-p" dep proba;
opt "-o" ident out1 ;
opt "-o2" ident out2 ;
file_dump (string Scripts.calc_identical) ;
opt "-t" dep tree_sc;
opt "-a" dep faa;
opt "-p" dep proba;
opt "-o" ident out1 ;
opt "-o2" ident out2 ;
]
]
(** [results run_identical] picks the [out1.tsv] file out of the directory
    produced by the [identical] workflow. *)
let results run_identical : text_file workflow =
  let out1_selector = selector ["out1.tsv"] in
  run_identical / out1_selector
......@@ -5,17 +5,16 @@ open Bistro_bioinfo.Std
open File_formats
(** [multinomial ~tree_id ~tree_sc ~faa] builds the [calc_multinomial]
    workflow: it creates [dest], then runs the [Scripts.calc_multinomial]
    python script in the carinerey/python_basics docker image with
    [tree_sc] as [-t], [faa] as [-a] and [dest/out.tsv] as [-o].

    NOTE(review): [tree_id] is accepted but not referenced in this block. *)
let multinomial ~(tree_id:_ workflow) ~(tree_sc:_ workflow) ~(faa:aminoacid_fasta workflow) : [`multinomial] directory workflow =
let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07172018" () in
(* this second binding shadows the one above, so tag 07182018 is the image actually used *)
let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07182018" () in
workflow ~descr:("calc_multinomial") [
mkdir_p dest;
cmd "python" ~env [
file_dump (string Scripts.calc_multinomial) ;
opt "-t" dep tree_sc;
opt "-a" dep faa;
opt "-o" ident (dest // "out.tsv") ;
file_dump (string Scripts.calc_multinomial) ;
opt "-t" dep tree_sc;
opt "-a" dep faa;
opt "-o" ident (dest // "out.tsv") ;
]
]
(** [results run_multinomial] picks the [out.tsv] file out of the directory
    produced by the [multinomial] workflow. *)
let results run_multinomial =
  let out_selector = selector ["out.tsv"] in
  run_multinomial / out_selector
......@@ -8,13 +8,13 @@ let pcoc ?plot_complete ?gamma ?catx_est ~(faa:aminoacid_fasta workflow) ~(tree
let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"07022018" () in
workflow ~descr:"convergence_detection.pcoc" [
cmd "pcoc_det.py" ~env [
opt "-t" dep tree;
opt "-m" string "-";
opt "-aa" dep faa ;
opt "-o" ident dest ;
option ( flag string "--gamma" ) gamma;
option ( opt "-CATX_est" int) catx_est;
option ( flag string "--plot --plot_complete_ali" ) plot_complete;
opt "-t" dep tree;
opt "-m" string "-";
opt "-aa" dep faa ;
opt "-o" ident dest ;
option ( flag string "--gamma" ) gamma;
option ( opt "-CATX_est" int) catx_est;
option ( flag string "--plot --plot_complete_ali" ) plot_complete;
]
]
......
......@@ -40,7 +40,7 @@ let parse_input_data indir =
} in
[dataset]
else
failwith ({|More than 2 files in |} ^ (Filename.concat indir dataset_prefix ))
failwith ({|More than 2 files in |} ^ (Filename.concat indir dataset_prefix ))
)
|> List.concat
......@@ -57,12 +57,12 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~
(* with several profiles or couples of profiles *)
let config_p = Convergence_hypothesis.bpp_config_F nodes model in
let ne_c = match model with
| H0_NeSmall -> 0.5
| HaPCOC_NeSmall -> 0.5
| HaPC_NeSmall -> 0.5
| H0_NeBig -> 6.
| HaPC_NeBig -> 6.
| HaPCOC_NeBig -> 6.
| H0_NeSmall -> 0.1
| HaPCOC_NeSmall -> 0.1
| HaPC_NeSmall -> 0.1
| H0_NeBig -> 10.
| HaPC_NeBig -> 10.
| HaPCOC_NeBig -> 10.
| _ -> 1.
in
let run_fna = Bppsuite.bppseqgen_multi_profiles ~descr ~nb_sites ~tree ~config:config_p ~profile_f ~ne_c in
......@@ -249,23 +249,23 @@ let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn
let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~tree_dir ~profile_fn ~use_concat () =
let trees = Array.to_list @@ Sys.readdir tree_dir in
let repo = List.map trees ~f:(fun tree ->
let trees = [tree] in
let tree_prefix = Filename.chop_extension tree in
let indir_dataset_l = if indir = "" then [] else parse_input_data indir in
let dataset_l =
derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns
@ indir_dataset_l in
let dataset_results_l = derive_det ~dataset_l ~preview ~fast_mode in
let post_analyses = Post_analyses.post_analyses_of_dataset_results_l ~dataset_results_l in
let repo_per_tree = [
Dataset.repo dataset_l ~preview ;
repo_of_dataset_results_l ~dataset_results_l ;
Repo.shift tree_prefix (Post_analyses.repo_of_post_analyses ~prefix:tree_prefix ~post_analyses);
] |> List.concat
in
repo_per_tree
)
|> List.concat
let trees = [tree] in
let tree_prefix = Filename.chop_extension tree in
let indir_dataset_l = if indir = "" then [] else parse_input_data indir in
let dataset_l =
derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns
@ indir_dataset_l in
let dataset_results_l = derive_det ~dataset_l ~preview ~fast_mode in
let post_analyses = Post_analyses.post_analyses_of_dataset_results_l ~dataset_results_l in
let repo_per_tree = [
Dataset.repo dataset_l ~preview ;
repo_of_dataset_results_l ~dataset_results_l ;
Repo.shift tree_prefix (Post_analyses.repo_of_post_analyses ~prefix:tree_prefix ~post_analyses);
] |> List.concat
in
repo_per_tree
)
|> List.concat
in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
......
......@@ -12,19 +12,19 @@ type t_choices = {
t_choices_complete: text_file workflow ;
t_choices_max: text_file workflow ;
t_choices_plot: text_file workflow ;
}
}
type simu_infos = {
simu_infos: text_file workflow option ;
model_prefix: string ;
tree_prefix: string ;
}
}
type post_analyses = {
t_choices : t_choices option;
simu_infos_l : simu_infos list;
simu_infos_plot : text_file workflow ;
}
}
let is_hyp ~hyp (dataset_results :dataset_res) =
......@@ -35,7 +35,7 @@ let make_t_choices ~h0_merged_results ~ha_merged_results : post_analyses_dir dir
let env = docker_image ~account:"carinerey" ~name:"r_basics" ~tag:"07162018" () in
let out = dest // "out" in
workflow ~descr:"post_analyses.t_choices" [
docker env (
docker env (
and_list [
mkdir_p dest ;
cmd "Rscript" [
......@@ -43,60 +43,60 @@ let make_t_choices ~h0_merged_results ~ha_merged_results : post_analyses_dir dir
opt "--H0" dep h0_merged_results;
opt "--Ha" dep ha_merged_results;
opt "--out " ident out;
];
])
];
])
]
(** [make_simu_infos ?descr ?fna_infos ~faa ~tree_sc] builds a workflow whose
    description is [post_analyses.simu_infos.] followed by [descr] (empty by
    default). It runs the [Scripts.calc_simu_infos] python script in the
    carinerey/pcoc docker image with [faa] as [--faa], [tree_sc] as [--tree]
    and [dest] as the output.

    [fna_infos] is optional; presumably the [--fna_infos] argument is only
    emitted when it is provided (depends on the EDSL [option] helper, to be
    confirmed). *)
let make_simu_infos ?(descr="") ?(fna_infos) ~faa ~tree_sc : text_file workflow =
let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"07022018" () in
workflow ~descr:("post_analyses.simu_infos." ^ descr) [
cmd "python" ~env [
file_dump (string Scripts.calc_simu_infos) ;
opt "--faa" dep faa;
opt "--tree" dep tree_sc;
option ( opt "--fna_infos" dep) fna_infos;
(* NOTE(review): the flag literal carries a trailing space; confirm this is intended *)
opt "--output " ident dest;
file_dump (string Scripts.calc_simu_infos) ;
opt "--faa" dep faa;
opt "--tree" dep tree_sc;
option ( opt "--fna_infos" dep) fna_infos;
opt "--output " ident dest;
];
]
let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
let env = docker_image ~account:"carinerey" ~name:"r_basics" ~tag:"07162018" () in
let cmd_cp_l = List.map simu_infos_l ~f:(fun s ->
match s.simu_infos with
match s.simu_infos with
|