Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
b45413d6
Commit
b45413d6
authored
Jul 18, 2018
by
LANORE Vincent
Browse files
Auto indentation of everything + removed extra blank lines
parent
2d04fc2f
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
215 additions
and
235 deletions
+215
-235
lib/bppsuite.ml
lib/bppsuite.ml
+8
-14
lib/convergence_detection.ml
lib/convergence_detection.ml
+34
-35
lib/convergence_detection.mli
lib/convergence_detection.mli
+7
-7
lib/convergence_hypothesis.ml
lib/convergence_hypothesis.ml
+38
-39
lib/dataset.ml
lib/dataset.ml
+4
-4
lib/defs.ml
lib/defs.ml
+0
-4
lib/diffsel.ml
lib/diffsel.ml
+5
-5
lib/diffsel.mli
lib/diffsel.mli
+2
-2
lib/file_formats.ml
lib/file_formats.ml
+0
-1
lib/multinomial.ml
lib/multinomial.ml
+4
-5
lib/pcoc.ml
lib/pcoc.ml
+7
-7
lib/pipeline.ml
lib/pipeline.ml
+18
-18
lib/post_analyses.ml
lib/post_analyses.ml
+63
-63
lib/tamuri.ml
lib/tamuri.ml
+16
-17
lib/topological.ml
lib/topological.ml
+8
-13
lib/tree_dataset.ml
lib/tree_dataset.ml
+1
-1
No files found.
lib/bppsuite.ml
View file @
b45413d6
...
...
@@ -21,8 +21,6 @@ let bash_script args code =
in
seq
~
sep
:
"
\n
"
[
prelude
;
string
code
]
let
conf_file_bppseqgen
~
tree
~
out
~
nb_sites
~
config
=
seq
~
sep
:
"
\n
"
(
[
...
...
@@ -42,14 +40,12 @@ let bppseqgen ?(descr="") ~nb_sites ~tree ~config : nucleotide_fasta workflow =
mkdir_p
dest
;
cmd
"cat"
~
stdout
:
config_f
[(
file_dump
(
conf_file_bppseqgen
~
tree
~
out
~
nb_sites
~
config
))];
cmd
"bppseqgen"
[
assign
"param"
config_f
;
]
assign
"param"
config_f
;
]
]
)
]
/
selector
[
"seq.fa"
]
let
conf_file_bppseqgen_multi_profiles
~
tree
~
profile_f
~
ne_c
~
config
~
nb_sites_per_profile
=
seq
~
sep
:
"
\n
"
(
[
...
...
@@ -63,8 +59,6 @@ let conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~ne_c ~config ~nb_sites_
@
config
)
let
bppseqgen_multi_profiles_script
~
config
~
nb_combis
~
out
~
profile_f
=
let
vars
=
[
"FINAL_OUT"
,
ident
out
;
...
...
@@ -145,9 +139,9 @@ let conf_file_bppseqman_fa2phy ~fna =
assign
"output.sequence.file"
dest
;
assign
"output.sequence.format"
(
string
"Phylip"
)
;
string
{
|
input
.
alignment
=
true
input
.
sequence
.
remove_stop_codons
=
no
input
.
sequence
.
sites_to_use
=
all
sequence
.
manip
=
input
.
sequence
.
remove_stop_codons
=
no
input
.
sequence
.
sites_to_use
=
all
sequence
.
manip
=
|
}
]
...
...
@@ -162,7 +156,7 @@ let fa2phy ~(fna: nucleotide_fasta workflow) : nucleotide_phylip workflow =
let
paste_fna
~
(
fna_l
:
nucleotide_fasta
workflow
list
)
:
nucleotide_fasta
workflow
=
workflow
~
descr
:
"bppsuite.catfasta"
[
cmd
"catfasta2phyml.pl"
~
stdout
:
dest
~
env
(
List
.
concat
[
[
string
"-f"
]
;
List
.
map
fna_l
~
f
:
(
fun
fna
->
dep
fna
)
;
])
[
string
"-f"
]
;
List
.
map
fna_l
~
f
:
(
fun
fna
->
dep
fna
)
;
])
]
lib/convergence_detection.ml
View file @
b45413d6
...
...
@@ -75,49 +75,48 @@ let merge_results ~res_by_tools : text_file workflow =
in
workflow
~
descr
:
"convergence_detection.merge_results"
[
cmd
"python"
~
env
[
file_dump
(
string
Scripts
.
merge_det_results
)
;
opt
"-o"
ident
dest
;
seq
~
sep
:
" "
command
;
]
;
file_dump
(
string
Scripts
.
merge_det_results
)
;
opt
"-o"
ident
dest
;
seq
~
sep
:
" "
command
;
]
;
]
let
plot_merge_results
~
plot_all_sites
~
(
res_by_tools
:
result
list
)
~
tree
~
faa
~
tsv
:
svg
workflow
=
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"pcoc"
~
tag
:
"06212018"
()
in
(* use of pcoc env due to its working X server for dra plot with ete3 *)
let
meths
=
List
.
map
res_by_tools
~
f
:
(
fun
res
->
let
opt
=
match
res
with
|
`Pcoc
_
->
"PCOC,PC,OC"
|
`Pcoc_gamma
_
->
"PCOC_gamma,PC_gamma,OC_gamma,"
|
`Pcoc_C60
_
->
"PCOC_C60,PC_C60,OC_C60,"
|
`Diffsel
_
->
"Diffsel_mean,Diffsel_max"
|
`Diffsel_bis
_
->
"Diffsel_bis_mean,Diffsel_bis_max"
|
`Identical_LG
_
->
"Identical_LG08"
|
`Identical_WAG
_
->
"Identical_WAG01"
|
`Topological_LG
_
->
"Topological_LG08"
|
`Topological_WAG
_
->
"Topological_WAG01"
|
`Tdg09
_
->
"Tdg09_1-FDR,Tdg09_prob_post"
|
`Multinomial
_
->
"Mutinomial_LRT"
in
string
opt
)
|>
seq
~
sep
:
","
let
opt
=
match
res
with
|
`Pcoc
_
->
"PCOC,PC,OC"
|
`Pcoc_gamma
_
->
"PCOC_gamma,PC_gamma,OC_gamma,"
|
`Pcoc_C60
_
->
"PCOC_C60,PC_C60,OC_C60,"
|
`Diffsel
_
->
"Diffsel_mean,Diffsel_max"
|
`Diffsel_bis
_
->
"Diffsel_bis_mean,Diffsel_bis_max"
|
`Identical_LG
_
->
"Identical_LG08"
|
`Identical_WAG
_
->
"Identical_WAG01"
|
`Topological_LG
_
->
"Topological_LG08"
|
`Topological_WAG
_
->
"Topological_WAG01"
|
`Tdg09
_
->
"Tdg09_1-FDR,Tdg09_prob_post"
|
`Multinomial
_
->
"Mutinomial_LRT"
in
string
opt
)
|>
seq
~
sep
:
","
in
let
meths_t
=
List
.
map
res_by_tools
~
f
:
(
fun
res
->
let
opt
=
match
res
with
|
`Pcoc
_
->
"PCOC:0.99,PC:0.99,OC:0.99"
|
`Pcoc_gamma
_
->
"PCOC_gamma:0.99,PC_gamma:0.99,OC_gamma:0.99"
|
`Pcoc_C60
_
->
"PCOC_C60:0.99,PC_C60:0.99,OC_C60:0.99"
|
`Diffsel
_
->
"Diffsel_mean:0.11,Diffsel_max:0.9"
|
`Diffsel_bis
_
->
"Diffsel_bis_mean:0.11,Diffsel_bis_max:0.9"
|
`Identical_LG
_
->
"Identical_LG08:0.9"
|
`Identical_WAG
_
->
"Identical_WAG01:0.9"
|
`Topological_LG
_
->
"Topological_LG08:0.9"
|
`Topological_WAG
_
->
"Topological_WAG01:0.9"
|
`Tdg09
_
->
"Tdg09_1-FDR:0.9,Tdg09_prob_post:0.9"
|
`Multinomial
_
->
"Mutinomial_LRT:0.9"
in
string
opt
)
|>
seq
~
sep
:
","
let
opt
=
match
res
with
|
`Pcoc
_
->
"PCOC:0.99,PC:0.99,OC:0.99"
|
`Pcoc_gamma
_
->
"PCOC_gamma:0.99,PC_gamma:0.99,OC_gamma:0.99"
|
`Pcoc_C60
_
->
"PCOC_C60:0.99,PC_C60:0.99,OC_C60:0.99"
|
`Diffsel
_
->
"Diffsel_mean:0.11,Diffsel_max:0.9"
|
`Diffsel_bis
_
->
"Diffsel_bis_mean:0.11,Diffsel_bis_max:0.9"
|
`Identical_LG
_
->
"Identical_LG08:0.9"
|
`Identical_WAG
_
->
"Identical_WAG01:0.9"
|
`Topological_LG
_
->
"Topological_LG08:0.9"
|
`Topological_WAG
_
->
"Topological_WAG01:0.9"
|
`Tdg09
_
->
"Tdg09_1-FDR:0.9,Tdg09_prob_post:0.9"
|
`Multinomial
_
->
"Mutinomial_LRT:0.9"
in
string
opt
)
|>
seq
~
sep
:
","
in
let
package_diffsel_script_utils
=
tmp
//
"diffsel_script_utils.py"
in
let
package_plot_data
=
tmp
//
"plot_data.py"
in
...
...
lib/convergence_detection.mli
View file @
b45413d6
...
...
@@ -28,16 +28,16 @@ type dataset_res = {
res_by_tools
:
result
list
;
merged_results
:
text_file
workflow
;
plot_merged_results
:
svg
workflow
}
}
val
merge_results
:
res_by_tools
:
result
list
->
text_file
workflow
val
plot_merge_results
:
plot_all_sites
:
bool
->
res_by_tools
:
result
list
->
tree
:
nhx
workflow
->
faa
:
aminoacid_fasta
workflow
->
tsv
:
text_file
workflow
->
svg
workflow
plot_all_sites
:
bool
->
res_by_tools
:
result
list
->
tree
:
nhx
workflow
->
faa
:
aminoacid_fasta
workflow
->
tsv
:
text_file
workflow
->
svg
workflow
lib/convergence_hypothesis.ml
View file @
b45413d6
...
...
@@ -37,57 +37,56 @@ nonhomogeneous = general
rate_distribution
=
Constant
()
|
}
let
bpp_config_H0_F
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_HaPCOC_F
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2)"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2)"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_HaPC_F
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_H0_F_Ne
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"model2=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"model2=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_HaPCOC_F_Ne
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2, Ns=$(NE_T))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2, Ns=$(NE_T))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_HaPC_F_Ne
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_F
nodes
hyp
=
[
string
bpp_config_base
;
insert
nodes
;
match
hyp
with
|
H0
->
bpp_config_H0_F
|
HaPC
->
bpp_config_HaPC_F
|
HaPCOC
->
bpp_config_HaPCOC_F
|
H0_NeSmall
->
bpp_config_H0_F_Ne
|
HaPC_NeSmall
->
bpp_config_HaPC_F_Ne
|
HaPCOC_NeSmall
->
bpp_config_HaPCOC_F_Ne
|
H0_NeBig
->
bpp_config_H0_F_Ne
|
HaPC_NeBig
->
bpp_config_HaPC_F_Ne
|
HaPCOC_NeBig
->
bpp_config_HaPCOC_F_Ne
;
match
hyp
with
|
H0
->
bpp_config_H0_F
|
HaPC
->
bpp_config_HaPC_F
|
HaPCOC
->
bpp_config_HaPCOC_F
|
H0_NeSmall
->
bpp_config_H0_F_Ne
|
HaPC_NeSmall
->
bpp_config_HaPC_F_Ne
|
HaPCOC_NeSmall
->
bpp_config_HaPCOC_F_Ne
|
H0_NeBig
->
bpp_config_H0_F_Ne
|
HaPC_NeBig
->
bpp_config_HaPC_F_Ne
|
HaPCOC_NeBig
->
bpp_config_HaPCOC_F_Ne
;
]
lib/dataset.ml
View file @
b45413d6
...
...
@@ -14,9 +14,9 @@ let repo ~preview dataset_l =
let
tree_prefix
=
dataset
.
tree_prefix
in
let
repo_ready_data
=
Ready_dataset
.
repo
dataset
.
dataset
in
let
repo_raw_data
=
if
preview
then
Raw_dataset
.
repo
~
prefix
:
model_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
else
[]
in
List
.
concat
[
Repo
.
shift
"simulated_data"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"simulated_data_debug"
(
Repo
.
shift
tree_prefix
(
Repo
.
shift
model_prefix
repo_ready_data
));
]
List
.
concat
[
Repo
.
shift
"simulated_data"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"simulated_data_debug"
(
Repo
.
shift
tree_prefix
(
Repo
.
shift
model_prefix
repo_ready_data
));
]
)
|>
List
.
concat
lib/defs.ml
View file @
b45413d6
...
...
@@ -9,7 +9,3 @@ type output_parse_input_tree =
|
Tree4detect
|
Tree4simu
|
Tree_diffsel
lib/diffsel.ml
View file @
b45413d6
...
...
@@ -21,11 +21,11 @@ let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:in
cmd
"cp"
[
dep
phy_n
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"/diffsel/_build/diffsel"
[
opt
"-t"
ident
tmp_tree
;
opt
"-d"
ident
tmp_ali
;
opt
"-ncond"
int
2
;
opt
"-x"
seq
[
int
w_every
;
string
" "
;
int
n_cycles
];
ident
chainname
;
opt
"-t"
ident
tmp_tree
;
opt
"-d"
ident
tmp_ali
;
opt
"-ncond"
int
2
;
opt
"-x"
seq
[
int
w_every
;
string
" "
;
int
n_cycles
];
ident
chainname
;
];
]
)
...
...
lib/diffsel.mli
View file @
b45413d6
...
...
@@ -11,5 +11,5 @@ val diffsel :
[
`diffsel
]
directory
workflow
val
selector
:
[
`diffsel
]
directory
workflow
->
text_file
workflow
[
`diffsel
]
directory
workflow
->
text_file
workflow
lib/file_formats.ml
View file @
b45413d6
...
...
@@ -25,4 +25,3 @@ class type nucleotide_phylip = object
inherit
text_file
method
format
:
[
`Nucleotide
]
end
lib/multinomial.ml
View file @
b45413d6
...
...
@@ -9,13 +9,12 @@ let multinomial ~(tree_id:_ workflow) ~(tree_sc:_ workflow) ~(faa:aminoacid_fast
workflow
~
descr
:
(
"calc_multinomial"
)
[
mkdir_p
dest
;
cmd
"python"
~
env
[
file_dump
(
string
Scripts
.
calc_multinomial
)
;
opt
"-t"
dep
tree_sc
;
opt
"-a"
dep
faa
;
opt
"-o"
ident
(
dest
//
"out.tsv"
)
;
file_dump
(
string
Scripts
.
calc_multinomial
)
;
opt
"-t"
dep
tree_sc
;
opt
"-a"
dep
faa
;
opt
"-o"
ident
(
dest
//
"out.tsv"
)
;
]
]
let
results
w
=
w
/
selector
[
"out.tsv"
]
lib/pcoc.ml
View file @
b45413d6
...
...
@@ -8,13 +8,13 @@ let pcoc ?plot_complete ?gamma ?catx_est ~(faa:aminoacid_fasta workflow) ~(tree
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"pcoc"
~
tag
:
"07022018"
()
in
workflow
~
descr
:
"convergence_detection.pcoc"
[
cmd
"pcoc_det.py"
~
env
[
opt
"-t"
dep
tree
;
opt
"-m"
string
"-"
;
opt
"-aa"
dep
faa
;
opt
"-o"
ident
dest
;
option
(
flag
string
"--gamma"
)
gamma
;
option
(
opt
"-CATX_est"
int
)
catx_est
;
option
(
flag
string
"--plot --plot_complete_ali"
)
plot_complete
;
opt
"-t"
dep
tree
;
opt
"-m"
string
"-"
;
opt
"-aa"
dep
faa
;
opt
"-o"
ident
dest
;
option
(
flag
string
"--gamma"
)
gamma
;
option
(
opt
"-CATX_est"
int
)
catx_est
;
option
(
flag
string
"--plot --plot_complete_ali"
)
plot_complete
;
]
]
...
...
lib/pipeline.ml
View file @
b45413d6
...
...
@@ -40,7 +40,7 @@ let parse_input_data indir =
}
in
[
dataset
]
else
failwith
({
|
More
than
2
files
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
))
failwith
({
|
More
than
2
files
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
))
)
|>
List
.
concat
...
...
@@ -249,23 +249,23 @@ let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn
let
validation_main
~
outdir
?
(
indir
=
""
)
?
(
ns
=
0
)
?
(
np
=
2
)
?
(
mem
=
2
)
~
preview
~
fast_mode
~
tree_dir
~
profile_fn
~
use_concat
()
=
let
trees
=
Array
.
to_list
@@
Sys
.
readdir
tree_dir
in
let
repo
=
List
.
map
trees
~
f
:
(
fun
tree
->
let
trees
=
[
tree
]
in
let
tree_prefix
=
Filename
.
chop_extension
tree
in
let
indir_dataset_l
=
if
indir
=
""
then
[]
else
parse_input_data
indir
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
~
ns
@
indir_dataset_l
in
let
dataset_results_l
=
derive_det
~
dataset_l
~
preview
~
fast_mode
in
let
post_analyses
=
Post_analyses
.
post_analyses_of_dataset_results_l
~
dataset_results_l
in
let
repo_per_tree
=
[
Dataset
.
repo
dataset_l
~
preview
;
repo_of_dataset_results_l
~
dataset_results_l
;
Repo
.
shift
tree_prefix
(
Post_analyses
.
repo_of_post_analyses
~
prefix
:
tree_prefix
~
post_analyses
);
]
|>
List
.
concat
in
repo_per_tree
)
|>
List
.
concat
let
trees
=
[
tree
]
in
let
tree_prefix
=
Filename
.
chop_extension
tree
in
let
indir_dataset_l
=
if
indir
=
""
then
[]
else
parse_input_data
indir
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
~
ns
@
indir_dataset_l
in
let
dataset_results_l
=
derive_det
~
dataset_l
~
preview
~
fast_mode
in
let
post_analyses
=
Post_analyses
.
post_analyses_of_dataset_results_l
~
dataset_results_l
in
let
repo_per_tree
=
[
Dataset
.
repo
dataset_l
~
preview
;
repo_of_dataset_results_l
~
dataset_results_l
;
Repo
.
shift
tree_prefix
(
Post_analyses
.
repo_of_post_analyses
~
prefix
:
tree_prefix
~
post_analyses
);
]
|>
List
.
concat
in
repo_per_tree
)
|>
List
.
concat
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
...
...
lib/post_analyses.ml
View file @
b45413d6
...
...
@@ -12,19 +12,19 @@ type t_choices = {
t_choices_complete
:
text_file
workflow
;
t_choices_max
:
text_file
workflow
;
t_choices_plot
:
text_file
workflow
;
}
}
type
simu_infos
=
{
simu_infos
:
text_file
workflow
option
;
model_prefix
:
string
;
tree_prefix
:
string
;
}
}
type
post_analyses
=
{
t_choices
:
t_choices
option
;
simu_infos_l
:
simu_infos
list
;
simu_infos_plot
:
text_file
workflow
;
}
}
let
is_hyp
~
hyp
(
dataset_results
:
dataset_res
)
=
...
...
@@ -35,7 +35,7 @@ let make_t_choices ~h0_merged_results ~ha_merged_results : post_analyses_dir dir
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"r_basics"
~
tag
:
"07162018"
()
in
let
out
=
dest
//
"out"
in
workflow
~
descr
:
"post_analyses.t_choices"
[
docker
env
(
docker
env
(
and_list
[
mkdir_p
dest
;
cmd
"Rscript"
[
...
...
@@ -43,60 +43,60 @@ let make_t_choices ~h0_merged_results ~ha_merged_results : post_analyses_dir dir
opt
"--H0"
dep
h0_merged_results
;
opt
"--Ha"
dep
ha_merged_results
;
opt
"--out "
ident
out
;
];
])
];
])
]
let
make_simu_infos
?
(
descr
=
""
)
?
(
fna_infos
)
~
faa
~
tree_sc
:
text_file
workflow
=
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"pcoc"
~
tag
:
"07022018"
()
in
workflow
~
descr
:
(
"post_analyses.simu_infos."
^
descr
)
[
cmd
"python"
~
env
[
file_dump
(
string
Scripts
.
calc_simu_infos
)
;
opt
"--faa"
dep
faa
;
opt
"--tree"
dep
tree_sc
;
option
(
opt
"--fna_infos"
dep
)
fna_infos
;
opt
"--output "
ident
dest
;
file_dump
(
string
Scripts
.
calc_simu_infos
)
;
opt
"--faa"
dep
faa
;
opt
"--tree"
dep
tree_sc
;
option
(
opt
"--fna_infos"
dep
)
fna_infos
;
opt
"--output "
ident
dest
;
];
]
let
group_simu_infos
~
simu_infos_l
:
simu_infos
directory
workflow
=
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"r_basics"
~
tag
:
"07162018"
()
in
let
cmd_cp_l
=
List
.
map
simu_infos_l
~
f
:
(
fun
s
->
match
s
.
simu_infos
with
match
s
.
simu_infos
with
|
Some
w
->
[
cmd
"cp"
[
dep
w
;
tmp
//
(
s
.
tree_prefix
^
"."
^
s
.
model_prefix
^
".tsv"
)]]
|
None
->
[]
)
|>
List
.
concat
in
let
out
=
dest
//
"out"
in
workflow
~
descr
:
"post_analyses.plot_simu_infos"
[
docker
env
(
docker
env
(
and_list
([
[
mkdir_p
dest
];
[
mkdir_p
tmp
];
cmd_cp_l
;
[
cmd
"Rscript"
[
file_dump
(
string
Scripts
.
plot_hyp_simu_validation
)
;
opt
"--input_dir"
ident
tmp
;
opt
"--out "
ident
out
;
];]
]
|>
List
.
concat
)
)
[
mkdir_p
dest
];
[
mkdir_p
tmp
];
cmd_cp_l
;
[
cmd
"Rscript"
[
file_dump
(
string
Scripts
.
plot_hyp_simu_validation
)
;
opt
"--input_dir"
ident
tmp
;
opt
"--out "
ident
out
;
];]
]
|>
List
.
concat
)
)
]
let
get_t_choices
~
(
dataset_results_l
:
dataset_res
list
)
:
t_choices
option
=
let
h0_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"H0"
)
in
let
ha_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"HaPCOC"
)
in
match
(
h0_res
,
ha_res
)
with
|
(
Some
h0
,
Some
ha
)
->
let
h0_merged_results
=
h0
.
merged_results
in
let
ha_merged_results
=
ha
.
merged_results
in
let
t_choices_dir
=
make_t_choices
~
h0_merged_results
~
ha_merged_results
in
let
t_choices_max
=
t_choices_dir
/
selector
[
"out.max_per_meth.tsv"
]
in
let
t_choices_complete
=
t_choices_dir
/
selector
[
"out.complete.tsv"
]
in
let
t_choices_plot
=
t_choices_dir
/
selector
[
"out.pdf"
]
in
Some
{
t_choices_max
;
t_choices_complete
;
t_choices_plot
}
|
_
->
None
|
(
Some
h0
,
Some
ha
)
->
let
h0_merged_results
=
h0
.
merged_results
in
let
ha_merged_results
=
ha
.
merged_results
in
let
t_choices_dir
=
make_t_choices
~
h0_merged_results
~
ha_merged_results
in
let
t_choices_max
=
t_choices_dir
/
selector
[
"out.max_per_meth.tsv"
]
in
let
t_choices_complete
=
t_choices_dir
/
selector
[
"out.complete.tsv"
]
in
let
t_choices_plot
=
t_choices_dir
/
selector
[
"out.pdf"
]
in
Some
{
t_choices_max
;
t_choices_complete
;
t_choices_plot
}
|
_
->
None
let
get_simu_infos
~
dataset_results
=
let
model_prefix
=
dataset_results
.
dataset
.
model_prefix
in
...
...
@@ -105,43 +105,43 @@ let get_simu_infos ~dataset_results =
let
tree_sc
=
Tree_dataset
.
tree
ready_dataset
.
tree_dataset
`Detection
in
let
fna_infos
=
ready_dataset
.
fna_infos
in
match
fna_infos
with
|
Some
w
->
Some
(
make_simu_infos
~
descr
:
model_prefix
~
faa
~
tree_sc
~
fna_infos
:
w
)
|
None
->
None
(*make_simu_infos ~faa ~tree_sc*)
|
Some
w
->
Some
(
make_simu_infos
~
descr
:
model_prefix
~
faa
~
tree_sc
~
fna_infos
:
w
)
|
None
->
None
(*make_simu_infos ~faa ~tree_sc*)
let
post_analyses_of_dataset_results_l
~
dataset_results_l
=
let
t_choices
=
get_t_choices
~
dataset_results_l
in
let
simu_infos_l
=
List
.
map
dataset_results_l
~
f
:
(
fun
dataset_results
->
{
simu_infos
=
(
get_simu_infos
~
dataset_results
);
tree_prefix
=
dataset_results
.
tree_prefix
;
model_prefix
=
dataset_results
.
model_prefix
}
)
in
{
simu_infos
=
(
get_simu_infos
~
dataset_results
);
tree_prefix
=
dataset_results
.
tree_prefix
;
model_prefix
=
dataset_results
.
model_prefix
}
)
in
let
simu_infos_plot
=
group_simu_infos
~
simu_infos_l
/
selector
[
"out.pdf"
]
in
{
t_choices
;
simu_infos_l
;
simu_infos_plot
}
let
repo_of_post_analyses
~
prefix
~
post_analyses
=
[
Repo
.[
item
[
prefix
^
".pdf"
]
post_analyses
.
simu_infos_plot
]
|>
Repo
.
shift
"simu_infos"
;
(
match
post_analyses
.
t_choices
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
".t_choices.max_mcc_per_meth.tsv"
]
w
.
t_choices_max
;
item
[
prefix
^
".t_choices.complete.tsv"
]
w
.
t_choices_complete
;
item
[
prefix
^
".t_choices.pdf"
]
w
.
t_choices_plot
;
]
|>
Repo
.
shift
"t_choices"
);
(
List
.
map
post_analyses
.
simu_infos_l
~
f
:
(
fun
simu_infos
->
match
simu_infos
.
simu_infos
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
"."
^
simu_infos
.
model_prefix
^
".tsv"
]
w
]
|>
Repo
.
shift
"simu_infos"
)
|>
List
.
concat
);
Repo
.[
item
[
prefix
^
".pdf"
]
post_analyses
.
simu_infos_plot
]
|>
Repo
.
shift
"simu_infos"
;
(
match
post_analyses
.
t_choices
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
".t_choices.max_mcc_per_meth.tsv"
]
w
.
t_choices_max
;
item
[
prefix
^
".t_choices.complete.tsv"
]
w
.
t_choices_complete
;
item
[
prefix
^
".t_choices.pdf"
]
w
.
t_choices_plot
;
]
|>
Repo
.
shift
"t_choices"
);
(
List
.
map
post_analyses
.
simu_infos_l
~
f
:
(
fun
simu_infos
->
match
simu_infos
.
simu_infos
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
"."
^
simu_infos
.
model_prefix
^
".tsv"
]
w
]
|>
Repo
.
shift
"simu_infos"