Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
b45413d6
Commit
b45413d6
authored
Jul 18, 2018
by
LANORE Vincent
Browse files
Auto indentation of everything + removed extra blank lines
parent
2d04fc2f
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
215 additions
and
235 deletions
+215
-235
lib/bppsuite.ml
lib/bppsuite.ml
+8
-14
lib/convergence_detection.ml
lib/convergence_detection.ml
+34
-35
lib/convergence_detection.mli
lib/convergence_detection.mli
+7
-7
lib/convergence_hypothesis.ml
lib/convergence_hypothesis.ml
+38
-39
lib/dataset.ml
lib/dataset.ml
+4
-4
lib/defs.ml
lib/defs.ml
+0
-4
lib/diffsel.ml
lib/diffsel.ml
+5
-5
lib/diffsel.mli
lib/diffsel.mli
+2
-2
lib/file_formats.ml
lib/file_formats.ml
+0
-1
lib/multinomial.ml
lib/multinomial.ml
+4
-5
lib/pcoc.ml
lib/pcoc.ml
+7
-7
lib/pipeline.ml
lib/pipeline.ml
+18
-18
lib/post_analyses.ml
lib/post_analyses.ml
+63
-63
lib/tamuri.ml
lib/tamuri.ml
+16
-17
lib/topological.ml
lib/topological.ml
+8
-13
lib/tree_dataset.ml
lib/tree_dataset.ml
+1
-1
No files found.
lib/bppsuite.ml
View file @
b45413d6
...
...
@@ -21,8 +21,6 @@ let bash_script args code =
in
seq
~
sep
:
"
\n
"
[
prelude
;
string
code
]
let
conf_file_bppseqgen
~
tree
~
out
~
nb_sites
~
config
=
seq
~
sep
:
"
\n
"
(
[
...
...
@@ -42,14 +40,12 @@ let bppseqgen ?(descr="") ~nb_sites ~tree ~config : nucleotide_fasta workflow =
mkdir_p
dest
;
cmd
"cat"
~
stdout
:
config_f
[(
file_dump
(
conf_file_bppseqgen
~
tree
~
out
~
nb_sites
~
config
))];
cmd
"bppseqgen"
[
assign
"param"
config_f
;
]
assign
"param"
config_f
;
]
]
)
]
/
selector
[
"seq.fa"
]
let
conf_file_bppseqgen_multi_profiles
~
tree
~
profile_f
~
ne_c
~
config
~
nb_sites_per_profile
=
seq
~
sep
:
"
\n
"
(
[
...
...
@@ -63,8 +59,6 @@ let conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~ne_c ~config ~nb_sites_
@
config
)
let
bppseqgen_multi_profiles_script
~
config
~
nb_combis
~
out
~
profile_f
=
let
vars
=
[
"FINAL_OUT"
,
ident
out
;
...
...
@@ -145,9 +139,9 @@ let conf_file_bppseqman_fa2phy ~fna =
assign
"output.sequence.file"
dest
;
assign
"output.sequence.format"
(
string
"Phylip"
)
;
string
{
|
input
.
alignment
=
true
input
.
sequence
.
remove_stop_codons
=
no
input
.
sequence
.
sites_to_use
=
all
sequence
.
manip
=
input
.
sequence
.
remove_stop_codons
=
no
input
.
sequence
.
sites_to_use
=
all
sequence
.
manip
=
|
}
]
...
...
@@ -162,7 +156,7 @@ let fa2phy ~(fna: nucleotide_fasta workflow) : nucleotide_phylip workflow =
let
paste_fna
~
(
fna_l
:
nucleotide_fasta
workflow
list
)
:
nucleotide_fasta
workflow
=
workflow
~
descr
:
"bppsuite.catfasta"
[
cmd
"catfasta2phyml.pl"
~
stdout
:
dest
~
env
(
List
.
concat
[
[
string
"-f"
]
;
List
.
map
fna_l
~
f
:
(
fun
fna
->
dep
fna
)
;
])
[
string
"-f"
]
;
List
.
map
fna_l
~
f
:
(
fun
fna
->
dep
fna
)
;
])
]
lib/convergence_detection.ml
View file @
b45413d6
...
...
@@ -75,49 +75,48 @@ let merge_results ~res_by_tools : text_file workflow =
in
workflow
~
descr
:
"convergence_detection.merge_results"
[
cmd
"python"
~
env
[
file_dump
(
string
Scripts
.
merge_det_results
)
;
opt
"-o"
ident
dest
;
seq
~
sep
:
" "
command
;
]
;
file_dump
(
string
Scripts
.
merge_det_results
)
;
opt
"-o"
ident
dest
;
seq
~
sep
:
" "
command
;
]
;
]
let
plot_merge_results
~
plot_all_sites
~
(
res_by_tools
:
result
list
)
~
tree
~
faa
~
tsv
:
svg
workflow
=
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"pcoc"
~
tag
:
"06212018"
()
in
(* use of pcoc env due to its working X server for dra plot with ete3 *)
let
meths
=
List
.
map
res_by_tools
~
f
:
(
fun
res
->
let
opt
=
match
res
with
|
`Pcoc
_
->
"PCOC,PC,OC"
|
`Pcoc_gamma
_
->
"PCOC_gamma,PC_gamma,OC_gamma,"
|
`Pcoc_C60
_
->
"PCOC_C60,PC_C60,OC_C60,"
|
`Diffsel
_
->
"Diffsel_mean,Diffsel_max"
|
`Diffsel_bis
_
->
"Diffsel_bis_mean,Diffsel_bis_max"
|
`Identical_LG
_
->
"Identical_LG08"
|
`Identical_WAG
_
->
"Identical_WAG01"
|
`Topological_LG
_
->
"Topological_LG08"
|
`Topological_WAG
_
->
"Topological_WAG01"
|
`Tdg09
_
->
"Tdg09_1-FDR,Tdg09_prob_post"
|
`Multinomial
_
->
"Mutinomial_LRT"
in
string
opt
)
|>
seq
~
sep
:
","
let
opt
=
match
res
with
|
`Pcoc
_
->
"PCOC,PC,OC"
|
`Pcoc_gamma
_
->
"PCOC_gamma,PC_gamma,OC_gamma,"
|
`Pcoc_C60
_
->
"PCOC_C60,PC_C60,OC_C60,"
|
`Diffsel
_
->
"Diffsel_mean,Diffsel_max"
|
`Diffsel_bis
_
->
"Diffsel_bis_mean,Diffsel_bis_max"
|
`Identical_LG
_
->
"Identical_LG08"
|
`Identical_WAG
_
->
"Identical_WAG01"
|
`Topological_LG
_
->
"Topological_LG08"
|
`Topological_WAG
_
->
"Topological_WAG01"
|
`Tdg09
_
->
"Tdg09_1-FDR,Tdg09_prob_post"
|
`Multinomial
_
->
"Mutinomial_LRT"
in
string
opt
)
|>
seq
~
sep
:
","
in
let
meths_t
=
List
.
map
res_by_tools
~
f
:
(
fun
res
->
let
opt
=
match
res
with
|
`Pcoc
_
->
"PCOC:0.99,PC:0.99,OC:0.99"
|
`Pcoc_gamma
_
->
"PCOC_gamma:0.99,PC_gamma:0.99,OC_gamma:0.99"
|
`Pcoc_C60
_
->
"PCOC_C60:0.99,PC_C60:0.99,OC_C60:0.99"
|
`Diffsel
_
->
"Diffsel_mean:0.11,Diffsel_max:0.9"
|
`Diffsel_bis
_
->
"Diffsel_bis_mean:0.11,Diffsel_bis_max:0.9"
|
`Identical_LG
_
->
"Identical_LG08:0.9"
|
`Identical_WAG
_
->
"Identical_WAG01:0.9"
|
`Topological_LG
_
->
"Topological_LG08:0.9"
|
`Topological_WAG
_
->
"Topological_WAG01:0.9"
|
`Tdg09
_
->
"Tdg09_1-FDR:0.9,Tdg09_prob_post:0.9"
|
`Multinomial
_
->
"Mutinomial_LRT:0.9"
in
string
opt
)
|>
seq
~
sep
:
","
let
opt
=
match
res
with
|
`Pcoc
_
->
"PCOC:0.99,PC:0.99,OC:0.99"
|
`Pcoc_gamma
_
->
"PCOC_gamma:0.99,PC_gamma:0.99,OC_gamma:0.99"
|
`Pcoc_C60
_
->
"PCOC_C60:0.99,PC_C60:0.99,OC_C60:0.99"
|
`Diffsel
_
->
"Diffsel_mean:0.11,Diffsel_max:0.9"
|
`Diffsel_bis
_
->
"Diffsel_bis_mean:0.11,Diffsel_bis_max:0.9"
|
`Identical_LG
_
->
"Identical_LG08:0.9"
|
`Identical_WAG
_
->
"Identical_WAG01:0.9"
|
`Topological_LG
_
->
"Topological_LG08:0.9"
|
`Topological_WAG
_
->
"Topological_WAG01:0.9"
|
`Tdg09
_
->
"Tdg09_1-FDR:0.9,Tdg09_prob_post:0.9"
|
`Multinomial
_
->
"Mutinomial_LRT:0.9"
in
string
opt
)
|>
seq
~
sep
:
","
in
let
package_diffsel_script_utils
=
tmp
//
"diffsel_script_utils.py"
in
let
package_plot_data
=
tmp
//
"plot_data.py"
in
...
...
lib/convergence_detection.mli
View file @
b45413d6
...
...
@@ -28,16 +28,16 @@ type dataset_res = {
res_by_tools
:
result
list
;
merged_results
:
text_file
workflow
;
plot_merged_results
:
svg
workflow
}
}
val
merge_results
:
res_by_tools
:
result
list
->
text_file
workflow
val
plot_merge_results
:
plot_all_sites
:
bool
->
res_by_tools
:
result
list
->
tree
:
nhx
workflow
->
faa
:
aminoacid_fasta
workflow
->
tsv
:
text_file
workflow
->
svg
workflow
plot_all_sites
:
bool
->
res_by_tools
:
result
list
->
tree
:
nhx
workflow
->
faa
:
aminoacid_fasta
workflow
->
tsv
:
text_file
workflow
->
svg
workflow
lib/convergence_hypothesis.ml
View file @
b45413d6
...
...
@@ -37,57 +37,56 @@ nonhomogeneous = general
rate_distribution
=
Constant
()
|
}
let
bpp_config_H0_F
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_HaPCOC_F
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2)"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2)"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_HaPC_F
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))"
]
;
seq
[
string
"modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_H0_F_Ne
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"model2=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"model2=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_HaPCOC_F_Ne
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2, Ns=$(NE_T))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2, Ns=$(NE_T))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_HaPC_F_Ne
=
seq
~
sep
:
"
\n
"
[
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
seq
[
string
"model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))"
]
;
seq
[
string
"modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))"
]
;
seq
[
string
"modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))"
]
;
seq
[
string
"nonhomogeneous.root_freq=FromModel(model=$(model1))"
]
;
]
let
bpp_config_F
nodes
hyp
=
[
string
bpp_config_base
;
insert
nodes
;
match
hyp
with
|
H0
->
bpp_config_H0_F
|
HaPC
->
bpp_config_HaPC_F
|
HaPCOC
->
bpp_config_HaPCOC_F
|
H0_NeSmall
->
bpp_config_H0_F_Ne
|
HaPC_NeSmall
->
bpp_config_HaPC_F_Ne
|
HaPCOC_NeSmall
->
bpp_config_HaPCOC_F_Ne
|
H0_NeBig
->
bpp_config_H0_F_Ne
|
HaPC_NeBig
->
bpp_config_HaPC_F_Ne
|
HaPCOC_NeBig
->
bpp_config_HaPCOC_F_Ne
;
match
hyp
with
|
H0
->
bpp_config_H0_F
|
HaPC
->
bpp_config_HaPC_F
|
HaPCOC
->
bpp_config_HaPCOC_F
|
H0_NeSmall
->
bpp_config_H0_F_Ne
|
HaPC_NeSmall
->
bpp_config_HaPC_F_Ne
|
HaPCOC_NeSmall
->
bpp_config_HaPCOC_F_Ne
|
H0_NeBig
->
bpp_config_H0_F_Ne
|
HaPC_NeBig
->
bpp_config_HaPC_F_Ne
|
HaPCOC_NeBig
->
bpp_config_HaPCOC_F_Ne
;
]
lib/dataset.ml
View file @
b45413d6
...
...
@@ -14,9 +14,9 @@ let repo ~preview dataset_l =
let
tree_prefix
=
dataset
.
tree_prefix
in
let
repo_ready_data
=
Ready_dataset
.
repo
dataset
.
dataset
in
let
repo_raw_data
=
if
preview
then
Raw_dataset
.
repo
~
prefix
:
model_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
else
[]
in
List
.
concat
[
Repo
.
shift
"simulated_data"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"simulated_data_debug"
(
Repo
.
shift
tree_prefix
(
Repo
.
shift
model_prefix
repo_ready_data
));
]
List
.
concat
[
Repo
.
shift
"simulated_data"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"simulated_data_debug"
(
Repo
.
shift
tree_prefix
(
Repo
.
shift
model_prefix
repo_ready_data
));
]
)
|>
List
.
concat
lib/defs.ml
View file @
b45413d6
...
...
@@ -9,7 +9,3 @@ type output_parse_input_tree =
|
Tree4detect
|
Tree4simu
|
Tree_diffsel
lib/diffsel.ml
View file @
b45413d6
...
...
@@ -21,11 +21,11 @@ let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:in
cmd
"cp"
[
dep
phy_n
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"/diffsel/_build/diffsel"
[
opt
"-t"
ident
tmp_tree
;
opt
"-d"
ident
tmp_ali
;
opt
"-ncond"
int
2
;
opt
"-x"
seq
[
int
w_every
;
string
" "
;
int
n_cycles
];
ident
chainname
;
opt
"-t"
ident
tmp_tree
;
opt
"-d"
ident
tmp_ali
;
opt
"-ncond"
int
2
;
opt
"-x"
seq
[
int
w_every
;
string
" "
;
int
n_cycles
];
ident
chainname
;
];
]
)
...
...
lib/diffsel.mli
View file @
b45413d6
...
...
@@ -11,5 +11,5 @@ val diffsel :
[
`diffsel
]
directory
workflow
val
selector
:
[
`diffsel
]
directory
workflow
->
text_file
workflow
[
`diffsel
]
directory
workflow
->
text_file
workflow
lib/file_formats.ml
View file @
b45413d6
...
...
@@ -25,4 +25,3 @@ class type nucleotide_phylip = object
inherit
text_file
method
format
:
[
`Nucleotide
]
end
lib/multinomial.ml
View file @
b45413d6
...
...
@@ -9,13 +9,12 @@ let multinomial ~(tree_id:_ workflow) ~(tree_sc:_ workflow) ~(faa:aminoacid_fast
workflow
~
descr
:
(
"calc_multinomial"
)
[
mkdir_p
dest
;
cmd
"python"
~
env
[
file_dump
(
string
Scripts
.
calc_multinomial
)
;
opt
"-t"
dep
tree_sc
;
opt
"-a"
dep
faa
;
opt
"-o"
ident
(
dest
//
"out.tsv"
)
;
file_dump
(
string
Scripts
.
calc_multinomial
)
;
opt
"-t"
dep
tree_sc
;
opt
"-a"
dep
faa
;
opt
"-o"
ident
(
dest
//
"out.tsv"
)
;
]
]
let
results
w
=
w
/
selector
[
"out.tsv"
]
lib/pcoc.ml
View file @
b45413d6
...
...
@@ -8,13 +8,13 @@ let pcoc ?plot_complete ?gamma ?catx_est ~(faa:aminoacid_fasta workflow) ~(tree
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"pcoc"
~
tag
:
"07022018"
()
in
workflow
~
descr
:
"convergence_detection.pcoc"
[
cmd
"pcoc_det.py"
~
env
[
opt
"-t"
dep
tree
;
opt
"-m"
string
"-"
;
opt
"-aa"
dep
faa
;
opt
"-o"
ident
dest
;
option
(
flag
string
"--gamma"
)
gamma
;
option
(
opt
"-CATX_est"
int
)
catx_est
;
option
(
flag
string
"--plot --plot_complete_ali"
)
plot_complete
;
opt
"-t"
dep
tree
;
opt
"-m"
string
"-"
;
opt
"-aa"
dep
faa
;
opt
"-o"
ident
dest
;
option
(
flag
string
"--gamma"
)
gamma
;
option
(
opt
"-CATX_est"
int
)
catx_est
;
option
(
flag
string
"--plot --plot_complete_ali"
)
plot_complete
;
]
]
...
...
lib/pipeline.ml
View file @
b45413d6
...
...
@@ -40,7 +40,7 @@ let parse_input_data indir =
}
in
[
dataset
]
else
failwith
({
|
More
than
2
files
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
))
failwith
({
|
More
than
2
files
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
))
)
|>
List
.
concat
...
...
@@ -249,23 +249,23 @@ let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn
let
validation_main
~
outdir
?
(
indir
=
""
)
?
(
ns
=
0
)
?
(
np
=
2
)
?
(
mem
=
2
)
~
preview
~
fast_mode
~
tree_dir
~
profile_fn
~
use_concat
()
=
let
trees
=
Array
.
to_list
@@
Sys
.
readdir
tree_dir
in
let
repo
=
List
.
map
trees
~
f
:
(
fun
tree
->
let
trees
=
[
tree
]
in
let
tree_prefix
=
Filename
.
chop_extension
tree
in
let
indir_dataset_l
=
if
indir
=
""
then
[]
else
parse_input_data
indir
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
~
ns
@
indir_dataset_l
in
let
dataset_results_l
=
derive_det
~
dataset_l
~
preview
~
fast_mode
in
let
post_analyses
=
Post_analyses
.
post_analyses_of_dataset_results_l
~
dataset_results_l
in
let
repo_per_tree
=
[
Dataset
.
repo
dataset_l
~
preview
;
repo_of_dataset_results_l
~
dataset_results_l
;
Repo
.
shift
tree_prefix
(
Post_analyses
.
repo_of_post_analyses
~
prefix
:
tree_prefix
~
post_analyses
);
]
|>
List
.
concat
in
repo_per_tree
)
|>
List
.
concat
let
trees
=
[
tree
]
in
let
tree_prefix
=
Filename
.
chop_extension
tree
in
let
indir_dataset_l
=
if
indir
=
""
then
[]
else
parse_input_data
indir
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
~
ns
@
indir_dataset_l
in
let
dataset_results_l
=
derive_det
~
dataset_l
~
preview
~
fast_mode
in
let
post_analyses
=
Post_analyses
.
post_analyses_of_dataset_results_l
~
dataset_results_l
in
let
repo_per_tree
=
[
Dataset
.
repo
dataset_l
~
preview
;
repo_of_dataset_results_l
~
dataset_results_l
;
Repo
.
shift
tree_prefix
(
Post_analyses
.
repo_of_post_analyses
~
prefix
:
tree_prefix
~
post_analyses
);
]
|>
List
.
concat
in
repo_per_tree
)
|>
List
.
concat
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
...
...
lib/post_analyses.ml
View file @
b45413d6
...
...
@@ -12,19 +12,19 @@ type t_choices = {
t_choices_complete
:
text_file
workflow
;
t_choices_max
:
text_file
workflow
;
t_choices_plot
:
text_file
workflow
;
}
}
type
simu_infos
=
{
simu_infos
:
text_file
workflow
option
;
model_prefix
:
string
;
tree_prefix
:
string
;
}
}
type
post_analyses
=
{
t_choices
:
t_choices
option
;
simu_infos_l
:
simu_infos
list
;
simu_infos_plot
:
text_file
workflow
;
}
}
let
is_hyp
~
hyp
(
dataset_results
:
dataset_res
)
=
...
...
@@ -35,7 +35,7 @@ let make_t_choices ~h0_merged_results ~ha_merged_results : post_analyses_dir dir
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"r_basics"
~
tag
:
"07162018"
()
in
let
out
=
dest
//
"out"
in
workflow
~
descr
:
"post_analyses.t_choices"
[
docker
env
(
docker
env
(
and_list
[
mkdir_p
dest
;
cmd
"Rscript"
[
...
...
@@ -43,60 +43,60 @@ let make_t_choices ~h0_merged_results ~ha_merged_results : post_analyses_dir dir
opt
"--H0"
dep
h0_merged_results
;
opt
"--Ha"
dep
ha_merged_results
;
opt
"--out "
ident
out
;
];
])
];
])
]
let
make_simu_infos
?
(
descr
=
""
)
?
(
fna_infos
)
~
faa
~
tree_sc
:
text_file
workflow
=
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"pcoc"
~
tag
:
"07022018"
()
in
workflow
~
descr
:
(
"post_analyses.simu_infos."
^
descr
)
[
cmd
"python"
~
env
[
file_dump
(
string
Scripts
.
calc_simu_infos
)
;
opt
"--faa"
dep
faa
;
opt
"--tree"
dep
tree_sc
;
option
(
opt
"--fna_infos"
dep
)
fna_infos
;
opt
"--output "
ident
dest
;
file_dump
(
string
Scripts
.
calc_simu_infos
)
;
opt
"--faa"
dep
faa
;
opt
"--tree"
dep
tree_sc
;
option
(
opt
"--fna_infos"
dep
)
fna_infos
;
opt
"--output "
ident
dest
;
];
]
let
group_simu_infos
~
simu_infos_l
:
simu_infos
directory
workflow
=
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"r_basics"
~
tag
:
"07162018"
()
in
let
cmd_cp_l
=
List
.
map
simu_infos_l
~
f
:
(
fun
s
->
match
s
.
simu_infos
with
match
s
.
simu_infos
with
|
Some
w
->
[
cmd
"cp"
[
dep
w
;
tmp
//
(
s
.
tree_prefix
^
"."
^
s
.
model_prefix
^
".tsv"
)]]
|
None
->
[]
)
|>
List
.
concat
in
let
out
=
dest
//
"out"
in
workflow
~
descr
:
"post_analyses.plot_simu_infos"
[
docker
env
(
docker
env
(
and_list
([
[
mkdir_p
dest
];
[
mkdir_p
tmp
];
cmd_cp_l
;
[
cmd
"Rscript"
[
file_dump
(
string
Scripts
.
plot_hyp_simu_validation
)
;
opt
"--input_dir"
ident
tmp
;
opt
"--out "
ident
out
;
];]
]
|>
List
.
concat
)
)
[
mkdir_p
dest
];
[
mkdir_p
tmp
];
cmd_cp_l
;
[
cmd
"Rscript"
[
file_dump
(
string
Scripts
.
plot_hyp_simu_validation
)
;
opt
"--input_dir"
ident
tmp
;
opt
"--out "
ident
out
;
];]
]
|>
List
.
concat
)
)
]
let
get_t_choices
~
(
dataset_results_l
:
dataset_res
list
)
:
t_choices
option
=
let
h0_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"H0"
)
in
let
ha_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"HaPCOC"
)
in
match
(
h0_res
,
ha_res
)
with
|
(
Some
h0
,
Some
ha
)
->
let
h0_merged_results
=
h0
.
merged_results
in
let
ha_merged_results
=
ha
.
merged_results
in
let
t_choices_dir
=
make_t_choices
~
h0_merged_results
~
ha_merged_results
in
let
t_choices_max
=
t_choices_dir
/
selector
[
"out.max_per_meth.tsv"
]
in
let
t_choices_complete
=
t_choices_dir
/
selector
[
"out.complete.tsv"
]
in
let
t_choices_plot
=
t_choices_dir
/
selector
[
"out.pdf"
]
in
Some
{
t_choices_max
;
t_choices_complete
;
t_choices_plot
}
|
_
->
None
|
(
Some
h0
,
Some
ha
)
->
let
h0_merged_results
=
h0
.
merged_results
in
let
ha_merged_results
=
ha
.
merged_results
in
let
t_choices_dir
=
make_t_choices
~
h0_merged_results
~
ha_merged_results
in
let
t_choices_max
=
t_choices_dir
/
selector
[
"out.max_per_meth.tsv"
]
in
let
t_choices_complete
=
t_choices_dir
/
selector
[
"out.complete.tsv"
]
in
let
t_choices_plot
=
t_choices_dir
/
selector
[
"out.pdf"
]
in
Some
{
t_choices_max
;
t_choices_complete
;
t_choices_plot
}
|
_
->
None
let
get_simu_infos
~
dataset_results
=
let
model_prefix
=
dataset_results
.
dataset
.
model_prefix
in
...
...
@@ -105,43 +105,43 @@ let get_simu_infos ~dataset_results =
let
tree_sc
=
Tree_dataset
.
tree
ready_dataset
.
tree_dataset
`Detection
in
let
fna_infos
=
ready_dataset
.
fna_infos
in
match
fna_infos
with
|
Some
w
->
Some
(
make_simu_infos
~
descr
:
model_prefix
~
faa
~
tree_sc
~
fna_infos
:
w
)
|
None
->
None
(*make_simu_infos ~faa ~tree_sc*)
|
Some
w
->
Some
(
make_simu_infos
~
descr
:
model_prefix
~
faa
~
tree_sc
~
fna_infos
:
w
)
|
None
->
None
(*make_simu_infos ~faa ~tree_sc*)
let
post_analyses_of_dataset_results_l
~
dataset_results_l
=
let
t_choices
=
get_t_choices
~
dataset_results_l
in
let
simu_infos_l
=
List
.
map
dataset_results_l
~
f
:
(
fun
dataset_results
->
{
simu_infos
=
(
get_simu_infos
~
dataset_results
);
tree_prefix
=
dataset_results
.
tree_prefix
;
model_prefix
=
dataset_results
.
model_prefix
}
)
in
{
simu_infos
=
(
get_simu_infos
~
dataset_results
);
tree_prefix
=
dataset_results
.
tree_prefix
;
model_prefix
=
dataset_results
.
model_prefix
}
)
in
let
simu_infos_plot
=
group_simu_infos
~
simu_infos_l
/
selector
[
"out.pdf"
]
in
{
t_choices
;
simu_infos_l
;
simu_infos_plot
}
let
repo_of_post_analyses
~
prefix
~
post_analyses
=
[
Repo
.[
item
[
prefix
^
".pdf"
]
post_analyses
.
simu_infos_plot
]
|>
Repo
.
shift
"simu_infos"
;
(
match
post_analyses
.
t_choices
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
".t_choices.max_mcc_per_meth.tsv"
]
w
.
t_choices_max
;
item
[
prefix
^
".t_choices.complete.tsv"
]
w
.
t_choices_complete
;
item
[
prefix
^
".t_choices.pdf"
]
w
.
t_choices_plot
;
]
|>
Repo
.
shift
"t_choices"
);
(
List
.
map
post_analyses
.
simu_infos_l
~
f
:
(
fun
simu_infos
->
match
simu_infos
.
simu_infos
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
"."
^
simu_infos
.
model_prefix
^
".tsv"
]
w
]
|>
Repo
.
shift
"simu_infos"
)
|>
List
.
concat
);
Repo
.[
item
[
prefix
^
".pdf"
]
post_analyses
.
simu_infos_plot
]
|>
Repo
.
shift
"simu_infos"
;
(
match
post_analyses
.
t_choices
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
".t_choices.max_mcc_per_meth.tsv"
]
w
.
t_choices_max
;
item
[
prefix
^
".t_choices.complete.tsv"
]
w
.
t_choices_complete
;
item
[
prefix
^
".t_choices.pdf"
]
w
.
t_choices_plot
;
]
|>
Repo
.
shift
"t_choices"
);
(
List
.
map
post_analyses
.
simu_infos_l
~
f
:
(
fun
simu_infos
->
match
simu_infos
.
simu_infos
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
"."
^
simu_infos
.
model_prefix
^
".tsv"
]
w
]
|>
Repo
.
shift
"simu_infos"