Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
2a8d34e1
Commit
2a8d34e1
authored
Jul 25, 2018
by
Carine Rey
Browse files
add type profile + split profile_fn in 3 classes at the beginning of the pipeline
parent
525b1d4f
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
107 additions
and
48 deletions
+107
-48
lib/bppsuite.ml
lib/bppsuite.ml
+7
-7
lib/bppsuite.mli
lib/bppsuite.mli
+1
-1
lib/convergence_detection.ml
lib/convergence_detection.ml
+1
-1
lib/pipeline.ml
lib/pipeline.ml
+49
-35
lib/post_analyses.ml
lib/post_analyses.ml
+4
-4
lib/profile.ml
lib/profile.ml
+45
-0
No files found.
lib/bppsuite.ml
View file @
2a8d34e1
...
...
@@ -46,11 +46,11 @@ let bppseqgen ?(descr="") ~nb_sites ~tree ~config : nucleotide_fasta workflow =
)
]
/
selector
[
"seq.fa"
]
let
conf_file_bppseqgen_multi_profiles
~
tree
~
profile_
f
~
ne_c
~
ne_a
~
config
~
nb_sites_per_profile
=
let
conf_file_bppseqgen_multi_profiles
~
tree
~
profile_
w
~
ne_c
~
ne_a
~
config
~
nb_sites_per_profile
=
seq
~
sep
:
"
\n
"
(
[
assign
"input.tree.file"
(
dep
tree
)
;
assign
"PROFILE_F"
(
dep
profile_
f
)
;
assign
"PROFILE_F"
(
dep
profile_
w
)
;
assign
"number_of_sites"
(
int
nb_sites_per_profile
)
;
assign
"NE_1"
(
float
ne_a
)
;
assign
"NE_C"
(
float
ne_c
)
;
...
...
@@ -59,11 +59,11 @@ let conf_file_bppseqgen_multi_profiles ~tree ~profile_f ~ne_c ~ne_a ~config ~nb_
@
config
)
let
bppseqgen_multi_profiles_script
~
config
~
nb_combis
~
out
~
profile_
f
=
let
bppseqgen_multi_profiles_script
~
config
~
nb_combis
~
out
~
profile_
w
=
let
vars
=
[
"FINAL_OUT"
,
ident
out
;
"PARAM"
,
config
;
"PROFILE_F"
,
dep
profile_
f
;
"PROFILE_F"
,
dep
profile_
w
;
"NB_COMBI_PROFILES"
,
int
nb_combis
;
]
in
...
...
@@ -91,7 +91,7 @@ let bppseqgen_multi_profiles_script ~config ~nb_combis ~out ~profile_f =
|
}
let
bppseqgen_multi_profiles
?
(
descr
=
""
)
~
profile_
f
~
nb_sites
~
tree
~
config
~
ne_c
~
ne_a
:
bppseqgen_multi_profiles
directory
workflow
=
let
bppseqgen_multi_profiles
?
(
descr
=
""
)
~
profile_
w
~
nb_sites
~
tree
~
config
~
ne_c
~
ne_a
:
bppseqgen_multi_profiles
directory
workflow
=
let
nb_sites_per_profile
=
1
in
let
nb_combis
=
Pervasives
.(
nb_sites
/
nb_sites_per_profile
)
in
let
config_f
=
dest
//
"config.bpp"
in
...
...
@@ -102,8 +102,8 @@ let bppseqgen_multi_profiles ?(descr="") ~profile_f ~nb_sites ~tree ~config ~ne_
mkdir_p
dest
;
mkdir_p
tmp
;
cd
tmp
;
cmd
"cat"
~
stdout
:
config_f
[(
file_dump
(
conf_file_bppseqgen_multi_profiles
~
tree
~
profile_
f
~
config
~
ne_c
~
ne_a
~
nb_sites_per_profile
))];
cmd
"bash"
[(
file_dump
(
bppseqgen_multi_profiles_script
~
config
:
config_f
~
nb_combis
~
out
~
profile_
f
))];
cmd
"cat"
~
stdout
:
config_f
[(
file_dump
(
conf_file_bppseqgen_multi_profiles
~
tree
~
profile_
w
~
config
~
ne_c
~
ne_a
~
nb_sites_per_profile
))];
cmd
"bash"
[(
file_dump
(
bppseqgen_multi_profiles_script
~
config
:
config_f
~
nb_combis
~
out
~
profile_
w
))];
]
)
]
...
...
lib/bppsuite.mli
View file @
2a8d34e1
...
...
@@ -13,7 +13,7 @@ val bppseqgen :
val
bppseqgen_multi_profiles
:
?
descr
:
string
->
profile_
f
:
_
workflow
->
profile_
w
:
_
workflow
->
nb_sites
:
int
->
tree
:
nhx
workflow
->
config
:
Bistro
.
Template
.
t
list
->
...
...
lib/convergence_detection.ml
View file @
2a8d34e1
...
...
@@ -42,7 +42,7 @@ type dataset_res = {
}
let
merge_results
~
res_by_tools
:
text_file
workflow
=
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"python_basics"
~
tag
:
"07
18
2018"
()
in
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"python_basics"
~
tag
:
"07
25
2018"
()
in
let
command
=
List
.
map
res_by_tools
~
f
:
(
fun
res
->
let
w
=
match
res
with
|
`Pcoc
d
->
Pcoc
.
results
d
...
...
lib/pipeline.ml
View file @
2a8d34e1
...
...
@@ -5,6 +5,7 @@ open Bistro.Std
open
File_formats
open
Defs
open
Convergence_detection
open
Profile
let
parse_input_data
indir
=
let
datasets
=
Array
.
to_list
@@
Sys
.
readdir
indir
in
...
...
@@ -44,7 +45,7 @@ let parse_input_data indir =
)
|>
List
.
concat
let
derive_from_model
~
model
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
_f
~
preview
~
ns
=
let
derive_from_model
~
model
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
=
let
model_prefix
=
Convergence_hypothesis
.
string_of_model
model
in
let
nb_sites
=
if
ns
=
0
then
(
if
preview
then
20
else
50
)
else
ns
in
let
nodes
=
Tree_dataset
.
nodes
tree_dataset
model
in
...
...
@@ -73,7 +74,8 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~
|
H0_SmallNeInBigNe
->
10
.
|
_
->
1
.
in
let
run_fna
=
Bppsuite
.
bppseqgen_multi_profiles
~
descr
~
nb_sites
~
tree
~
config
:
config_p
~
profile_f
~
ne_c
~
ne_a
in
let
profile_w
=
profile
.
profile_w
in
let
run_fna
=
Bppsuite
.
bppseqgen_multi_profiles
~
descr
~
nb_sites
~
tree
~
config
:
config_p
~
profile_w
~
ne_c
~
ne_a
in
let
fna
=
Bppsuite
.
bppseqgen_multi_profiles_get_fa
run_fna
in
let
fna_infos
=
Some
(
Bppsuite
.
bppseqgen_multi_profiles_get_info
run_fna
)
in
...
...
@@ -81,7 +83,7 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~
let
ready_dataset
=
{
Ready_dataset
.
input_tree
=
input_tree
;
tree_dataset
;
fna
;
faa
;
fna_infos
}
in
{
Dataset
.
model_prefix
;
is_real
=
false
;
tree_prefix
;
dataset
=
ready_dataset
}
let
derive_from_tree
~
tree_dir
~
tree
~
profile
_f
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
=
let
derive_from_tree
~
tree_dir
~
tree
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
=
let
tree_prefix
=
Filename
.
chop_extension
tree
in
let
input_tree
=
input
(
Filename
.
concat
tree_dir
tree
)
in
let
tree_dataset
=
Tree_dataset
.
prepare
input_tree
in
...
...
@@ -122,18 +124,17 @@ let derive_from_tree ~tree_dir ~tree ~profile_f ~preview ~use_concat ~ns ~no_Ne
]
|>
List
.
concat
in
let
dataset_per_hypo
=
List
.
map
models
~
f
:
(
fun
model
->
derive_from_model
~
model
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
_f
~
preview
~
ns
derive_from_model
~
model
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
)
in
let
ready_dataset_H0
=
(
derive_from_model
~
model
:
H0
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
_f
~
preview
~
ns
)
.
dataset
in
let
ready_dataset_HaPCOC
=
(
derive_from_model
~
model
:
HaPCOC
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
_f
~
preview
~
ns
)
.
dataset
in
let
ready_dataset_H0
=
(
derive_from_model
~
model
:
H0
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
)
.
dataset
in
let
ready_dataset_HaPCOC
=
(
derive_from_model
~
model
:
HaPCOC
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
)
.
dataset
in
let
concat_H0Ha
=
{
Dataset
.
model_prefix
=
"H0+HaPCOC"
;
tree_prefix
;
is_real
=
false
;
dataset
=
Ready_dataset
.
paste
ready_dataset_H0
ready_dataset_HaPCOC
}
in
let
dataset_concat_hypos
=
if
use_concat
then
[
concat_H0Ha
;]
else
[]
in
List
.
concat
[
dataset_per_hypo
;
dataset_concat_hypos
]
let
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
=
let
profile_f
=
input
profile_fn
in
let
derive_sim
~
tree_dir
~
trees
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
=
List
.
map
trees
~
f
:
(
fun
tree
->
derive_from_tree
~
tree_dir
~
tree
~
profile
_f
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
)
derive_from_tree
~
tree_dir
~
tree
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
)
|>
List
.
concat
...
...
@@ -257,36 +258,16 @@ let derive_det ~dataset_l ~preview ~fast_mode =
List
.
map
dataset_l
~
f
:
(
fun
dataset
->
derive_from_dataset
~
preview
~
dataset
~
fast_mode
)
(** Logger used when building repositories: it combines a console logger,
    a dot output written to ["dag.dot"] and an HTML report written to
    ["report.html"] through [Logger.tee]. *)
let logger =
  let html_report = Bistro_utils.Html_logger.create "report.html" in
  (*dot -Tpdf example/dag.dot -o dag.pdf*)
  let dag_dump = Dot_output.create "dag.dot" in
  let console = Console_logger.create () in
  Logger.tee [ console; dag_dump; html_report ]
(** [detection_main ~outdir ~indir ?np ?mem ~preview ~fast_mode ()] parses
    the datasets found in [indir], derives the detection results for each
    of them and builds the resulting repository in [outdir].

    [np] and [mem] both default to [2]; [mem] is handed to [Repo.build]
    as [`GB mem]. *)
let detection_main ~outdir ~indir ?(np = 2) ?(mem = 2) ~preview ~fast_mode () =
  let dataset_results_l =
    derive_det ~dataset_l:(parse_input_data indir) ~preview ~fast_mode
  in
  let detection_repo = repo_of_dataset_results_l ~dataset_results_l in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger detection_repo
let
simulation_main
~
outdir
?
(
ns
=
0
)
?
(
np
=
2
)
?
(
mem
=
2
)
~
tree_dir
~
profile_fn
~
preview
~
use_concat
~
no_Ne
~
no_HaPC
()
=
let
derive_profile
?
(
indir
=
""
)
?
(
ns
=
0
)
~
preview
~
fast_mode
~
no_Ne
~
ne_test
~
no_HaPC
~
tree_dir
~
profile
~
use_concat
~
only_simu
()
=
let
trees
=
Array
.
to_list
@@
Sys
.
readdir
tree_dir
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
:
false
in
let
repo
=
Dataset
.
repo
dataset_l
~
preview
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
let
validation_main
~
outdir
?
(
indir
=
""
)
?
(
ns
=
0
)
?
(
np
=
2
)
?
(
mem
=
2
)
~
preview
~
fast_mode
~
no_Ne
~
ne_test
~
no_HaPC
~
tree_dir
~
profile_fn
~
use_concat
~
only_simu
()
=
(* simulated trees *)
let
trees
=
Array
.
to_list
@@
Sys
.
readdir
tree_dir
in
let
simu_dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
in
let
simu_dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
in
let
post_analyses_simu
=
Post_analyses
.
post_analyses_simu_of_simu_dataset_l
~
simu_dataset_l
in
let
repo_of_post_analyses_simu
=
Post_analyses
.
repo_of_post_analyses_simu
~
post_analyses_simu
in
let
repo_and_post_analyses_per_tree_simu
=
List
.
map
trees
~
f
:
(
fun
tree
->
(*to keep together all models per tree*)
let
trees
=
[
tree
]
in
let
tree_prefix
=
Filename
.
chop_extension
tree
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile
_fn
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
in
derive_sim
~
tree_dir
~
trees
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
in
let
dataset_results_l
=
if
only_simu
then
[]
...
...
@@ -305,8 +286,41 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
in
let
all_repo_per_tree_simu
=
List
.
map
repo_and_post_analyses_per_tree_simu
~
f
:
(
fun
(
r
,
p
)
->
r
)
|>
List
.
concat
in
let
all_post_analyses_per_tree
=
List
.
map
repo_and_post_analyses_per_tree_simu
~
f
:
(
fun
(
r
,
p
)
->
p
)
in
let
profile
=
Filename
.
chop_extension
profile_fn
in
let
repo_post_analyses_all_trees
=
Post_analyses
.
repo_post_analyses_all_trees_of_all_post_analyses_per_tree
~
all_post_analyses_per_tree
~
profile
in
let
profile_prefix
=
profile
.
profile_n
in
let
repo_post_analyses_all_trees
=
Post_analyses
.
repo_post_analyses_all_trees_of_all_post_analyses_per_tree
~
all_post_analyses_per_tree
~
profile_prefix
in
let
repo
=
repo_of_post_analyses_simu
@
all_repo_per_tree_simu
@
repo_post_analyses_all_trees
in
Repo
.
shift
profile_prefix
repo
(** Logger used when building repositories: it combines a console logger,
    a dot output written to ["dag.dot"] and an HTML report written to
    ["report.html"] through [Logger.tee]. *)
let logger =
  let html_report = Bistro_utils.Html_logger.create "report.html" in
  (*dot -Tpdf example/dag.dot -o dag.pdf*)
  let dag_dump = Dot_output.create "dag.dot" in
  let console = Console_logger.create () in
  Logger.tee [ console; dag_dump; html_report ]
(** [detection_main ~outdir ~indir ?np ?mem ~preview ~fast_mode ()] parses
    the datasets found in [indir], derives the detection results for each
    of them and builds the resulting repository in [outdir].

    [np] and [mem] both default to [2]; [mem] is handed to [Repo.build]
    as [`GB mem]. *)
let detection_main ~outdir ~indir ?(np = 2) ?(mem = 2) ~preview ~fast_mode () =
  let dataset_results_l =
    derive_det ~dataset_l:(parse_input_data indir) ~preview ~fast_mode
  in
  let detection_repo = repo_of_dataset_results_l ~dataset_results_l in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger detection_repo
(** [simulation_main ~outdir ?ns ?np ?mem ~tree_dir ~profile_fn ~preview
    ~use_concat ~no_Ne ~no_HaPC ()] simulates datasets for every entry of
    [tree_dir] using the single profile file [profile_fn], then builds the
    resulting repository in [outdir].

    The profile record pairs the [input] workflow of [profile_fn] with the
    file name stripped of its extension. [ns] defaults to [0], [np] and
    [mem] to [2]; [ne_test] is always [false] here. *)
let simulation_main
    ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2)
    ~tree_dir ~profile_fn ~preview ~use_concat ~no_Ne ~no_HaPC () =
  let profile_workflow = input profile_fn in
  let profile_name = Filename.chop_extension profile_fn in
  let profile = { profile_w = profile_workflow; profile_n = profile_name } in
  (* Every entry of [tree_dir] is treated as a tree file. *)
  let trees = Sys.readdir tree_dir |> Array.to_list in
  let dataset_l =
    derive_sim
      ~tree_dir ~trees ~profile ~preview ~use_concat ~ns
      ~no_Ne ~no_HaPC ~ne_test:false
  in
  let simulation_repo = Dataset.repo dataset_l ~preview in
  Repo.build ~outdir ~np ~mem:(`GB mem) ~logger simulation_repo
let
validation_main
~
outdir
?
(
indir
=
""
)
?
(
ns
=
0
)
?
(
np
=
2
)
?
(
mem
=
2
)
~
preview
~
fast_mode
~
no_Ne
~
ne_test
~
no_HaPC
~
tree_dir
~
profile_fn
~
use_concat
~
only_simu
()
=
(* simulated trees *)
let
profile_l
=
Profile
.
profile_l_of_splitted_profile
(
Profile
.
split_profile
(
input
profile_fn
))
in
let
sim_repo_l
=
List
.
map
profile_l
~
f
:
(
fun
profile
->
derive_profile
~
indir
~
ns
~
preview
~
fast_mode
~
no_Ne
~
ne_test
~
no_HaPC
~
tree_dir
~
profile
~
use_concat
~
only_simu
()
)
|>
List
.
concat
in
(* real trees *)
let
indir_dataset_l
=
if
indir
=
""
then
[]
else
parse_input_data
indir
in
let
dataset_l
=
indir_dataset_l
in
...
...
@@ -321,7 +335,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
repo_of_dataset_results_l
~
dataset_results_l
;
]
|>
List
.
concat
in
let
repo
=
repo_of_post_analyses_simu
@
repo_real_trees
@
all_repo_per_tree_simu
@
repo_post_analyses_al
l_trees
in
let
repo
=
sim_repo_l
@
repo_rea
l_trees
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
let
simulation_command
=
...
...
lib/post_analyses.ml
View file @
2a8d34e1
...
...
@@ -163,7 +163,7 @@ let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l =
let
plot_sens_spe_t_choices
~
t_choices_l
~
dataset_results_l
~
profile
:
sens_spe_t_choices_plot
directory
workflow
=
let
plot_sens_spe_t_choices
~
t_choices_l
~
dataset_results_l
~
profile
_prefix
:
sens_spe_t_choices_plot
directory
workflow
=
let
env
=
r_env
in
let
t_choices_dir
=
tmp
//
"t_choices_dir"
in
let
merged_results_dir
=
tmp
//
"merged_results_dir"
in
...
...
@@ -188,7 +188,7 @@ let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile : sens_spe_
file_dump
(
string
Scripts
.
plot_sens_spe_all_trees
)
;
opt
"--input_dir"
ident
t_choices_dir
;
opt
"--input_dir2"
ident
merged_results_dir
;
opt
"--profil"
string
profile
;
opt
"--profil"
string
profile
_prefix
;
opt
"--out "
ident
out
;
];]
]
...
...
@@ -196,7 +196,7 @@ let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile : sens_spe_
)
]
let
repo_post_analyses_all_trees_of_all_post_analyses_per_tree
~
profile
~
all_post_analyses_per_tree
=
let
repo_post_analyses_all_trees_of_all_post_analyses_per_tree
~
profile
_prefix
~
all_post_analyses_per_tree
=
let
t_choices_l
=
List
.
map
all_post_analyses_per_tree
~
f
:
(
fun
post_analyses_res
->
match
post_analyses_res
.
t_choices
with
|
Some
w
->
[
w
]
...
...
@@ -206,7 +206,7 @@ let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile ~all_pos
let
dataset_results_l
=
List
.
map
all_post_analyses_per_tree
~
f
:
(
fun
post_analyses_res
->
post_analyses_res
.
dataset_results_l
)
|>
List
.
concat
in
let
sens_spe_t_choices_plot
=
plot_sens_spe_t_choices
~
t_choices_l
~
dataset_results_l
~
profile
in
let
sens_spe_t_choices_plot
=
plot_sens_spe_t_choices
~
t_choices_l
~
dataset_results_l
~
profile
_prefix
in
Repo
.[
item
[
"sens_spe.pdf"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.sens_spe_auto_t.pdf"
]);
item
[
"all_t_choices.pdf"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.max_t_per_tree.pdf"
]);
...
...
lib/profile.ml
0 → 100644
View file @
2a8d34e1
open
Core_kernel
open
Bistro
.
Std
open
Bistro
.
EDSL
open
File_formats
(** A profile handed to the simulation steps of the pipeline. *)
type profile = {
  profile_w : text_file workflow;
  (** Workflow producing the profile file. *)
  profile_n : string;
  (** Short name attached to the profile (e.g. ["p0"]). *)
}
(** [split_profile profile_fn] builds a workflow that runs the embedded
    [generate_pairs.py] script on the profile workflow [profile_fn], inside
    the [carinerey/python_basics:07252018] docker image.

    Both embedded scripts are first copied into [tmp]; the script is then
    invoked from [tmp] with the output prefix [dest // "profile"]. *)
let split_profile profile_fn : text_file directory workflow =
  let prefix = dest // "profile" in
  let script = tmp // "generate_pairs.py" in
  let package = tmp // "diffsel_script_utils.py" in
  let env =
    docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07252018" ()
  in
  (* Dump an embedded script and copy it to [target]. *)
  let install content target =
    cmd "cp" [ file_dump (string content); target ]
  in
  let steps = [
    mkdir_p tmp;
    mkdir_p dest;
    cd tmp;
    install Scripts.diffsel_script_utils package;
    install Scripts.generate_pairs script;
    (*generate_pairs.py [options...] -o <output-prefix> <profiles-file> *)
    cmd "python" [
      string "generate_pairs.py";
      opt "-o" ident prefix;
      dep profile_fn;
    ];
  ] in
  workflow ~descr:"parse_profile.split_profile" [
    docker env (and_list steps)
  ]
(** [profile_l_of_splitted_profile splitted_profile] selects the three files
    ["profile_0.tsv"], ["profile_1.tsv"] and ["profile_2.tsv"] inside the
    directory workflow [splitted_profile] and returns them, in that order,
    as profiles named ["p0"], ["p1"] and ["p2"]. *)
let profile_l_of_splitted_profile splitted_profile =
  (* Build one profile from a file of the split directory and its name. *)
  let pick file_name label =
    { profile_w = splitted_profile / selector [ file_name ];
      profile_n = label }
  in
  [ pick "profile_0.tsv" "p0";
    pick "profile_1.tsv" "p1";
    pick "profile_2.tsv" "p2";
  ]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment