Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
705914d1
Commit
705914d1
authored
Aug 27, 2018
by
Carine Rey
Browse files
create a fixed seed by dataset
parent
110b7fe9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
41 additions
and
24 deletions
+41
-24
app/jbuild
app/jbuild
+1
-1
lib/dataset.ml
lib/dataset.ml
+1
-0
lib/pipeline.ml
lib/pipeline.ml
+39
-23
No files found.
app/jbuild
View file @
705914d1
...
...
@@ -3,5 +3,5 @@
(executable
((name reviewphiltrans_app)
(public_name reviewphiltrans)
(libraries (reviewphiltrans))
(libraries (
str
reviewphiltrans))
))
lib/dataset.ml
View file @
705914d1
...
...
@@ -6,6 +6,7 @@ type t = {
tree_prefix
:
string
;
is_real
:
bool
;
dataset
:
Ready_dataset
.
t
;
seed
:
int
;
}
let
repo
~
preview
dataset_l
=
...
...
lib/pipeline.ml
View file @
705914d1
...
...
@@ -7,7 +7,7 @@ open Defs
open
Convergence_detection
open
Profile
let
parse_input_data
indir
=
let
parse_input_data
~
seed
indir
=
let
datasets
=
Array
.
to_list
@@
Sys
.
readdir
indir
in
List
.
map
datasets
~
f
:
(
fun
dataset_prefix
->
let
files
=
Array
.
to_list
@@
Sys
.
readdir
(
Filename
.
concat
indir
dataset_prefix
)
in
...
...
@@ -37,7 +37,8 @@ let parse_input_data indir =
let
dataset
=
{
Dataset
.
model_prefix
=
tree_prefix
;
is_real
=
true
;
tree_prefix
=
dataset_prefix
;
dataset
=
Ready_dataset
.
of_raw
~
descr
:
(
"real_data."
^
tree_prefix
)
raw_dataset
dataset
=
Ready_dataset
.
of_raw
~
descr
:
(
"real_data."
^
tree_prefix
)
raw_dataset
;
seed
;
}
in
[
dataset
]
else
...
...
@@ -45,7 +46,18 @@ let parse_input_data indir =
)
|>
List
.
concat
let
derive_from_model
~
model
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
=
(** [calc_fixed_seed ~str seed] derives a reproducible seed from the global
    [seed] and the string [str].

    The MD5 hex digest of [str] is stripped of every non-decimal character,
    prefixed with ["1"] and truncated to at most 10 characters. Because that
    identifier always starts with ["1"], dividing it by [10 ** length] gives a
    factor in [[0.1, 0.2)]. The result is
    [int_of_float (abs (seed * factor + 1.))], so the same [(str, seed)] pair
    always yields the same value.

    @param str string hashed to make the seed specific to one item
    @param seed global seed that is scaled by the factor derived from [str]
    @return the derived seed *)
let calc_fixed_seed ~(str : string) (seed : int) : int =
  let max_id_len = 10 in
  let str_digest = Md5.to_hex (Md5.digest_string str) in
  let digits = Str.global_replace (Str.regexp "[^0-9]+") "" str_digest in
  let id = "1" ^ digits in
  (* Truncate only when there is something to cut: a digest holding fewer
     than 9 decimal digits gives an identifier shorter than [max_id_len], and
     an unconditional [String.sub id 0 10] would raise [Invalid_argument].
     The scaling below uses the actual identifier length, so a shorter
     identifier still maps to a factor in [0.1, 0.2). *)
  let id =
    if String.length id > max_id_len then String.sub id 0 max_id_len else id
  in
  let id_int = float_of_int (int_of_string id) in
  let id_len = float_of_int (String.length id) in
  let seed_f = float_of_int seed in
  let size_str_id = 10. ** id_len in
  let res = Float.abs (Pervasives.(seed_f *. id_int /. size_str_id +. 1.)) in
  int_of_float res
let
derive_from_model
~
model
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
~
seed
=
let
model_prefix
=
Convergence_hypothesis
.
string_of_model
model
in
let
nb_sites
=
ns
in
let
nodes
=
Tree_dataset
.
nodes
tree_dataset
model
in
...
...
@@ -110,7 +122,8 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~pr
in
let
profile_f
=
profile
.
profile_f
in
let
profile_c
=
profile
.
profile_c
in
let
seed
=
Random
.
int
Int
.
max_value
in
(*let seed = Random.int Int.max_value in*)
let
seed
=
calc_fixed_seed
~
str
:
descr
seed
in
printf
"Bppseqgen seed: %s %s %i
\n
"
model_prefix
tree_prefix
seed
;
let
run_fna
=
Bppsuite
.
bppseqgen_multi_profiles
~
descr
~
nb_sites
~
tree
~
config
:
config_p
~
profile_f
~
profile_c
~
ne_c
~
ne_a
~
seed
in
let
fna
=
Bppsuite
.
bppseqgen_multi_profiles_get_fa
run_fna
in
...
...
@@ -118,15 +131,15 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~pr
let
faa
=
Bppsuite
.
fna2faa
~
fna
in
let
ready_dataset
=
{
Ready_dataset
.
input_tree
=
input_tree
;
tree_dataset
;
fna
;
faa
;
fna_infos
}
in
{
Dataset
.
model_prefix
;
is_real
=
false
;
tree_prefix
;
dataset
=
ready_dataset
}
{
Dataset
.
model_prefix
;
is_real
=
false
;
tree_prefix
;
dataset
=
ready_dataset
;
seed
}
let
derive_from_tree
~
tree_dir
~
tree
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
=
let
derive_from_tree
~
tree_dir
~
tree
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
~
seed
=
let
tree_prefix
=
Filename
.
chop_extension
tree
in
let
input_tree
=
input
(
Filename
.
concat
tree_dir
tree
)
in
let
tree_dataset
=
Tree_dataset
.
prepare
~
descr
:
(
"simulated_data."
^
tree_prefix
)
input_tree
in
let
ready_dataset_H0_NeG5
=
derive_from_model
~
model
:
H0_NeG5
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
in
let
ready_dataset_HaPCOC
=
derive_from_model
~
model
:
HaPCOC
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
in
let
ready_dataset_HaPC_NeG5
=
derive_from_model
~
model
:
HaPC_NeG5
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
in
let
ready_dataset_H0_NeG5
=
derive_from_model
~
model
:
H0_NeG5
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
~
seed
in
let
ready_dataset_HaPCOC
=
derive_from_model
~
model
:
HaPCOC
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
~
seed
in
let
ready_dataset_HaPC_NeG5
=
derive_from_model
~
model
:
HaPC_NeG5
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
~
seed
in
let
ready_dataset_basis_hyps
=
[
ready_dataset_H0_NeG5
;
ready_dataset_HaPCOC
;
ready_dataset_HaPC_NeG5
]
in
let
models
=
Convergence_hypothesis
.[
[
...
...
@@ -165,16 +178,16 @@ let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~n
]
|>
List
.
concat
in
let
dataset_per_hypo
=
List
.
map
models
~
f
:
(
fun
model
->
derive_from_model
~
model
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
derive_from_model
~
model
~
input_tree
~
tree_dataset
~
tree_prefix
~
profile
~
preview
~
ns
~
seed
)
in
let
_concat_H0HaPCOC
=
{
Dataset
.
model_prefix
=
"H0_NeG5+HaPCOC"
;
tree_prefix
;
is_real
=
false
;
dataset
=
Ready_dataset
.
paste
ready_dataset_H0_NeG5
.
dataset
ready_dataset_HaPCOC
.
dataset
}
in
let
concat_H0HaPC
=
{
Dataset
.
model_prefix
=
"H0_NeG5+HaPC_NeG5"
;
tree_prefix
;
is_real
=
false
;
dataset
=
Ready_dataset
.
paste
ready_dataset_H0_NeG5
.
dataset
ready_dataset_HaPC_NeG5
.
dataset
}
in
let
_concat_H0HaPCOC
=
{
Dataset
.
model_prefix
=
"H0_NeG5+HaPCOC"
;
tree_prefix
;
is_real
=
false
;
dataset
=
Ready_dataset
.
paste
ready_dataset_H0_NeG5
.
dataset
ready_dataset_HaPCOC
.
dataset
;
seed
}
in
let
concat_H0HaPC
=
{
Dataset
.
model_prefix
=
"H0_NeG5+HaPC_NeG5"
;
tree_prefix
;
is_real
=
false
;
dataset
=
Ready_dataset
.
paste
ready_dataset_H0_NeG5
.
dataset
ready_dataset_HaPC_NeG5
.
dataset
;
seed
}
in
let
dataset_concat_hypos
=
if
use_concat
then
[
concat_H0HaPC
;]
else
[]
in
List
.
concat
[
ready_dataset_basis_hyps
;
dataset_per_hypo
;
dataset_concat_hypos
]
let
derive_sim
~
tree_dir
~
trees
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
=
let
derive_sim
~
tree_dir
~
trees
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
~
seed
=
List
.
map
trees
~
f
:
(
fun
tree
->
derive_from_tree
~
tree_dir
~
tree
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
)
derive_from_tree
~
tree_dir
~
tree
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
~
seed
)
|>
List
.
concat
...
...
@@ -244,12 +257,13 @@ let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
let
tree_conv
=
Tree_dataset
.
topological_tree
dataset
.
dataset
.
tree_dataset
in
let
w_every
=
if
preview
then
1
else
1
in
let
n_cycles
=
if
preview
then
10
else
2000
in
let
seed
=
dataset
.
seed
+
10
in
match
det_meth
with
|
`Pcoc
->
`Pcoc
(
Pcoc
.
pcoc
~
catx_est
:
10
~
plot_complete
:
true
~
gamma
:
false
~
faa
~
tree
:
tree_sc
)
|
`Pcoc_gamma
->
`Pcoc_gamma
(
Pcoc
.
pcoc
~
catx_est
:
10
~
plot_complete
:
true
~
gamma
:
true
~
faa
~
tree
:
tree_sc
)
|
`Pcoc_C60
->
`Pcoc_C60
(
Pcoc
.
pcoc
~
catx_est
:
60
~
plot_complete
:
true
~
gamma
:
false
~
faa
~
tree
:
tree_sc
)
|
`Tdg09
->
`Tdg09
(
Tamuri
.
tdg09
~
faa
~
tree
:
tree_sc
)
|
`Diffsel
->
`Diffsel
(
Diffsel
.
diffsel
~
phy_n
~
tree
:
diffsel_tree
~
w_every
~
n_cycles
~
id
:
1
~
tag
:
"master_a4b5"
~
seed
:
(
Random
.
int
Int
.
max_value
)
()
)
|
`Diffsel
->
`Diffsel
(
Diffsel
.
diffsel
~
phy_n
~
tree
:
diffsel_tree
~
w_every
~
n_cycles
~
id
:
1
~
tag
:
"master_a4b5"
~
seed
()
)
|
`Identical_LG
->
`Identical_LG
(
Identical
.
identical
~
faa
~
tree_id
~
tree_sc
~
prot_model
:
"LG08"
)
|
`Identical_WAG
->
`Identical_WAG
(
Identical
.
identical
~
faa
~
tree_id
~
tree_sc
~
prot_model
:
"WAG01"
)
|
`Topological_LG
->
`Topological_LG
(
Topological
.
topological
~
faa
~
tree
:
tree_id
~
tree_conv
~
prot_model
:
"LG08"
)
...
...
@@ -305,7 +319,7 @@ let derive_profile ?(indir = "") ?(ns = 0) ~preview ~fast_mode ~no_Ne ~ne_test ~
let
trees
=
[
tree
]
in
let
tree_prefix
=
Filename
.
chop_extension
tree
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
in
derive_sim
~
tree_dir
~
trees
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
~
seed
in
let
dataset_results_l
=
if
only_simu
then
[]
...
...
@@ -357,17 +371,17 @@ let logger =
time_logger
#
logger
;
]
let
detection_main
~
outdir
~
indir
?
(
np
=
2
)
?
(
mem
=
2
)
~
preview
~
fast_mode
()
=
let
dataset_l
=
parse_input_data
indir
in
let
detection_main
~
outdir
~
indir
?
(
np
=
2
)
?
(
mem
=
2
)
~
preview
~
fast_mode
?
(
seed
=
Random
.
int
Int
.
max_value
)
()
=
let
dataset_l
=
parse_input_data
~
seed
indir
in
let
dataset_results_l
=
derive_det
~
dataset_l
~
preview
~
fast_mode
in
let
repo
=
repo_of_dataset_results_l
~
dataset_results_l
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
let
simulation_main
~
outdir
?
(
ns
=
0
)
?
(
np
=
2
)
?
(
mem
=
2
)
~
tree_dir
~
profile_fn
~
preview
~
use_concat
~
no_Ne
~
no_HaPC
~
seed
()
=
let
simulation_main
~
outdir
?
(
ns
=
0
)
?
(
np
=
2
)
?
(
mem
=
2
)
~
tree_dir
~
profile_fn
~
preview
~
use_concat
~
no_Ne
~
no_HaPC
?
(
seed
=
Random
.
int
Int
.
max_value
)
()
=
let
nb_sites
=
if
ns
=
0
then
(
if
preview
then
20
else
50
)
else
ns
in
let
profile
=
Profile
.
profile_l_of_splitted_profile
~
nb_cat
:
1
~
nb_sites
profile_fn
in
let
trees
=
Array
.
to_list
@@
Sys
.
readdir
tree_dir
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
:
false
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
~
ne_test
:
false
~
seed
in
let
repo
=
Dataset
.
repo
dataset_l
~
preview
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
...
...
@@ -379,7 +393,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
let
profile
=
Profile
.
profile_l_of_splitted_profile
~
nb_cat
:
3
~
nb_sites
profile_fn
in
let
sim_repo_l
=
derive_profile
~
indir
~
ns
~
preview
~
fast_mode
~
no_Ne
~
ne_test
~
no_HaPC
~
tree_dir
~
profile
~
use_concat
~
only_simu
~
seed
()
in
(* real trees *)
let
indir_dataset_l
=
if
indir
=
""
then
[]
else
parse_input_data
indir
in
let
indir_dataset_l
=
if
indir
=
""
then
[]
else
parse_input_data
~
seed
indir
in
let
dataset_l
=
indir_dataset_l
in
let
dataset_results_l
=
if
only_simu
then
...
...
@@ -424,7 +438,7 @@ let simulation_command =
and
seed
=
flag
"--seed"
(
optional
int
)
~
doc
:
"INT Global seed"
in
simulation_main
~
outdir
?
ns
?
np
?
mem
~
no_Ne
~
no_HaPC
~
tree_dir
~
profile_fn
~
preview
~
use_concat
~
seed
simulation_main
~
outdir
?
ns
?
np
?
mem
~
no_Ne
~
no_HaPC
~
tree_dir
~
profile_fn
~
preview
~
use_concat
?
seed
]
let
detection_command
=
...
...
@@ -444,8 +458,10 @@ let detection_command =
flag
"--np"
(
optional
int
)
~
doc
:
"INT Number of available processors"
and
mem
=
flag
"--mem"
(
optional
int
)
~
doc
:
"INT Available memory (in GB)"
and
seed
=
flag
"--seed"
(
optional
int
)
~
doc
:
"INT Global seed"
in
detection_main
~
outdir
~
indir
?
np
?
mem
~
preview
~
fast_mode
detection_main
~
outdir
~
indir
?
np
?
mem
~
preview
~
fast_mode
?
seed
]
let
validation_command
=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment