Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
2ac8dfe8
Commit
2ac8dfe8
authored
Jun 27, 2018
by
Philippe Veber
Browse files
put each dataset variant in its own module
parent
186f64bc
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
85 additions
and
84 deletions
+85
-84
lib/convergence_detection.ml
lib/convergence_detection.ml
+1
-1
lib/convergence_detection.mli
lib/convergence_detection.mli
+1
-1
lib/dataset.ml
lib/dataset.ml
+24
-0
lib/defs.ml
lib/defs.ml
+0
-23
lib/pipeline.ml
lib/pipeline.ml
+10
-59
lib/raw_dataset.ml
lib/raw_dataset.ml
+15
-0
lib/ready_dataset.ml
lib/ready_dataset.ml
+34
-0
No files found.
lib/convergence_detection.ml
View file @
2ac8dfe8
...
...
@@ -13,7 +13,7 @@ type det_out =
|
Diffsel_out
type
det_result
=
{
dataset
:
d
ataset
;
dataset
:
D
ataset
.
t
;
det_meth
:
det_meth
;
det_result
:
det_out
directory
workflow
;
}
...
...
lib/convergence_detection.mli
View file @
2ac8dfe8
...
...
@@ -13,7 +13,7 @@ type det_out =
|
Diffsel_out
type
det_result
=
{
dataset
:
d
ataset
;
dataset
:
D
ataset
.
t
;
det_meth
:
det_meth
;
det_result
:
det_out
directory
workflow
;
}
...
...
lib/dataset.ml
0 → 100644
View file @
2ac8dfe8
open
Core
open
Bistro_utils
(** A ready dataset together with the two string prefixes that {!repo}
    passes to [Repo.shift] when laying out its files. *)
type t = {
  model_prefix : string;
      (** Prefix applied first (innermost [Repo.shift]) by {!repo}. *)
  tree_prefix : string;
      (** Prefix applied second (outermost [Repo.shift]) by {!repo}. *)
  dataset : Ready_dataset.t;
      (** The dataset itself, in its ready form. *)
}
(** [repo ~preview dataset_l] builds one flat list of repo items covering
    every dataset of [dataset_l], in input order.

    For each dataset the items come from [Ready_dataset.repo] when [preview]
    is [true]; otherwise the dataset is first reduced with
    [Ready_dataset.to_raw] and the items come from [Raw_dataset.repo]. The
    items are then shifted by [model_prefix], and the result is shifted by
    [tree_prefix]. *)
let repo ~preview dataset_l =
  let items_of { model_prefix; tree_prefix; dataset } =
    let items =
      match preview with
      | true -> Ready_dataset.repo dataset
      | false -> Raw_dataset.repo (Ready_dataset.to_raw dataset)
    in
    (* Same nesting as [shift tree_prefix (shift model_prefix items)]. *)
    items |> Repo.shift model_prefix |> Repo.shift tree_prefix
  in
  List.concat_map dataset_l ~f:items_of
lib/defs.ml
View file @
2ac8dfe8
...
...
@@ -10,29 +10,6 @@ type output_parse_input_tree =
|
Tree4simu
|
Tree_diffsel
type
raw_dataset
=
{
input_tree
:
nhx
workflow
;
fna
:
nucleotide_fasta
workflow
;
}
type
ready_dataset
=
{
input_tree
:
nhx
workflow
;
tree_dataset
:
[
`tree_dataset
]
directory
workflow
;
fna
:
nucleotide_fasta
workflow
;
faa
:
aminoacid_fasta
workflow
;
}
type
(
'
a
,
'
b
)
w_dataset
=
|
Raw_dataset
of
'
a
|
Ready_dataset
of
'
b
type
dataset
=
{
model_prefix
:
string
;
tree_prefix
:
string
;
ready_dataset
:
ready_dataset
}
type
det_meth
=
|
Pcoc
|
Pcoc_gamma
...
...
lib/pipeline.ml
View file @
2ac8dfe8
...
...
@@ -6,55 +6,6 @@ open File_formats
open
Defs
open
Convergence_detection
let
ready_dataset_of_raw_dataset
raw_dataset
=
let
input_tree
=
raw_dataset
.
input_tree
in
let
fna
=
raw_dataset
.
fna
in
let
tree_dataset
=
Tree_dataset
.
prepare
input_tree
in
let
faa
=
Bppsuite
.
fna2faa
~
fna
in
{
input_tree
;
tree_dataset
;
fna
;
faa
}
let
raw_dataset_of_ready_dataset
ready_dataset
=
let
input_tree
=
ready_dataset
.
input_tree
in
let
fna
=
ready_dataset
.
fna
in
{
input_tree
;
fna
}
let
repo_of_ready_dataset
(
rd
:
ready_dataset
)
=
Repo
.[
item
[
"input_tree.nhx"
]
rd
.
input_tree
;
item
[
"tree.H0.node_ids"
]
(
Tree_dataset
.
nodes
rd
.
tree_dataset
H0
)
;
item
[
"tree.Ha.node_ids"
]
(
Tree_dataset
.
nodes
rd
.
tree_dataset
Ha
)
;
item
[
"tree.only_convergent_tags.nhx"
]
(
Tree_dataset
.
tree
rd
.
tree_dataset
`Detection
)
;
item
[
"tree.only_node_ids.nhx"
]
(
Tree_dataset
.
tree
rd
.
tree_dataset
`Simulation
)
;
item
[
"tree.diffsel"
]
(
Tree_dataset
.
diffsel_tree
rd
.
tree_dataset
)
;
item
[
"simulated_sequences.fna"
]
rd
.
fna
;
item
[
"simulated_sequences.faa"
]
rd
.
faa
;
]
|>
Repo
.
shift
"ready_dataset"
let
repo_of_raw_dataset
(
raw_dataset
:
raw_dataset
)
=
Repo
.[
item
[
"input_tree.nhx"
]
raw_dataset
.
input_tree
;
item
[
"simulated_sequences.fna"
]
raw_dataset
.
fna
;
]
|>
Repo
.
shift
"raw_dataset"
let
repo_of_dataset_l
~
preview
dataset_l
=
List
.
map
dataset_l
~
f
:
(
fun
(
dataset
:
Defs
.
dataset
)
->
let
model_prefix
=
dataset
.
model_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
let
w_dataset
=
if
preview
then
Ready_dataset
dataset
.
ready_dataset
else
Raw_dataset
(
raw_dataset_of_ready_dataset
dataset
.
ready_dataset
)
in
let
repo_d
=
match
w_dataset
with
|
Ready_dataset
d
->
repo_of_ready_dataset
d
|
Raw_dataset
d
->
repo_of_raw_dataset
d
in
Repo
.
shift
tree_prefix
(
Repo
.
shift
model_prefix
repo_d
)
)
|>
List
.
concat
let
derive_from_model
~
model
~
tree
~
tree_dataset
~
tree_prefix
~
profile_f
~
preview
=
let
model_prefix
=
Convergence_hypothesis
.
string_of_model
model
in
let
nb_sites
=
if
preview
then
20
else
1000
in
...
...
@@ -70,8 +21,8 @@ let derive_from_model ~model ~tree ~tree_dataset ~tree_prefix ~profile_f ~previe
let
fna
=
Bppsuite
.
bppseqgen_multi_profiles
~
descr
~
nb_sites
~
tree
~
config
:
config_p
~
profile_f
in
let
faa
=
Bppsuite
.
fna2faa
~
fna
in
let
ready_dataset
=
{
input_tree
=
tree
;
tree_dataset
;
fna
;
faa
}
in
{
model_prefix
;
tree_prefix
;
ready_dataset
}
let
ready_dataset
=
{
Ready_dataset
.
input_tree
=
tree
;
tree_dataset
;
fna
;
faa
}
in
{
Dataset
.
model_prefix
;
tree_prefix
;
dataset
=
ready_dataset
}
let
derive_from_tree
~
tree_dir
~
tree
~
profile_f
~
preview
=
let
tree_prefix
=
Filename
.
chop_extension
tree
in
...
...
@@ -126,12 +77,12 @@ let repo_of_dataset_results_l ~dataset_results_l =
)
|>
List
.
concat
let
derive_from_det_meth
~
det_meth
~
dataset
~
preview
=
let
faa
=
dataset
.
ready_
dataset
.
faa
in
let
fna
=
dataset
.
ready_
dataset
.
fna
in
let
derive_from_det_meth
~
det_meth
~
(
dataset
:
Dataset
.
t
)
~
preview
=
let
faa
=
dataset
.
dataset
.
faa
in
let
fna
=
dataset
.
dataset
.
fna
in
let
phy_n
=
Bppsuite
.
fa2phy
~
fna
in
let
pcoc_tree
=
Tree_dataset
.
tree
dataset
.
ready_
dataset
.
tree_dataset
`Detection
in
let
diffsel_tree
=
Tree_dataset
.
diffsel_tree
dataset
.
ready_
dataset
.
tree_dataset
in
let
pcoc_tree
=
Tree_dataset
.
tree
dataset
.
dataset
.
tree_dataset
`Detection
in
let
diffsel_tree
=
Tree_dataset
.
diffsel_tree
dataset
.
dataset
.
tree_dataset
in
let
w_every
=
if
preview
then
1
else
10
in
let
n_cycles
=
if
preview
then
100
else
1000
in
let
det_result
=
match
det_meth
with
...
...
@@ -151,8 +102,8 @@ let derive_from_dataset ~dataset ~preview =
)
in
let
merged_results
=
merge_results
~
res_by_tools
in
let
tsv
=
merged_results
in
let
faa
=
dataset
.
ready_
dataset
.
faa
in
let
tree
=
Tree_dataset
.
tree
dataset
.
ready_
dataset
.
tree_dataset
`Detection
in
let
faa
=
dataset
.
dataset
.
faa
in
let
tree
=
Tree_dataset
.
tree
dataset
.
dataset
.
tree_dataset
`Detection
in
let
plot_merged_results
=
plot_merge_results
~
res_by_tools
~
tsv
~
faa
~
tree
in
let
model_prefix
=
dataset
.
model_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
...
...
@@ -172,7 +123,7 @@ let main ~outdir ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview () =
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
in
let
dataset_results_l
=
derive_det
~
dataset_l
~
profile_fn
~
preview
in
let
repo
=
[
repo_of_d
ataset
_l
dataset_l
~
preview
;
D
ataset
.
repo
dataset_l
~
preview
;
repo_of_dataset_results_l
~
dataset_results_l
;
]
|>
List
.
concat
...
...
lib/raw_dataset.ml
0 → 100644
View file @
2ac8dfe8
open
Bistro
.
Std
open
File_formats
open
Bistro_utils
(** Raw variant of a dataset: an input tree and nucleotide sequences only. *)
type t = {
  input_tree : nhx workflow;
      (** Workflow for the input tree ([nhx]). *)
  fna : nucleotide_fasta workflow;
      (** Workflow for the nucleotide FASTA file. *)
}
(** [repo rd] lists the two files of a raw dataset as repo items
    (["input_tree.nhx"] and ["simulated_sequences.fna"]) and shifts them
    with ["raw_dataset"]. *)
let repo { input_tree; fna } =
  Repo.shift "raw_dataset"
    [
      Repo.item [ "input_tree.nhx" ] input_tree;
      Repo.item [ "simulated_sequences.fna" ] fna;
    ]
lib/ready_dataset.ml
0 → 100644
View file @
2ac8dfe8
open
Bistro
.
Std
open
Bistro_utils
open
File_formats
(** Ready variant of a dataset: the raw inputs plus the artefacts derived
    from them (see {!of_raw}). *)
type t = {
  input_tree : nhx workflow;
      (** Workflow for the input tree ([nhx]). *)
  tree_dataset : [ `tree_dataset ] directory workflow;
      (** Workflow for the directory that the [Tree_dataset] accessors
          ([nodes], [tree], [diffsel_tree]) read from. *)
  fna : nucleotide_fasta workflow;
      (** Workflow for the nucleotide FASTA file. *)
  faa : aminoacid_fasta workflow;
      (** Workflow for the amino-acid FASTA file. *)
}
(** [of_raw raw] completes a raw dataset into a ready one. The input tree and
    nucleotide sequences are carried over unchanged; [tree_dataset] is
    obtained with [Tree_dataset.prepare] on the input tree, and [faa] with
    [Bppsuite.fna2faa] on the nucleotide sequences. *)
let of_raw { Raw_dataset.input_tree; fna } =
  let tree_dataset = Tree_dataset.prepare input_tree in
  let faa = Bppsuite.fna2faa ~fna in
  { input_tree; tree_dataset; fna; faa }
(** [repo rd] lists every file of a ready dataset as repo items and shifts
    them with ["ready_dataset"].

    Besides the input tree and the two sequence files, the list holds the
    artefacts read from [tree_dataset] through [Tree_dataset.nodes] (for [H0]
    and [Ha]), [Tree_dataset.tree] (for [`Detection] and [`Simulation]) and
    [Tree_dataset.diffsel_tree]. *)
let repo { input_tree; tree_dataset; fna; faa } =
  Repo.shift "ready_dataset"
    [
      Repo.item [ "input_tree.nhx" ] input_tree;
      Repo.item [ "tree.H0.node_ids" ] (Tree_dataset.nodes tree_dataset H0);
      Repo.item [ "tree.Ha.node_ids" ] (Tree_dataset.nodes tree_dataset Ha);
      Repo.item
        [ "tree.only_convergent_tags.nhx" ]
        (Tree_dataset.tree tree_dataset `Detection);
      Repo.item
        [ "tree.only_node_ids.nhx" ]
        (Tree_dataset.tree tree_dataset `Simulation);
      Repo.item [ "tree.diffsel" ] (Tree_dataset.diffsel_tree tree_dataset);
      Repo.item [ "simulated_sequences.fna" ] fna;
      Repo.item [ "simulated_sequences.faa" ] faa;
    ]
(** [to_raw rd] reduces a ready dataset to its raw form: only the
    [input_tree] and [fna] fields are kept, packaged as a [Raw_dataset.t]. *)
let to_raw { input_tree; fna; _ } =
  (* The trailing [_] makes it explicit that [tree_dataset] and [faa] are
     dropped on purpose; without it the pattern is flagged by warning 9
     (labels not bound in a record pattern). *)
  { Raw_dataset.input_tree; fna }
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment