Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
35a594d0
Commit
35a594d0
authored
Jan 15, 2019
by
Carine Rey
Committed by
Philippe Veber
Sep 20, 2019
Browse files
change realdata inputdir tree
parent
cb149119
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
1993 additions
and
28 deletions
+1993
-28
Makefile
Makefile
+11
-0
example/real_data/besnard2009/Alignments/cyp_coding.fna
example/real_data/besnard2009/Alignments/cyp_coding.fna
+0
-0
example/real_data/besnard2009/Alignments/cyp_coding_bis.fna
example/real_data/besnard2009/Alignments/cyp_coding_bis.fna
+1896
-0
example/real_data/besnard2009/besnard2009.nhx
example/real_data/besnard2009/besnard2009.nhx
+0
-0
lib/dataset.ml
lib/dataset.ml
+22
-9
lib/pipeline.ml
lib/pipeline.ml
+54
-19
lib/raw_dataset.ml
lib/raw_dataset.ml
+10
-0
No files found.
Makefile
View file @
35a594d0
...
@@ -39,6 +39,16 @@ test:
...
@@ -39,6 +39,16 @@ test:
mv
dag.dot dagtest_val.dot
&&
\
mv
dag.dot dagtest_val.dot
&&
\
dot
-Tsvg
dagtest_val.dot
-o
dagtest_val.svg
dot
-Tsvg
dagtest_val.dot
-o
dagtest_val.svg
# -----------------------------------------------------------------------
# Real-data test (runs reviewphiltrans on example/real_data)
# -----------------------------------------------------------------------
.PHONY
:
realdata_test
realdata_test
:
cd
example
&&
\
reviewphiltrans realdata
--outdir
outdir_realdata_test
--indir
real_data
--np
4
--seed
4256073781403810077
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
# big experiments
# big experiments
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
...
@@ -61,6 +71,7 @@ clean:
...
@@ -61,6 +71,7 @@ clean:
clean-test
:
clean-test
:
rm
-rf
example/_bistro
rm
-rf
example/_bistro
rm
-rf
example/outdir_test
rm
-rf
example/outdir_test
rm
-rf
example/outdir_realdata_test
rm
-rf
example/report.log
rm
-rf
example/report.log
rm
-rf
example/dot.dag
rm
-rf
example/dot.dag
...
...
example/real_data/besnard2009/cyp_coding.fna
→
example/real_data/besnard2009/
Alignments/
cyp_coding.fna
View file @
35a594d0
File moved
example/real_data/besnard2009/Alignments/cyp_coding_bis.fna
0 → 100644
View file @
35a594d0
This diff is collapsed.
Click to expand it.
example/real_data/besnard2009/
cyp_coding
.nhx
→
example/real_data/besnard2009/
besnard2009
.nhx
View file @
35a594d0
File moved
lib/dataset.ml
View file @
35a594d0
...
@@ -9,22 +9,35 @@ type t = {
...
@@ -9,22 +9,35 @@ type t = {
seed
:
int
;
seed
:
int
;
}
}
let
repo
dataset_l
=
let
repo
dataset_l
=
List
.
map
dataset_l
~
f
:
(
fun
dataset
->
List
.
map
dataset_l
~
f
:
(
fun
dataset
->
let
model_prefix
=
dataset
.
model_prefix
in
let
model_prefix
=
dataset
.
model_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
let
repo_ready_data
=
Ready_dataset
.
repo
dataset
.
dataset
in
if
dataset
.
is_real
then
let
repo_raw_data
=
Raw_dataset
.
repo
~
prefix
:
model_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
in
let
repo_realdata
=
Raw_dataset
.
repo_realdata
~
ali_prefix
:
model_prefix
~
tree_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
in
List
.
concat
[
repo_realdata
Repo
.
shift
"minimal"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
|>
Repo
.
shift
"Dataset"
Repo
.
shift
"debug"
repo_ready_data
;
|>
Repo
.
shift
tree_prefix
]
else
|>
Repo
.
shift
"dataset"
let
repo_ready_data
=
Ready_dataset
.
repo
dataset
.
dataset
in
|>
Repo
.
shift
model_prefix
let
repo_raw_data
=
Raw_dataset
.
repo
~
prefix
:
model_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
in
|>
Repo
.
shift
"Results_per_hypothesis"
List
.
concat
[
Repo
.
shift
"minimal"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"debug"
repo_ready_data
;
]
|>
Repo
.
shift
"dataset"
|>
Repo
.
shift
model_prefix
|>
Repo
.
shift
"Results_per_hypothesis"
)
)
|>
List
.
concat
|>
List
.
concat
let
add_indels_to_dataset
d
~
seed
=
let
add_indels_to_dataset
d
~
seed
=
let
p
=
0
.
33
in
let
p
=
0
.
33
in
let
model_prefix
=
sprintf
"%s_0.33_i"
d
.
model_prefix
in
let
model_prefix
=
sprintf
"%s_0.33_i"
d
.
model_prefix
in
...
...
lib/pipeline.ml
View file @
35a594d0
...
@@ -6,8 +6,13 @@ open Convergence_hypothesis
...
@@ -6,8 +6,13 @@ open Convergence_hypothesis
open
Profile
open
Profile
let
parse_input_data
~
seed
indir
=
let
parse_input_data
~
seed
indir
=
let
error_message
=
{
|
I
need
a
file
"tree.nhx"
containing
the
annotated
tree
and
a
directory
"Alignments"
containing
fasta
alignments
(
in
nt
)
with
the
format
"gene1.fna"
,
"gene2.fna"
,...
|
}
in
let
datasets
=
Array
.
to_list
@@
Sys
.
readdir
indir
in
let
datasets
=
Array
.
to_list
@@
Sys
.
readdir
indir
in
List
.
map
datasets
~
f
:
(
fun
dataset_prefix
->
List
.
map
datasets
~
f
:
(
fun
dataset_prefix
->
printf
"Real dataset:
\n\t
Tree: %s
\n
"
dataset_prefix
;
let
files
=
Array
.
to_list
@@
Sys
.
readdir
(
Filename
.
concat
indir
dataset_prefix
)
in
let
files
=
Array
.
to_list
@@
Sys
.
readdir
(
Filename
.
concat
indir
dataset_prefix
)
in
if
List
.
length
files
=
2
then
if
List
.
length
files
=
2
then
let
h_file
=
List
.
hd
files
in
let
h_file
=
List
.
hd
files
in
...
@@ -22,25 +27,34 @@ let parse_input_data ~seed indir =
...
@@ -22,25 +27,34 @@ let parse_input_data ~seed indir =
|
None
->
""
|
None
->
""
in
in
let
t_file_ext
=
Filename
.
split_extension
t_file
in
let
t_file_ext
=
Filename
.
split_extension
t_file
in
let
fna
,
input_tree
=
match
(
h_file_ext
,
t_file_ext
)
with
let
fna_dir
,
input_tree
=
match
(
h_file_ext
,
t_file_ext
,
h_file
,
t_file
)
with
|
(
_
,
Some
"fna"
)
,
(
_
,
Some
"nhx"
)
->
h_file
,
t_file
|
_
,
(
_
,
Some
"nhx"
)
,
"Alignments"
,
_
->
h_file
,
t_file
|
(
_
,
Some
"nhx"
)
,
(
_
,
Some
"fna"
)
->
t_file
,
h_file
|
(
_
,
Some
"nhx"
)
,
_
,
_
,
"Alignments"
->
t_file
,
h_file
|
_
->
failwith
({
|
Syntax
error
:
extension
errors
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
)
^
" nhx: "
^
(
h_file
)
^
" fna: "
^
(
t_file
))
|
_
,
_
,
_
,
"Alignments"
->
failwith
({
|
Syntax
error
:
Naming
errors
in
|
})
|
_
,_,_,_
->
failwith
({
|
Syntax
error
:
Naming
errors
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
)
^
"
1st file: "
^
(
h_file
)
^
"
2nd file: "
^
(
t_file
)
^
error_message
)
in
in
let
tree_prefix
=
Filename
.
chop_extension
input_tree
in
let
tree_prefix
=
Filename
.
chop_extension
input_tree
in
let
input_tree
=
Workflow
.
input
(
Filename
.
concat
indir
(
Filename
.
concat
dataset_prefix
input_tree
))
in
let
input_tree
=
Workflow
.
input
(
Filename
.
concat
indir
(
Filename
.
concat
dataset_prefix
input_tree
))
in
let
fna
=
Workflow
.
input
(
Filename
.
concat
indir
(
Filename
.
concat
dataset_prefix
fna
))
in
let
fna_l
=
Array
.
to_list
@@
Sys
.
readdir
(
Filename
.
concat
indir
(
dataset_prefix
^
"/"
^
fna_dir
))
in
let
fna_infos
=
None
in
printf
"%i files detected in %s
\n
"
(
List
.
length
fna_l
)
fna_dir
;
let
raw_dataset
=
Raw_dataset
.{
input_tree
;
fna
;
fna_infos
}
in
List
.
map
fna_l
~
f
:
(
function
fna
->
let
dataset
=
{
Dataset
.
model_prefix
=
tree_prefix
;
let
fna_prefix
=
Filename
.
chop_extension
fna
in
is_real
=
true
;
printf
"%s: %s
\n
"
fna_prefix
(
Filename
.
concat
indir
(
Filename
.
concat
(
dataset_prefix
^
"/"
^
fna_dir
)
fna
));
tree_prefix
=
dataset_prefix
;
let
fna
=
Workflow
.
input
(
Filename
.
concat
indir
(
Filename
.
concat
(
dataset_prefix
^
"/"
^
fna_dir
)
fna
))
in
dataset
=
Ready_dataset
.
of_raw
~
descr
:
(
"real_data."
^
tree_prefix
)
raw_dataset
;
let
fna_infos
=
None
in
seed
;
let
raw_dataset
=
Raw_dataset
.{
input_tree
;
fna
;
fna_infos
}
in
}
in
let
dataset
=
{
Dataset
.
model_prefix
=
fna_prefix
;
[
dataset
]
is_real
=
true
;
tree_prefix
=
tree_prefix
;
dataset
=
Ready_dataset
.
of_raw
~
descr
:
(
"real_data."
^
tree_prefix
)
raw_dataset
;
seed
;
}
in
dataset
)
else
else
failwith
({
|
More
than
2
files
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
))
failwith
({
|
More
than
2
files
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
)
^
error_message
)
)
)
|>
List
.
concat
|>
List
.
concat
...
@@ -131,7 +145,6 @@ let repo_of_detection_result res =
...
@@ -131,7 +145,6 @@ let repo_of_detection_result res =
|
_
->
[]
|
_
->
[]
]
|>
List
.
concat
]
|>
List
.
concat
|>
Repo
.
shift
det_meth_prefix
|>
Repo
.
shift
det_meth_prefix
|>
Repo
.
shift
"Detection_tools"
let
repo_of_dataset_results_l
~
dataset_results_l
=
let
repo_of_dataset_results_l
~
dataset_results_l
=
List
.
map
dataset_results_l
~
f
:
(
fun
dataset_results
->
List
.
map
dataset_results_l
~
f
:
(
fun
dataset_results
->
...
@@ -145,13 +158,35 @@ let repo_of_dataset_results_l ~dataset_results_l =
...
@@ -145,13 +158,35 @@ let repo_of_dataset_results_l ~dataset_results_l =
let
repo
=
let
repo
=
merged_results_item
::
merged_results_item
::
plot_merged_results_item
::
plot_merged_results_item
::
(
List
.
map
det_results_l
~
f
:
repo_of_detection_result
|>
List
.
concat
)
(
List
.
map
det_results_l
~
f
:
repo_of_detection_result
|>
List
.
concat
|>
Repo
.
shift
"Detection_tools"
)
in
in
repo
repo
|>
Repo
.
shift
dataset_results
.
model_prefix
|>
Repo
.
shift
dataset_results
.
model_prefix
)
)
|>
List
.
concat
|>
List
.
concat
(** [repo_of_real_dataset_results_l ~dataset_results_l] builds the repo
    entries for the results of every dataset in [dataset_results_l] and
    concatenates them into one list.

    For each dataset, two groups of entries are produced and then shifted
    under the dataset's [tree_prefix]:
    - under ["Merged_Results"]: the merged results table and its plot, named
      [<tree_prefix>.<model_prefix>.merged_results.tsv] and
      [<tree_prefix>.<model_prefix>.plot_merged_results.svg];
    - under ["Results_per_Detection_tool"], then [model_prefix]: the entries
      returned by [repo_of_detection_result] for each element of
      [res_by_tools]. *)
let repo_of_real_dataset_results_l ~dataset_results_l =
  let repo_of_one dataset_results =
    (* [res_by_tools] is read first, as in the previous layout, so that the
       record type of [dataset_results] is resolved from the same field. *)
    let per_tool_results = dataset_results.res_by_tools in
    let tree_prefix = dataset_results.tree_prefix in
    let model_prefix = dataset_results.model_prefix in
    let merged_repo =
      Repo.shift "Merged_Results" [
        Repo.item
          [ tree_prefix ^ "." ^ model_prefix ^ ".merged_results.tsv" ]
          dataset_results.merged_results ;
        Repo.item
          [ tree_prefix ^ "." ^ model_prefix ^ ".plot_merged_results.svg" ]
          dataset_results.plot_merged_results ;
      ]
    in
    let per_tool_repo =
      List.map per_tool_results ~f:repo_of_detection_result
      |> List.concat
      |> Repo.shift model_prefix
      |> Repo.shift "Results_per_Detection_tool"
    in
    List.concat [ merged_repo ; per_tool_repo ]
    |> Repo.shift tree_prefix
  in
  List.map dataset_results_l ~f:repo_of_one
  |> List.concat
let
derive_from_det_meth
~
det_meth
~
(
dataset
:
Dataset
.
t
)
~
preview
=
let
derive_from_det_meth
~
det_meth
~
(
dataset
:
Dataset
.
t
)
~
preview
=
let
model_prefix
=
dataset
.
model_prefix
in
let
model_prefix
=
dataset
.
model_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
...
@@ -302,7 +337,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
...
@@ -302,7 +337,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
in
in
let
repo_real_trees
=
[
let
repo_real_trees
=
[
Dataset
.
repo
dataset_l
;
Dataset
.
repo
dataset_l
;
repo_of_dataset_results_l
~
dataset_results_l
;
repo_of_
real_
dataset_results_l
~
dataset_results_l
;
]
|>
List
.
concat
]
|>
List
.
concat
in
in
let
repo
=
(
Repo
.
shift
"Simulated_datasets"
sim_repo_l
#
repo
)
@
(
Repo
.
shift
"Real_datasets"
repo_real_trees
)
in
let
repo
=
(
Repo
.
shift
"Simulated_datasets"
sim_repo_l
#
repo
)
@
(
Repo
.
shift
"Real_datasets"
repo_real_trees
)
in
...
@@ -358,7 +393,7 @@ let realdata_main ~outdir ~indir ~preview ~use_diffsel ~use_c60 ?(np = 2) ?(mem
...
@@ -358,7 +393,7 @@ let realdata_main ~outdir ~indir ~preview ~use_diffsel ~use_c60 ?(np = 2) ?(mem
let
dataset_results_l
=
derive_det
~
dataset_l
~
preview
~
use_diffsel
~
use_c60
in
let
dataset_results_l
=
derive_det
~
dataset_l
~
preview
~
use_diffsel
~
use_c60
in
let
repo_real_trees
=
[
let
repo_real_trees
=
[
Dataset
.
repo
dataset_l
;
Dataset
.
repo
dataset_l
;
repo_of_dataset_results_l
~
dataset_results_l
;
repo_of_
real_
dataset_results_l
~
dataset_results_l
;
]
|>
List
.
concat
(* list of repos *)
]
|>
List
.
concat
(* list of repos *)
in
in
let
repo
=
Repo
.
shift
"Real_datasets"
repo_real_trees
in
let
repo
=
Repo
.
shift
"Real_datasets"
repo_real_trees
in
...
...
lib/raw_dataset.ml
View file @
35a594d0
...
@@ -13,3 +13,13 @@ let repo ~prefix rd =
...
@@ -13,3 +13,13 @@ let repo ~prefix rd =
item
[
prefix
^
".nhx"
]
rd
.
input_tree
;
item
[
prefix
^
".nhx"
]
rd
.
input_tree
;
item
[
prefix
^
".fna"
]
rd
.
fna
;
item
[
prefix
^
".fna"
]
rd
.
fna
;
]
]
(** [repo_realdata ~tree_prefix ~ali_prefix rd] lists the repo entries for
    the input files of a real dataset:
    - [<tree_prefix>.nhx], bound to [rd.input_tree];
    - [<ali_prefix>.fna], bound to [rd.fna] and shifted under
      ["Alignments"]. *)
let repo_realdata ~tree_prefix ~ali_prefix rd =
  let tree_items =
    Repo.[ item [ tree_prefix ^ ".nhx" ] rd.input_tree ]
  in
  let alignment_items =
    Repo.[ item [ ali_prefix ^ ".fna" ] rd.fna ]
    |> Repo.shift "Alignments"
  in
  List.concat [ tree_items ; alignment_items ]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment