Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
35a594d0
Commit
35a594d0
authored
Jan 15, 2019
by
Carine Rey
Committed by
Philippe Veber
Sep 20, 2019
Browse files
change realdata inputdir tree
parent
cb149119
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
1993 additions
and
28 deletions
+1993
-28
Makefile
Makefile
+11
-0
example/real_data/besnard2009/Alignments/cyp_coding.fna
example/real_data/besnard2009/Alignments/cyp_coding.fna
+0
-0
example/real_data/besnard2009/Alignments/cyp_coding_bis.fna
example/real_data/besnard2009/Alignments/cyp_coding_bis.fna
+1896
-0
example/real_data/besnard2009/besnard2009.nhx
example/real_data/besnard2009/besnard2009.nhx
+0
-0
lib/dataset.ml
lib/dataset.ml
+22
-9
lib/pipeline.ml
lib/pipeline.ml
+54
-19
lib/raw_dataset.ml
lib/raw_dataset.ml
+10
-0
No files found.
Makefile
View file @
35a594d0
...
...
@@ -39,6 +39,16 @@ test:
mv
dag.dot dagtest_val.dot
&&
\
dot
-Tsvg
dagtest_val.dot
-o
dagtest_val.svg
# -----------------------------------------------------------------------
# Test
# -----------------------------------------------------------------------
.PHONY
:
realdata_test
realdata_test
:
cd
example
&&
\
reviewphiltrans realdata
--outdir
outdir_realdata_test
--indir
real_data
--np
4
--seed
4256073781403810077
# -----------------------------------------------------------------------
# big experiments
# -----------------------------------------------------------------------
...
...
@@ -61,6 +71,7 @@ clean:
clean-test
:
rm
-rf
example/_bistro
rm
-rf
example/outdir_test
rm
-rf
example/outdir_realdata_test
rm
-rf
example/report.log
rm
-rf
example/dot.dag
...
...
example/real_data/besnard2009/cyp_coding.fna
→
example/real_data/besnard2009/
Alignments/
cyp_coding.fna
View file @
35a594d0
File moved
example/real_data/besnard2009/Alignments/cyp_coding_bis.fna
0 → 100644
View file @
35a594d0
This diff is collapsed.
Click to expand it.
example/real_data/besnard2009/
cyp_coding
.nhx
→
example/real_data/besnard2009/
besnard2009
.nhx
View file @
35a594d0
File moved
lib/dataset.ml
View file @
35a594d0
...
...
@@ -9,22 +9,35 @@ type t = {
seed
:
int
;
}
let
repo
dataset_l
=
List
.
map
dataset_l
~
f
:
(
fun
dataset
->
let
model_prefix
=
dataset
.
model_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
let
repo_ready_data
=
Ready_dataset
.
repo
dataset
.
dataset
in
let
repo_raw_data
=
Raw_dataset
.
repo
~
prefix
:
model_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
in
List
.
concat
[
Repo
.
shift
"minimal"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"debug"
repo_ready_data
;
]
|>
Repo
.
shift
"dataset"
|>
Repo
.
shift
model_prefix
|>
Repo
.
shift
"Results_per_hypothesis"
if
dataset
.
is_real
then
let
repo_realdata
=
Raw_dataset
.
repo_realdata
~
ali_prefix
:
model_prefix
~
tree_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
in
repo_realdata
|>
Repo
.
shift
"Dataset"
|>
Repo
.
shift
tree_prefix
else
let
repo_ready_data
=
Ready_dataset
.
repo
dataset
.
dataset
in
let
repo_raw_data
=
Raw_dataset
.
repo
~
prefix
:
model_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
in
List
.
concat
[
Repo
.
shift
"minimal"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"debug"
repo_ready_data
;
]
|>
Repo
.
shift
"dataset"
|>
Repo
.
shift
model_prefix
|>
Repo
.
shift
"Results_per_hypothesis"
)
|>
List
.
concat
let
add_indels_to_dataset
d
~
seed
=
let
p
=
0
.
33
in
let
model_prefix
=
sprintf
"%s_0.33_i"
d
.
model_prefix
in
...
...
lib/pipeline.ml
View file @
35a594d0
...
...
@@ -6,8 +6,13 @@ open Convergence_hypothesis
open
Profile
let
parse_input_data
~
seed
indir
=
let
error_message
=
{
|
I
need
a
file
"tree.nhx"
containing
the
annotated
tree
and
a
directory
"Alignments"
containing
fasta
alignments
(
in
nt
)
with
the
format
"gene1.fna"
,
"gene2.fna"
,...
|
}
in
let
datasets
=
Array
.
to_list
@@
Sys
.
readdir
indir
in
List
.
map
datasets
~
f
:
(
fun
dataset_prefix
->
printf
"Real dataset:
\n\t
Tree: %s
\n
"
dataset_prefix
;
let
files
=
Array
.
to_list
@@
Sys
.
readdir
(
Filename
.
concat
indir
dataset_prefix
)
in
if
List
.
length
files
=
2
then
let
h_file
=
List
.
hd
files
in
...
...
@@ -22,25 +27,34 @@ let parse_input_data ~seed indir =
|
None
->
""
in
let
t_file_ext
=
Filename
.
split_extension
t_file
in
let
fna
,
input_tree
=
match
(
h_file_ext
,
t_file_ext
)
with
|
(
_
,
Some
"fna"
)
,
(
_
,
Some
"nhx"
)
->
h_file
,
t_file
|
(
_
,
Some
"nhx"
)
,
(
_
,
Some
"fna"
)
->
t_file
,
h_file
|
_
->
failwith
({
|
Syntax
error
:
extension
errors
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
)
^
" nhx: "
^
(
h_file
)
^
" fna: "
^
(
t_file
))
let
fna_dir
,
input_tree
=
match
(
h_file_ext
,
t_file_ext
,
h_file
,
t_file
)
with
|
_
,
(
_
,
Some
"nhx"
)
,
"Alignments"
,
_
->
h_file
,
t_file
|
(
_
,
Some
"nhx"
)
,
_
,
_
,
"Alignments"
->
t_file
,
h_file
|
_
,
_
,
_
,
"Alignments"
->
failwith
({
|
Syntax
error
:
Naming
errors
in
|
})
|
_
,_,_,_
->
failwith
({
|
Syntax
error
:
Naming
errors
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
)
^
"
1st file: "
^
(
h_file
)
^
"
2nd file: "
^
(
t_file
)
^
error_message
)
in
let
tree_prefix
=
Filename
.
chop_extension
input_tree
in
let
input_tree
=
Workflow
.
input
(
Filename
.
concat
indir
(
Filename
.
concat
dataset_prefix
input_tree
))
in
let
fna
=
Workflow
.
input
(
Filename
.
concat
indir
(
Filename
.
concat
dataset_prefix
fna
))
in
let
fna_infos
=
None
in
let
raw_dataset
=
Raw_dataset
.{
input_tree
;
fna
;
fna_infos
}
in
let
dataset
=
{
Dataset
.
model_prefix
=
tree_prefix
;
is_real
=
true
;
tree_prefix
=
dataset_prefix
;
dataset
=
Ready_dataset
.
of_raw
~
descr
:
(
"real_data."
^
tree_prefix
)
raw_dataset
;
seed
;
}
in
[
dataset
]
let
fna_l
=
Array
.
to_list
@@
Sys
.
readdir
(
Filename
.
concat
indir
(
dataset_prefix
^
"/"
^
fna_dir
))
in
printf
"%i files detected in %s
\n
"
(
List
.
length
fna_l
)
fna_dir
;
List
.
map
fna_l
~
f
:
(
function
fna
->
let
fna_prefix
=
Filename
.
chop_extension
fna
in
printf
"%s: %s
\n
"
fna_prefix
(
Filename
.
concat
indir
(
Filename
.
concat
(
dataset_prefix
^
"/"
^
fna_dir
)
fna
));
let
fna
=
Workflow
.
input
(
Filename
.
concat
indir
(
Filename
.
concat
(
dataset_prefix
^
"/"
^
fna_dir
)
fna
))
in
let
fna_infos
=
None
in
let
raw_dataset
=
Raw_dataset
.{
input_tree
;
fna
;
fna_infos
}
in
let
dataset
=
{
Dataset
.
model_prefix
=
fna_prefix
;
is_real
=
true
;
tree_prefix
=
tree_prefix
;
dataset
=
Ready_dataset
.
of_raw
~
descr
:
(
"real_data."
^
tree_prefix
)
raw_dataset
;
seed
;
}
in
dataset
)
else
failwith
({
|
More
than
2
files
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
))
failwith
({
|
More
than
2
files
in
|
}
^
(
Filename
.
concat
indir
dataset_prefix
)
^
error_message
)
)
|>
List
.
concat
...
...
@@ -131,7 +145,6 @@ let repo_of_detection_result res =
|
_
->
[]
]
|>
List
.
concat
|>
Repo
.
shift
det_meth_prefix
|>
Repo
.
shift
"Detection_tools"
let
repo_of_dataset_results_l
~
dataset_results_l
=
List
.
map
dataset_results_l
~
f
:
(
fun
dataset_results
->
...
...
@@ -145,13 +158,35 @@ let repo_of_dataset_results_l ~dataset_results_l =
let
repo
=
merged_results_item
::
plot_merged_results_item
::
(
List
.
map
det_results_l
~
f
:
repo_of_detection_result
|>
List
.
concat
)
(
List
.
map
det_results_l
~
f
:
repo_of_detection_result
|>
List
.
concat
|>
Repo
.
shift
"Detection_tools"
)
in
repo
|>
Repo
.
shift
dataset_results
.
model_prefix
)
|>
List
.
concat
let
repo_of_real_dataset_results_l
~
dataset_results_l
=
List
.
map
dataset_results_l
~
f
:
(
fun
dataset_results
->
let
det_results_l
=
dataset_results
.
res_by_tools
in
let
merged_results
=
dataset_results
.
merged_results
in
let
plot_merge_results
=
dataset_results
.
plot_merged_results
in
let
model_prefix
=
dataset_results
.
model_prefix
in
let
tree_prefix
=
dataset_results
.
tree_prefix
in
let
merged_results_item
=
Repo
.
item
[
tree_prefix
^
"."
^
model_prefix
^
".merged_results.tsv"
]
merged_results
in
let
plot_merged_results_item
=
Repo
.
item
[
tree_prefix
^
"."
^
model_prefix
^
".plot_merged_results.svg"
]
plot_merge_results
in
List
.
concat
[
[
merged_results_item
;
plot_merged_results_item
;
]
|>
Repo
.
shift
"Merged_Results"
;
List
.
map
det_results_l
~
f
:
repo_of_detection_result
|>
List
.
concat
|>
Repo
.
shift
model_prefix
|>
Repo
.
shift
"Results_per_Detection_tool"
;
]
|>
Repo
.
shift
dataset_results
.
tree_prefix
)
|>
List
.
concat
let
derive_from_det_meth
~
det_meth
~
(
dataset
:
Dataset
.
t
)
~
preview
=
let
model_prefix
=
dataset
.
model_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
...
...
@@ -302,7 +337,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
in
let
repo_real_trees
=
[
Dataset
.
repo
dataset_l
;
repo_of_dataset_results_l
~
dataset_results_l
;
repo_of_
real_
dataset_results_l
~
dataset_results_l
;
]
|>
List
.
concat
in
let
repo
=
(
Repo
.
shift
"Simulated_datasets"
sim_repo_l
#
repo
)
@
(
Repo
.
shift
"Real_datasets"
repo_real_trees
)
in
...
...
@@ -358,7 +393,7 @@ let realdata_main ~outdir ~indir ~preview ~use_diffsel ~use_c60 ?(np = 2) ?(mem
let
dataset_results_l
=
derive_det
~
dataset_l
~
preview
~
use_diffsel
~
use_c60
in
let
repo_real_trees
=
[
Dataset
.
repo
dataset_l
;
repo_of_dataset_results_l
~
dataset_results_l
;
repo_of_
real_
dataset_results_l
~
dataset_results_l
;
]
|>
List
.
concat
(* list of repos *)
in
let
repo
=
Repo
.
shift
"Real_datasets"
repo_real_trees
in
...
...
lib/raw_dataset.ml
View file @
35a594d0
...
...
@@ -13,3 +13,13 @@ let repo ~prefix rd =
item
[
prefix
^
".nhx"
]
rd
.
input_tree
;
item
[
prefix
^
".fna"
]
rd
.
fna
;
]
let
repo_realdata
~
tree_prefix
~
ali_prefix
rd
=
List
.
concat
[
Repo
.[
item
[
tree_prefix
^
".nhx"
]
rd
.
input_tree
;
];
Repo
.[
item
[
ali_prefix
^
".fna"
]
rd
.
fna
;
]
|>
Repo
.
shift
"Alignments"
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment