Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Gitlab is now running v13.9.0 - More info ->
here
<-
Open sidebar
VEBER Philippe
codepi
Commits
12048c65
Commit
12048c65
authored
Jan 14, 2021
by
Philippe Veber
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'new-cli'
parents
4d9215f4
6a7e6b9a
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
745 additions
and
148 deletions
+745
-148
.gitignore
.gitignore
+0
-1
app/codepi_app.ml
app/codepi_app.ml
+2
-110
lib/dataset.ml
lib/dataset.ml
+75
-0
lib/detection_pipeline.ml
lib/detection_pipeline.ml
+108
-3
lib/detection_pipeline.mli
lib/detection_pipeline.mli
+11
-0
lib/orthomam.ml
lib/orthomam.ml
+8
-24
lib/run.ml
lib/run.ml
+231
-0
lib/run.mli
lib/run.mli
+3
-0
lib/simulation_pipeline.ml
lib/simulation_pipeline.ml
+0
-7
lib/tk/candidate_site.ml
lib/tk/candidate_site.ml
+2
-2
lib/tk/dataframe.ml
lib/tk/dataframe.ml
+56
-0
lib/tk/dataframe.mli
lib/tk/dataframe.mli
+10
-0
lib/tk/dune
lib/tk/dune
+1
-1
lib/tk/note.ml
lib/tk/note.ml
+206
-0
lib/tk/note.mli
lib/tk/note.mli
+32
-0
No files found.
.gitignore
View file @
12048c65
...
...
@@ -2,7 +2,6 @@
_build
_bistro
outdir*
run*
reviewphiltrans.install
**/.merlin
example/outdir
...
...
app/codepi_app.ml
View file @
12048c65
open
Core
open
Codepi
open
Bistro_utils
let
first_nhx_in_dir
dir
=
Sys
.
readdir
dir
|>
Array
.
find_exn
~
f
:
(
String
.
is_suffix
~
suffix
:
".nhx"
)
let
sw
b
x
=
if
b
then
Some
x
else
None
let
realdata_main
~
use_diffsel
~
use_pcoc
~
use_pcoc_c60
~
use_pcoc_gamma
~
use_pcoc_v2
~
use_pcoc_pcp
~
use_tdg09
~
use_topological
~
use_identical
~
no_use_multinomial
~
calc_dnds
~
calc_gene_trees
~
indir
~
outdir
~
np
~
mem
()
=
let
loggers
=
[
Console_logger
.
create
()
;
]
in
let
mem
=
Option
.
map
mem
~
f
:
(
fun
i
->
`GB
i
)
in
let
rd
=
Real_dataset
.
make
~
alignment_dir_path
:
(
Filename
.
concat
indir
"Alignments"
)
~
tree_path
:
(
Filename
.
concat
indir
(
first_nhx_in_dir
indir
))
in
let
use_multinomial
=
not
no_use_multinomial
in
let
meths
=
List
.
filter_opt
[
sw
use_diffsel
`Diffsel
;
sw
use_pcoc
`Pcoc
;
sw
use_pcoc_c60
`Pcoc_C60
;
sw
use_pcoc_gamma
`Pcoc_gamma
;
sw
use_pcoc_v2
`PCOC_v2
;
sw
use_pcoc_pcp
`PCOC_pcp
;
sw
use_tdg09
`Tdg09
;
sw
use_topological
`Topological
;
sw
use_identical
`Identical
;
sw
use_multinomial
`Multinomial
;
]
in
let
pal
=
List
.
filter_opt
[
sw
calc_dnds
`DnDs
;
sw
calc_gene_trees
`GeneTree
;
]
in
List
.
concat
[
Repo
.
shift
"Merged_results"
(
Real_dataset
.
repo
meths
rd
)
;
Repo
.
shift
"PreParsed_Dataset"
(
Real_dataset
.
repo_parsed_rd
pal
rd
);
]
|>
Bistro_utils
.
Repo
.
build_main
~
outdir
~
loggers
?
np
?
mem
let
realdata_command
=
let
open
Command
.
Let_syntax
in
Command
.
basic
~
summary
:
"Run pipeline on real data"
[
%
map_open
let
outdir
=
flag
"--outdir"
(
required
string
)
~
doc
:
"PATH Output directory"
and
indir
=
flag
"--indir"
(
required
string
)
~
doc
:
"PATH Input directory"
and
use_diffsel
=
flag
"--diffsel"
no_arg
~
doc
:
" use the diffsel method (very slow)."
and
use_pcoc
=
flag
"--pcoc"
no_arg
~
doc
:
" use the pcoc method (slow)."
and
use_pcoc_c60
=
flag
"--pcoc-c60"
no_arg
~
doc
:
" use the pcoc method with c60 profils (very_slow)."
and
use_pcoc_gamma
=
flag
"--pcoc-gamma"
no_arg
~
doc
:
" use the pcoc method with the gamma option (very_slow)."
and
use_pcoc_v2
=
flag
"--pcoc-v2"
no_arg
~
doc
:
" use the pcoc v2 method with the C10 profiles (slow)."
and
use_pcoc_pcp
=
flag
"--pcoc-pcp"
no_arg
~
doc
:
" use the pcoc v2 method with the physico-chemical profiles (slow)."
and
use_tdg09
=
flag
"--tdg09"
no_arg
~
doc
:
" use the tdg09 method (slow)."
and
use_topological
=
flag
"--topological"
no_arg
~
doc
:
" use the topological method (fast)."
and
use_identical
=
flag
"--identical"
no_arg
~
doc
:
" use the identical method (fast)."
and
no_use_multinomial
=
flag
"--no-multinomial"
no_arg
~
doc
:
" not use the multinomial method (very fast so by default)."
and
calc_dnds
=
flag
"--dnds"
no_arg
~
doc
:
" calculate dn ds dnds trees (slow)."
and
calc_gene_trees
=
flag
"--gt"
no_arg
~
doc
:
" calculate gene trees (slow)."
and
np
=
flag
"--np"
(
optional
int
)
~
doc
:
"INT Number of available processors"
and
mem
=
flag
"--mem"
(
optional
int
)
~
doc
:
"INT Available memory (in GB)"
in
realdata_main
~
use_diffsel
~
use_pcoc
~
use_pcoc_c60
~
use_pcoc_gamma
~
use_pcoc_v2
~
use_pcoc_pcp
~
use_tdg09
~
use_topological
~
use_identical
~
no_use_multinomial
~
calc_dnds
~
calc_gene_trees
~
indir
~
outdir
~
np
~
mem
]
let
()
=
Command
.
group
~
summary
:
"
Reviewphiltrans
"
[
Command
.
group
~
summary
:
"
codepi
"
[
"validation"
,
Pipeline
.
validation_command
;
"r
ealdata"
,
realdata_
command
;
"r
un"
,
Run
.
command
;
"alistats"
,
Alistats
.
command
;
]
|>
Command
.
run
lib/dataset.ml
View file @
12048c65
...
...
@@ -49,3 +49,78 @@ let add_indels_to_dataset d ~seed =
let
dataset
=
Ready_dataset
.
add_indels_to_ready_dataset
~
p
~
seed
:
indel_seed
d
.
dataset
in
let
calc_dnds
=
d
.
calc_dnds
in
{
model_prefix
;
tree_prefix
;
is_real
;
calc_dnds
;
dataset
;
seed
=
new_seed
}
open
Bistro
open
File_formats
module
New_API
=
struct
type
t
=
{
tree
:
nhx
file
;
nucleotide_alignments
:
(
string
*
nucleotide_fasta
file
)
list
;
convergent_species
:
string
list
workflow
;
}
let
clip_tree_on_alignment
(
tree
:
nhx
file
)
(
ali
:
nucleotide_fasta
file
)
=
let
f
=
fun
%
workflow
dest
->
let
open
Phylogenetics
in
let
tree
=
Newick
.
from_file
[
%
path
tree
]
in
let
_
,
ali
=
Biotk
.
Fasta
.
from_file
[
%
path
ali
]
|>
Result
.
ok_or_failwith
in
let
ali_species
=
List
.
map
ali
~
f
:
(
fun
it
->
it
.
description
)
in
let
clipped_tree
=
Newick
.
map_inner_tree
tree
~
f
:
(
fun
tree
->
match
Tree
.
leafset_generated_subtree
tree
(
fun
bi
->
bi
.
Newick
.
name
)
ali_species
with
|
None
->
failwith
"Tree has no leaf in alignment"
|
Some
filtered_tree
->
filtered_tree
)
in
Newick
.
to_file
clipped_tree
dest
in
Workflow
.
path_plugin
~
descr
:
"orthomam.clip_tree_on_alignment"
f
let
annotate_convergent_species_in_tree
(
tree
:
newick
file
)
species
:
nhx
file
=
let
f
=
fun
%
workflow
dest
->
let
open
Phylogenetics
in
let
species
=
[
%
eval
species
]
and
omm_tree
=
[
%
path
tree
]
in
let
ensembl_tree
=
Newick
.
from_file
omm_tree
in
let
tagged_tree
=
Newick
.
map_inner_tree
ensembl_tree
~
f
:
(
fun
t
->
Codepitk
.
Convergence_tree
.
infer_binary_condition_on_branches
~
convergent_leaves
:
(
String
.
Set
.
of_list
species
)
t
)
in
Newick
.
to_file
tagged_tree
dest
in
Workflow
.
path_plugin
~
version
:
3
~
descr
:
"dataset.annotate_convergent_species_in_tree"
f
let
make
~
tree
~
nucleotide_alignments
~
convergent_species
=
let
tree
=
annotate_convergent_species_in_tree
tree
convergent_species
in
{
tree
;
nucleotide_alignments
;
convergent_species
}
module
Query
=
struct
type
dataset
=
t
type
t
=
{
dataset
:
dataset
;
alignment_descr
:
string
;
alignment
:
nucleotide_fasta
file
;
}
let
nucleotide_alignment
q
=
q
.
alignment
let
tree
~
branch_length_unit
:_
q
=
clip_tree_on_alignment
q
.
dataset
.
tree
q
.
alignment
end
let
queries
dataset
=
List
.
map
dataset
.
nucleotide_alignments
~
f
:
(
fun
(
alignment_descr
,
alignment
)
->
{
Query
.
dataset
;
alignment_descr
;
alignment
}
)
end
lib/detection_pipeline.ml
View file @
12048c65
...
...
@@ -69,6 +69,17 @@ module type S = sig
val
view_site
:
query
->
convergent_species
:
string
list
->
site_pos
:
int
->
pdf
file
val
ranking
:
?
length
:
int
->
query_descr
:
(
query
->
string
option
)
->
meth
:
(
query
->
cpt
file
)
->
column_label
:
string
->
convergent_species
:
string
list
workflow
->
query
list
->
Codepitk
.
Candidate_site
.
t
list
workflow
val
alignment_plot
:
query
->
svg
file
end
module
Make
(
Q
:
Query
)
=
struct
...
...
@@ -240,9 +251,103 @@ module Make (Q : Query) = struct
Biotk
.
Fasta
.
from_file
alignment_path
|>
Base
.
Result
.
ok_or_failwith
|>
snd
in
CS
.
make
~
condition
tree
alignment
site_pos
|>
CS
.
draw
|>
Biotk_croquis
.
Croquis
.
Layout
.
simple
|>
Base
.
Fn
.
flip
Biotk_croquis
.
Croquis
.
Layout
.
render_pdf
dest
let
layout
=
CS
.
make
~
condition
tree
alignment
site_pos
|>
CS
.
draw
|>
Biotk_croquis
.
Croquis
.
Layout
.
simple
in
Biotk_croquis
.
Croquis
.
Layout
.
render
`pdf
layout
(
`File
dest
)
in
Workflow
.
path_plugin
~
descr
:
"detection_pipeline.view_site"
f
let
ranking
?
(
length
=
100
)
~
query_descr
~
meth
~
column_label
~
convergent_species
queries
=
let
open
Core_kernel
in
let
open
Codepitk
in
let
query_ids
=
Array
.
of_list
(
List
.
map
queries
~
f
:
query_descr
)
in
let
trees
=
List
.
map
queries
~
f
:
(
tree
~
branch_length_unit
:
`Amino_acid
)
in
let
alignments
=
List
.
map
queries
~
f
:
amino_acid_alignment
in
let
result_files
=
List
.
map
queries
~
f
:
meth
in
let
f
=
fun
%
workflow
()
->
let
ranking_length
=
[
%
param
length
]
in
let
column_label
=
[
%
param
column_label
]
in
let
query_ids
=
[
%
param
query_ids
]
in
let
trees
=
[
%
eval
Workflow
.
path_list
trees
]
in
let
alignments
=
[
%
eval
Workflow
.
path_list
alignments
]
in
let
result_files
=
[
%
eval
Workflow
.
path_list
result_files
]
in
let
convergent_species
=
String
.
Set
.
of_list
[
%
eval
convergent_species
]
in
let
module
Result_table
=
Codepitk
.
Result_table
in
let
lazy_load
xs
~
f
=
List
.
map
xs
~
f
:
(
fun
x
->
lazy
(
f
x
))
|>
Array
.
of_list
in
let
results
=
lazy_load
result_files
~
f
:
Result_table
.
of_file
in
let
trees
=
lazy_load
trees
~
f
:
Phylogenetics
.
Newick
.
from_file
in
let
alignments
=
lazy_load
alignments
~
f
:
Biotk
.
Fasta
.
from_file
in
let
module
S
=
struct
type
t
=
{
alignment
:
int
;
pos
:
int
;
score
:
float
;
}
let
leq
s1
s2
=
Float
.(
s1
.
score
<=
s2
.
score
)
end
in
let
module
H
=
CCHeap
.
Make
(
S
)
in
let
add_result_line
acc
~
alignment
~
pos
~
score
=
match
score
with
|
None
->
acc
|
Some
score
->
let
candidate
=
{
S
.
alignment
;
pos
;
score
}
in
let
acc
=
H
.
add
acc
candidate
in
if
H
.
size
acc
>
ranking_length
then
H
.
take_exn
acc
|>
fst
else
acc
in
let
add_result_file
alignment
acc
(
rf
:
Result_table
.
t
Lazy
.
t
)
=
let
col
=
List
.
Assoc
.
find_exn
(
Lazy
.
force
rf
)
.
scores_per_meth
column_label
~
equal
:
String
.
equal
in
Array
.
foldi
col
~
init
:
acc
~
f
:
(
fun
pos
acc
score
->
add_result_line
acc
~
alignment
~
pos
~
score
)
in
let
top_sites
=
Array
.
foldi
results
~
init
:
H
.
empty
~
f
:
add_result_file
|>
H
.
to_list_sorted
|>
List
.
rev
in
let
annotate_site
(
s
:
S
.
t
)
=
let
id
=
query_ids
.
(
s
.
alignment
)
in
let
tree
=
Lazy
.
force
trees
.
(
s
.
alignment
)
in
let
alignment
=
match
Lazy
.
force
alignments
.
(
s
.
alignment
)
with
|
Ok
(
_
,
fa
)
->
fa
|
Error
_
->
assert
false
in
let
leaf_values
=
List
.
map
alignment
~
f
:
(
fun
it
->
it
.
description
,
it
.
sequence
.
[
s
.
pos
]
)
in
let
contents
=
let
open
Phylogenetics
in
Newick
.
with_inner_tree
tree
~
f
:
(
fun
t
->
Tree
.
map
t
~
node
:
(
fun
_
->
()
)
~
branch
:
(
fun
bi
->
Option
.
value_exn
bi
.
length
)
~
leaf
:
(
fun
ni
->
let
species
=
Option
.
value_exn
ni
.
name
in
let
state
=
List
.
Assoc
.
find_exn
leaf_values
species
~
equal
:
String
.
equal
in
let
condition
=
if
String
.
Set
.
mem
convergent_species
species
then
`Convergent
else
`Ancestral
in
{
Codepitk
.
Candidate_site
.
species
;
state
;
condition
}
)
)
in
{
Candidate_site
.
alignment_id
=
id
;
contents
;
pos
=
Some
s
.
pos
;
score
=
Some
s
.
score
}
in
List
.
map
top_sites
~
f
:
annotate_site
in
Workflow
.
plugin
~
descr
:
"detection_pipeline.ranking"
f
let
alignment_plot
d
=
Convergence_detection
.
plot_convergent_sites
~
tree
:
(
Q
.
tree
~
branch_length_unit
:
`Amino_acid
d
)
~
alignment
:
(
amino_acid_alignment
d
)
~
detection_results
:
(
multinomial_asymptotic_lrt
d
)
()
end
lib/detection_pipeline.mli
View file @
12048c65
...
...
@@ -69,6 +69,17 @@ module type S = sig
val
view_site
:
query
->
convergent_species
:
string
list
->
site_pos
:
int
->
pdf
file
val
ranking
:
?
length
:
int
->
query_descr
:
(
query
->
string
option
)
->
meth
:
(
query
->
cpt
file
)
->
column_label
:
string
->
convergent_species
:
string
list
workflow
->
query
list
->
Codepitk
.
Candidate_site
.
t
list
workflow
val
alignment_plot
:
query
->
svg
file
end
module
Make
(
Q
:
Query
)
:
S
with
type
query
:=
Q
.
t
lib/orthomam.ml
View file @
12048c65
...
...
@@ -78,24 +78,6 @@ let clip_tree_on_alignment tree ali =
let
omm_tree_of_db
db
=
Workflow
.
input
(
Orthomam_db
.
tree
db
)
let
annotate_convergent_species_in_tree
(
tree
:
newick
file
)
species
:
nhx
file
=
let
f
=
fun
%
workflow
dest
->
let
open
Phylogenetics
in
let
species
=
[
%
eval
species
]
and
omm_tree
=
[
%
path
tree
]
in
let
ensembl_tree
=
Newick
.
from_file
omm_tree
in
let
tagged_tree
=
Newick
.
map_inner_tree
ensembl_tree
~
f
:
(
fun
t
->
Codepitk
.
Convergence_tree
.
infer_binary_condition_on_branches
~
convergent_leaves
:
(
String
.
Set
.
of_list
species
)
t
)
in
Newick
.
to_file
tagged_tree
dest
in
Workflow
.
path_plugin
~
version
:
3
~
descr
:
"tree_of_convergent_species"
f
let
compare_tree_branch_lengths
t1
t2
=
let
f
=
fun
%
workflow
dest
->
let
open
Phylogenetics
in
...
...
@@ -393,7 +375,7 @@ module Q = struct
let
tree
~
branch_length_unit
q
=
clip_tree_on_alignment
(
annotate_convergent_species_in_tree
(
Dataset
.
New_API
.
annotate_convergent_species_in_tree
(
omm_tree_with_branch_lengths
~
branch_length_unit
q
.
db
)
q
.
convergent_species
)
(
alignment
q
)
...
...
@@ -405,7 +387,7 @@ end
include
Q
let
tree_of_db
db
~
branch_length_unit
~
convergent_species
=
annotate_convergent_species_in_tree
Dataset
.
New_API
.
annotate_convergent_species_in_tree
(
omm_tree_with_branch_lengths
~
branch_length_unit
db
)
(
Workflow
.
data
convergent_species
)
...
...
@@ -484,9 +466,11 @@ let convergence_species_tree_pdf ~convergent_species db =
Codepitk
.
Convergence_tree
.
infer_binary_condition_on_branches
t
~
convergent_leaves
:
convergent_species
)
in
render_tree
tree_or_branch
|>
Biotk_croquis
.
Croquis
.
Layout
.
simple
|>
Fn
.
flip
Biotk_croquis
.
Croquis
.
Layout
.
render_pdf
dest
let
layout
=
render_tree
tree_or_branch
|>
Biotk_croquis
.
Croquis
.
Layout
.
simple
in
Biotk_croquis
.
Croquis
.
Layout
.
render
`pdf
layout
(
`File
dest
)
in
Workflow
.
path_plugin
~
descr
:
"orthomam.convergence_species_tree_pdf"
f
...
...
@@ -607,7 +591,7 @@ let draw_site q pos =
|>
Candidate_site
.
draw
|>
Croquis
.
Layout
.
simple
in
Croquis
.
Layout
.
render
_
pdf
picture
dest
Croquis
.
Layout
.
render
`
pdf
picture
(
`File
dest
)
|
Error
msg
->
failwith
msg
in
Workflow
.
path_plugin
~
descr
:
"orthomam."
f
lib/run.ml
0 → 100644
View file @
12048c65
open
Core
open
Bistro
open
Bistro_utils
open
File_formats
module
Dataset
=
Dataset
.
New_API
module
Pipeline
=
Detection_pipeline
.
Make
(
Dataset
.
Query
)
type
detection_method
=
Detection_method
of
{
id
:
string
;
name
:
string
;
cpt_column_label
:
string
;
f
:
Dataset
.
Query
.
t
->
cpt
file
;
}
let
multinomial_method
=
Detection_method
{
id
=
"multinomial"
;
name
=
"Multinomial"
;
cpt_column_label
=
"Multinomial_1mp"
;
f
=
Pipeline
.
multinomial_asymptotic_lrt
;
}
let
tdg09_method
=
Detection_method
{
id
=
"tdg09"
;
name
=
"TDG09"
;
cpt_column_label
=
"Tdg09_1MinusFDR"
;
f
=
Pipeline
.
tdg09
;
}
let
pcoc_method
=
Detection_method
{
id
=
"pcoc"
;
name
=
"PCOC"
;
cpt_column_label
=
"fdsf"
;
f
=
Pipeline
.
pcoc
;
}
let
known_methods
=
[
"multinomial"
,
multinomial_method
;
"tdg09"
,
tdg09_method
;
"pcoc"
,
pcoc_method
;
]
let
parse_methods
methods
=
List
.
map
methods
~
f
:
(
fun
m
->
match
List
.
Assoc
.
find
known_methods
~
equal
:
String
.
equal
m
with
|
Some
dm
->
Ok
dm
|
None
->
Rresult
.
R
.
error_msgf
"Unknown method %s, known methods are %s"
m
(
List
.
map
known_methods
~
f
:
fst
|>
String
.
concat
~
sep
:
","
)
)
|>
Result
.
all
let
candidate_site_report
dataset
(
Detection_method
meth
)
=
let
all_queries
=
Dataset
.
queries
dataset
in
let
sites
=
Pipeline
.
ranking
~
query_descr
:
(
fun
q
->
Some
q
.
Dataset
.
Query
.
alignment_descr
)
~
meth
:
meth
.
f
~
column_label
:
meth
.
cpt_column_label
~
convergent_species
:
dataset
.
Dataset
.
convergent_species
all_queries
in
let
f
=
fun
%
workflow
dest
->
let
module
N
=
Codepitk
.
Note
in
let
module
DF
=
Codepitk
.
Dataframe
in
let
module
CS
=
Codepitk
.
Candidate_site
in
let
sites
=
Array
.
of_list
[
%
eval
sites
]
in
let
note_to_html
note
path
=
N
.
to_html
note
path
|>
Rresult
.
R
.
failwith_error_msg
in
let
site_page_path
i
=
sprintf
"%s_%04d.html"
meth
.
id
i
in
let
site_page
i
s
=
let
path
=
Filename
.
concat
dest
(
site_page_path
i
)
in
let
title
=
sprintf
"%s candidate site #%d"
meth
.
name
i
in
let
alignment_id
=
Option
.
value
~
default
:
"NA"
s
.
CS
.
alignment_id
in
let
position
=
Option
.
value_map
~
f
:
Int
.
to_string
~
default
:
"NA"
s
.
CS
.
pos
in
let
score
=
Option
.
value_map
~
f
:
Float
.
to_string
~
default
:
"NA"
s
.
CS
.
score
in
let
summary
=
{
%
string
|
-
**
Alignment
**:
%
{
alignment_id
}
-
**
Position
**:
%
{
position
}
-
**
Score
**:
%
{
score
}
|
}
in
let
contents
=
N
.
make
~
title
N
.[
text
summary
;
croquis
~
width
:
20
.
(
CS
.
draw
s
)
;
]
in
note_to_html
contents
path
;
if
i
=
0
then
Out_channel
.
with_file
"delme.bin"
~
f
:
(
fun
oc
->
Marshal
.
to_channel
oc
s
[]
)
in
let
module
H
=
struct
open
Tyxml
.
Html
let
table
~
labels
rows
=
let
open
Tyxml
.
Html
in
let
thead
=
thead
[
tr
(
List
.
map
labels
~
f
:
(
fun
x
->
td
[
txt
x
]))]
in
let
rows
=
List
.
map
rows
~
f
:
(
fun
cells
->
tr
(
List
.
map
cells
~
f
:
(
fun
x
->
td
[
x
]))
)
in
table
~
thead
rows
let
link
~
href
contents
=
a
~
a
:
[
a_href
href
]
[
txt
contents
]
let
opt_cell
x
~
f
=
match
x
with
|
None
->
txt
"NA"
|
Some
y
->
f
y
let
opt_txt
x
~
f
=
opt_cell
x
~
f
:
(
fun
x
->
txt
(
f
x
))
end
in
let
index
=
let
df
=
let
labels
=
[
"Alignment ID"
;
"Position"
;
"Score"
;
"Infos"
]
in
let
rows
=
List
.
init
(
Array
.
length
sites
)
~
f
:
(
fun
i
->
[
H
.
opt_cell
sites
.
(
i
)
.
CS
.
alignment_id
~
f
:
(
fun
id
->
let
href
=
sprintf
"../alignments/%s.svg"
id
in
H
.
link
~
href
id
)
;
H
.
opt_txt
sites
.
(
i
)
.
CS
.
pos
~
f
:
Int
.
to_string
;
H
.
opt_txt
sites
.
(
i
)
.
CS
.
score
~
f
:
Float
.
to_string
;
H
.
link
~
href
:
(
site_page_path
i
)
"Details"
])
in
H
.
table
~
labels
rows
in
let
title
=
sprintf
"Candidate sites for %s method"
meth
.
name
in
N
.
make
~
title
N
.[
table
df
;
]
in
let
path
fn
=
Filename
.
concat
dest
fn
in
Unix
.
mkdir_p
dest
;
note_to_html
index
(
path
"index.html"
)
;
Array
.
iteri
sites
~
f
:
site_page
in
Workflow
.
path_plugin
~
descr
:
"codepi.run.candidate_site_report"
~
version
:
5
f
type
t
=
{
tree_file
:
string
;
alignment_dir
:
string
;
convergent_species_file
:
string
;
detection_methods
:
detection_method
list
;
}
let
convergent_species_workflow
run
=
[
%
workflow
In_channel
.
read_lines
[
%
path
Workflow
.
input
run
.
convergent_species_file
]]
let
dataset
run
=
let
tree
=
Workflow
.
input
run
.
tree_file
in
let
nucleotide_alignments
=
Sys
.
readdir
run
.
alignment_dir
|>
Array
.
to_list
|>
List
.
map
~
f
:
(
fun
ali
->
ali
,
Workflow
.
input
(
Filename
.
concat
run
.
alignment_dir
ali
)
)
in
let
convergent_species
=
convergent_species_workflow
run
in
Dataset
.
make
~
tree
~
nucleotide_alignments
~
convergent_species
let
repo
run
=
let
d
=
dataset
run
in
let
foreach_detection_method
(
Detection_method
meth
as
dm
)
=
let
report
=
candidate_site_report
d
dm
in
Repo
.
item
[
meth
.
id
^
"_report"
]
report
in
let
foreach_query
(
q
:
Dataset
.
Query
.
t
)
=
Repo
.
item
[
"alignments"
;
sprintf
"%s.svg"
q
.
alignment_descr
]
(
Pipeline
.
alignment_plot
q
)
in
List
.
concat
[
List
.
map
run
.
detection_methods
~
f
:
foreach_detection_method
;
List
.
map
(
Dataset
.
queries
d
)
~
f
:
foreach_query
;
]
let
main
~
tree_file
~
alignment_dir
~
convergent_species_file
~
outdir
~
np
~
mem
~
methods
()
=
try
let
loggers
=
[
Console_logger
.
create
()
;
]
in