Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
1805222a
Commit
1805222a
authored
May 09, 2020
by
Philippe Veber
Browse files
orthomam: site_ranking workflow (whole database run)
parent
0cbda477
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
128 additions
and
4 deletions
+128
-4
lib/dune
lib/dune
+1
-1
lib/orthomam.ml
lib/orthomam.ml
+108
-3
lib/orthomam.mli
lib/orthomam.mli
+19
-0
No files found.
lib/dune
View file @
1805222a
(library
(name reviewphiltrans)
(libraries core biotk biotope bistro.utils gsl ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence reviewphiltrans_toolbox)
(libraries core biotk biotope bistro.utils
containers
gsl ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence reviewphiltrans_toolbox)
(preprocess
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
...
...
lib/orthomam.ml
View file @
1805222a
...
...
@@ -6,10 +6,10 @@ open Reviewphiltrans_toolbox
let
ensembl_tree
:
nhx
file
=
Bistro_unix
.
wget
"ftp://ftp.ensembl.org/pub/release-99/compara/species_trees/vertebrates_species-tree_Ensembl.nh"
type
alignment
=
Orthomam_db
.
t
*
string
type
alignment
=
Alignment
of
Orthomam_db
.
t
*
string
let
alignments_of_db
db
=
List
.
map
(
Orthomam_db
.
list_alignments
db
)
~
f
:
(
fun
ali
->
db
,
ali
)
List
.
map
(
Orthomam_db
.
list_alignments
db
)
~
f
:
(
fun
ali
->
Alignment
(
db
,
ali
)
)
let
%
pworkflow
fasta_of_phylip
ali
=
let
ali
=
...
...
@@ -27,9 +27,14 @@ type query = {
convergent_species
:
string
list
workflow
;
}
let
query
(
db
,
alignment
)
~
convergent_species
=
let
query
(
Alignment
(
db
,
alignment
)
)
~
convergent_species
=
{
db
;
alignment
;
convergent_species
}
let
family_id
q
=
Filename
.
basename
q
.
alignment
|>
Filename
.
chop_suffix
"_NT.fasta.ali"
|>
String
.
chop_prefix_exn
~
prefix
:
"all"
let
%
pworkflow
remove_unobserved_sequences_from_alignment
phylip
:
phylip
file
=
let
phylip
=
[
%
path
phylip
]
in
let
module
Phylip
=
Phylogenetics
.
Phylip
in
...
...
@@ -495,3 +500,103 @@ let%pworkflow convergence_species_tree_pdf ~convergent_species db =
render_tree
tree_or_branch
|>
Biotk_croquis
.
Croquis
.
Layout
.
simple
|>
Fn
.
flip
Biotk_croquis
.
Croquis
.
Layout
.
render_pdf
[
%
dest
]
module
Candidate_site
=
struct
module
T
=
struct
open
Phylogenetics
type
t
=
{
alignment_id
:
string
;
pos
:
int
;
contents
:
(
unit
,
string
*
char
*
bool
,
float
)
Tree
.
t
;
score
:
float
;
}
let
leq
s1
s2
=
Float
.(
s1
.
score
<=
s2
.
score
)
end
include
T
module
Heap
=
CCHeap
.
Make
(
T
)
end
let
%
workflow
ranking_of_results
~
alignment_ids
~
convergent_species
(
alignments
:
aminoacid_fasta
file
list
)
(
trees
:
newick
file
list
)
result_files
~
column_label
=
let
ranking_size
=
1_000
in
let
column_label
=
[
%
param
column_label
]
in
let
alignment_ids
=
Array
.
of_list
[
%
param
alignment_ids
]
in
let
trees
=
[
%
eval
Workflow
.
path_list
trees
]
in
let
alignments
=
[
%
eval
Workflow
.
path_list
alignments
]
in
let
result_files
=
[
%
eval
Workflow
.
path_list
result_files
]
in
let
convergent_species
=
String
.
Set
.
of_list
[
%
param
convergent_species
]
in
let
module
Result_table
=
Reviewphiltrans_toolbox
.
Result_table
in
let
lazy_load
xs
~
f
=
List
.
map
xs
~
f
:
(
fun
x
->
lazy
(
f
x
))
|>
Array
.
of_list
in
let
results
=
lazy_load
result_files
~
f
:
Result_table
.
of_file
in
let
trees
=
lazy_load
trees
~
f
:
Phylogenetics
.
Newick
.
from_file
in
let
alignments
=
lazy_load
alignments
~
f
:
Biotk
.
Fasta
.
from_file
in
let
module
S
=
struct
type
t
=
{
alignment
:
int
;
pos
:
int
;
score
:
float
;
}
let
leq
s1
s2
=
Float
.(
s1
.
score
<=
s2
.
score
)
end
in
let
module
H
=
CCHeap
.
Make
(
S
)
in
let
add_result_line
acc
~
alignment
~
pos
~
score
=
let
candidate
=
{
S
.
alignment
;
pos
;
score
}
in
let
acc
=
H
.
add
acc
candidate
in
if
H
.
size
acc
>
ranking_size
then
H
.
take_exn
acc
|>
fst
else
acc
in
let
add_result_file
alignment
acc
(
rf
:
Result_table
.
t
Lazy
.
t
)
=
let
col
=
List
.
Assoc
.
find_exn
(
Lazy
.
force
rf
)
.
scores_per_meth
column_label
~
equal
:
String
.
equal
in
Array
.
foldi
col
~
init
:
acc
~
f
:
(
fun
pos
acc
score
->
add_result_line
acc
~
alignment
~
pos
~
score
)
in
let
top_sites
=
Array
.
foldi
results
~
init
:
H
.
empty
~
f
:
add_result_file
|>
H
.
to_list_sorted
|>
List
.
rev
in
let
annotate_site
(
s
:
S
.
t
)
=
let
id
=
alignment_ids
.
(
s
.
alignment
)
in
let
tree
=
Lazy
.
force
trees
.
(
s
.
alignment
)
in
let
alignment
=
match
Lazy
.
force
alignments
.
(
s
.
alignment
)
with
|
Ok
(
_
,
fa
)
->
fa
|
Error
_
->
assert
false
in
let
leaf_values
=
List
.
map
alignment
~
f
:
(
fun
it
->
it
.
description
,
it
.
sequence
.
[
s
.
pos
]
)
in
let
contents
=
let
open
Phylogenetics
in
Newick
.
with_inner_tree
tree
~
f
:
(
fun
t
->
Tree
.
map
t
~
node
:
(
fun
_
->
()
)
~
branch
:
(
fun
bi
->
Option
.
value_exn
bi
.
length
)
~
leaf
:
(
fun
ni
->
let
name
=
Option
.
value_exn
ni
.
name
in
let
aa
=
List
.
Assoc
.
find_exn
leaf_values
name
~
equal
:
String
.
equal
in
name
,
aa
,
String
.
Set
.
mem
convergent_species
name
)
)
in
{
Candidate_site
.
alignment_id
=
id
;
contents
;
pos
=
s
.
pos
;
score
=
s
.
score
}
in
List
.
map
top_sites
~
f
:
annotate_site
let
site_ranking
~
meth
~
convergent_species
db
=
let
detection_method
,
column_label
=
match
meth
with
|
`multinomial_asymptotic_lrt
->
multinomial_asymptotic_lrt
,
"1MinusLRT"
in
let
queries
=
alignments_of_db
db
|>
List
.
map
~
f
:
(
query
~
convergent_species
:
(
Workflow
.
data
convergent_species
))
in
let
alignment_ids
=
List
.
map
queries
~
f
:
family_id
in
let
aa_alignments
=
List
.
map
queries
~
f
:
amino_acid_alignment
in
let
result_files
=
List
.
map
queries
~
f
:
detection_method
in
let
trees
=
List
.
map
queries
~
f
:
(
tree
~
branch_length_unit
:
`Amino_acid
)
in
ranking_of_results
~
alignment_ids
~
convergent_species
aa_alignments
trees
result_files
~
column_label
lib/orthomam.mli
View file @
1805222a
...
...
@@ -13,6 +13,8 @@ val query :
convergent_species
:
string
list
workflow
->
query
val
family_id
:
query
->
string
val
alignment
:
query
->
phylip
file
val
tree
:
...
...
@@ -71,3 +73,20 @@ val convergence_species_tree_pdf :
convergent_species
:
string
list
->
Orthomam_db
.
t
->
pdf
file
module
Candidate_site
:
sig
open
Phylogenetics
type
t
=
{
alignment_id
:
string
;
pos
:
int
;
contents
:
(
unit
,
string
*
char
*
bool
,
float
)
Tree
.
t
;
score
:
float
;
}
end
val
site_ranking
:
meth
:
[
`multinomial_asymptotic_lrt
]
->
convergent_species
:
string
list
->
Orthomam_db
.
t
->
Candidate_site
.
t
list
workflow
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment