Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
fecafac7
Commit
fecafac7
authored
May 29, 2020
by
Philippe Veber
Browse files
new attempts to produce trees with no single nodes
parent
5a2a7780
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
162 additions
and
62 deletions
+162
-62
lib/detection_pipeline.ml
lib/detection_pipeline.ml
+8
-1
lib/orthomam.ml
lib/orthomam.ml
+7
-61
lib/toolbox/convergence_tree.ml
lib/toolbox/convergence_tree.ml
+135
-0
lib/toolbox/convergence_tree.mli
lib/toolbox/convergence_tree.mli
+12
-0
No files found.
lib/detection_pipeline.ml
View file @
fecafac7
...
...
@@ -52,6 +52,13 @@ module Make(Q : Query) = struct
let
gene_tree
d
=
Tree_dataset
.
raxmlng_fna
~
fna
:
(
nucleotide_alignment
d
)
()
let
%
pworkflow
tree_with_no_single_child
~
branch_length_unit
d
:
newick
file
=
let
tree_file
=
[
%
path
tree
~
branch_length_unit
d
]
in
let
open
Phylogenetics
in
let
tree
=
Newick
.
from_file
tree_file
in
let
tree
=
Newick
.
map_inner_tree
tree
~
f
:
Reviewphiltrans_toolbox
.
Convergence_tree
.
remove_nodes_with_single_child
in
Newick
.
to_file
tree
[
%
dest
]
let
identical
d
=
let
tree_sc
=
Tree_dataset
.
prepare_sc_tree
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
let
tree_id
=
Tree_dataset
.
prepare_tree_with_node_id
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
...
...
@@ -93,7 +100,7 @@ module Make(Q : Query) = struct
let
tdg09
d
=
Tamuri
.
tdg09
~
tree
:
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
~
tree
:
(
tree
_with_no_single_child
~
branch_length_unit
:
`Amino_acid
d
)
~
faa
:
(
amino_acid_alignment
d
)
()
|>
Tamuri
.
results
...
...
lib/orthomam.ml
View file @
fecafac7
...
...
@@ -47,55 +47,6 @@ let%pworkflow remove_unobserved_sequences_from_alignment phylip : phylip file =
let
filtered_data
=
Phylip
.
make_exn
items
in
Phylip
.
write
~
strict
:
false
filtered_data
[
%
dest
]
let
rec
transfer_tag_to_branches
t
=
let
open
Phylogenetics
in
let
category
:
_
Tree
.
t
->
int
=
function
|
Leaf
(
_
,
c
)
->
c
|
Node
n
->
snd
n
.
data
in
match
t
with
|
Tree
.
Leaf
(
l
,
_
)
->
Tree
.
leaf
l
|
Node
n
->
let
cat_n
=
snd
n
.
data
in
List1
.
map
n
.
branches
~
f
:
(
fun
(
Branch
b
)
->
let
cat_child
=
category
b
.
tip
in
let
tags
=
List
.
concat
[
[
"Condition"
,
Int
.
to_string
cat_child
]
;
(
if
cat_n
<>
cat_child
then
[
"Transition"
,
Int
.
to_string
cat_child
]
else
[]
)
;
b
.
data
.
Newick
.
tags
]
in
Tree
.
branch
{
b
.
data
with
Newick
.
tags
}
(
transfer_tag_to_branches
b
.
tip
)
)
|>
Tree
.
node
(
fst
n
.
data
)
let
condition_tag
(
bi
:
Phylogenetics
.
Newick
.
branch_info
)
=
List
.
Assoc
.
find
bi
.
tags
"Condition"
~
equal
:
String
.
equal
let
tag_tree
t
tagged_leaves
=
let
open
Phylogenetics
in
let
category
(
ni
:
Newick
.
node_info
)
=
Option
.
map
ni
.
name
~
f
:
(
fun
l
->
if
String
.
Set
.
mem
tagged_leaves
l
then
1
else
0
)
in
let
cost
x
y
=
match
x
,
y
with
|
0
,
1
->
2
.
|
1
,
0
->
1
.
|
0
,
0
|
1
,
1
->
0
.
|
_
->
assert
false
in
Fitch
.
fitch
~
cost
~
n
:
2
~
category
t
|>
transfer_tag_to_branches
let
%
pworkflow
clip_tree_on_alignment
tree
ali
=
let
open
Phylogenetics
in
let
tree
=
Newick
.
from_file
[
%
path
tree
]
in
...
...
@@ -108,16 +59,7 @@ let%pworkflow clip_tree_on_alignment tree ali =
(
fun
bi
->
bi
.
Newick
.
name
)
ali_species
with
|
None
->
failwith
"Tree has no leaf in alignment"
|
Some
filtered_tree
->
Tree
.
simplify_node_with_single_child
filtered_tree
(
fun
bi1
bi2
->
match
bi1
.
length
,
condition_tag
bi1
,
bi2
.
length
,
condition_tag
bi2
with
|
Some
l1
,
Some
c1
,
Some
l2
,
Some
c2
->
if
String
.(
c1
=
c2
)
then
let
tags
=
List
.
dedup_and_sort
(
bi1
.
tags
@
bi2
.
tags
)
~
compare
:
[
%
compare
:
string
*
string
]
in
Some
{
Newick
.
tags
;
length
=
Some
(
l1
+.
l2
)
}
else
None
|
_
->
assert
false
)
|
Some
filtered_tree
->
filtered_tree
)
in
Newick
.
to_file
clipped_tree
[
%
dest
]
...
...
@@ -134,7 +76,9 @@ let annotate_convergent_species_in_tree (tree : newick file) species : newick fi
let
ensembl_tree
=
Newick
.
from_file
omm_tree
in
let
tagged_tree
=
Newick
.
map_inner_tree
ensembl_tree
~
f
:
(
fun
t
->
tag_tree
t
(
String
.
Set
.
of_list
species
)
Reviewphiltrans_toolbox
.
Convergence_tree
.
infer_binary_condition_on_branches
~
convergent_leaves
:
(
String
.
Set
.
of_list
species
)
t
)
in
Newick
.
to_file
tagged_tree
[
%
dest
]
...
...
@@ -490,7 +434,9 @@ let%pworkflow convergence_species_tree_pdf ~convergent_species db =
in
let
tree_or_branch
=
Newick
.
from_file
tree_path
|>
Newick
.
map_inner_tree
~
f
:
(
fun
t
->
tag_tree
t
convergent_species
)
|>
Newick
.
map_inner_tree
~
f
:
(
fun
t
->
Reviewphiltrans_toolbox
.
Convergence_tree
.
infer_binary_condition_on_branches
t
~
convergent_leaves
:
convergent_species
)
in
render_tree
tree_or_branch
|>
Biotk_croquis
.
Croquis
.
Layout
.
simple
...
...
lib/toolbox/convergence_tree.ml
0 → 100644
View file @
fecafac7
open
Core_kernel
open
Phylogenetics
type
t
=
Newick
.
tree
module
Tags
=
struct
let
condition_label
=
"Condition"
let
transition_label
=
"Transition"
let
condition
tags
=
List
.
Assoc
.
find
tags
condition_label
~
equal
:
String
.
equal
let
set_condition
tags
c
=
List
.
Assoc
.(
add
(
remove
tags
condition_label
~
equal
:
String
.
equal
)
condition_label
c
~
equal
:
String
.
equal
)
(* let other_tags tags =
* List.filter tags ~f:(fun (key, _) -> String.(key <> condition_label && key <> transition_label)) *)
let
unset_transition
tags
=
List
.
Assoc
.
remove
tags
transition_label
~
equal
:
String
.
equal
let
set_transition
tags
c
=
List
.
Assoc
.(
add
(
unset_transition
tags
)
transition_label
c
~
equal
:
String
.
equal
)
end
let
condition_of_branch_info
(
bi
:
Newick
.
branch_info
)
=
Tags
.
condition
bi
.
tags
let
rec
transfer_condition_to_branches
t
=
let
category
:
_
Tree
.
t
->
int
=
function
|
Leaf
(
_
,
c
)
->
c
|
Node
n
->
snd
n
.
data
in
match
t
with
|
Tree
.
Leaf
(
l
,
_
)
->
Tree
.
leaf
l
|
Node
n
->
List1
.
map
n
.
branches
~
f
:
(
fun
(
Branch
b
)
->
let
cat_child
=
category
b
.
tip
in
let
tags
=
Tags
.
set_condition
b
.
data
.
Newick
.
tags
(
Int
.
to_string
cat_child
)
in
Tree
.
branch
{
b
.
data
with
Newick
.
tags
}
(
transfer_condition_to_branches
b
.
tip
)
)
|>
Tree
.
node
(
fst
n
.
data
)
let
reset_transitions
(
tree
:
t
)
=
let
rec
aux
mother_condition
tree
=
match
(
tree
:
t
)
with
|
Leaf
_
as
l
->
l
|
Node
n
->
let
branches
=
List1
.
map
n
.
branches
~
f
:
(
fun
(
Branch
b
)
->
let
tags
,
c_b
=
match
Tags
.
condition
b
.
data
.
tags
with
|
None
->
failwith
"tree tagged with condition expected"
|
Some
c_b
->
let
tags
=
if
String
.(
c_b
<>
mother_condition
)
then
Tags
.
set_transition
b
.
data
.
tags
c_b
else
Tags
.
unset_transition
b
.
data
.
tags
in
tags
,
c_b
in
let
data
=
{
b
.
data
with
tags
}
in
Tree
.
branch
data
(
aux
c_b
b
.
tip
)
)
in
Node
{
n
with
branches
}
in
match
tree
with
|
Leaf
_
as
l
->
l
|
Node
n
->
let
branches
=
List1
.
map
n
.
branches
~
f
:
(
fun
(
Branch
b
)
->
match
Tags
.
condition
b
.
data
.
tags
with
|
None
->
failwith
"tree tagged with condition expected"
|
Some
c_b
->
Tree
.
branch
b
.
data
(
aux
c_b
b
.
tip
)
)
in
Node
{
n
with
branches
}
let
length_on_each_condition
branches
=
let
module
A
=
Biocaml_unix
.
Accu
in
let
acc
=
A
.
create
~
bin
:
Fn
.
id
~
zero
:
0
.
~
add
:
(
+.
)
()
in
List
.
iter
branches
~
f
:
(
fun
bi
->
match
condition_of_branch_info
bi
,
bi
.
Newick
.
length
with
|
Some
c
,
Some
l
->
A
.
add
acc
c
l
|
_
->
()
)
;
A
.
to_alist
acc
let
remove_nodes_with_single_child
(
tree
:
t
)
=
Tree
.
simplify_node_with_single_child
tree
~
merge_branch_data
:
(
fun
branches
->
let
condition_stats
=
length_on_each_condition
branches
in
let
major_condition
=
List
.
max_elt
condition_stats
~
compare
:
(
fun
(
_
,
l
)
(
_
,
l'
)
->
Float
.
compare
l
l'
)
in
let
tags
=
match
major_condition
with
|
None
->
[]
|
Some
(
c
,
_
)
->
Tags
.
set_condition
[]
c
in
let
length
=
List
.
fold
branches
~
init
:
0
.
~
f
:
(
fun
acc
bi
->
acc
+.
Option
.
value_exn
bi
.
length
)
in
{
Newick
.
tags
;
length
=
Some
length
}
)
|>
reset_transitions
let
infer_binary_condition_on_branches
?
(
gain_relative_cost
=
2
.
)
t
~
convergent_leaves
=
let
category
(
ni
:
Newick
.
node_info
)
=
Option
.
map
ni
.
name
~
f
:
(
fun
l
->
if
String
.
Set
.
mem
convergent_leaves
l
then
1
else
0
)
in
let
cost
x
y
=
match
x
,
y
with
|
0
,
1
->
gain_relative_cost
|
1
,
0
->
1
.
|
0
,
0
|
1
,
1
->
0
.
|
_
->
assert
false
in
Fitch
.
fitch
~
cost
~
n
:
2
~
category
t
|>
transfer_condition_to_branches
|>
reset_transitions
lib/toolbox/convergence_tree.mli
0 → 100644
View file @
fecafac7
open
Core_kernel
open
Phylogenetics
type
t
=
Newick
.
tree
val
infer_binary_condition_on_branches
:
?
gain_relative_cost
:
float
->
t
->
convergent_leaves
:
String
.
Set
.
t
->
t
val
remove_nodes_with_single_child
:
t
->
t
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment