Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
4d9215f4
Commit
4d9215f4
authored
Dec 10, 2020
by
Philippe Veber
Browse files
added new note on multinomial method and phylogeny
parent
b041a8e1
Changes
2
Hide whitespace changes
Inline
Sidebyside
Showing
2 changed files
with
154 additions
and
3 deletions
+154
3
notes/dune
notes/dune
+7
3
notes/multinomial_and_phylogeny.md
notes/multinomial_and_phylogeny.md
+147
0
No files found.
notes/dune
View file @
4d9215f4
(executable
(name multinomial_null)
(executable
s
(name
s
multinomial_null
multinomial_and_phylogeny
)
(libraries codepi scirep)
(modules Multinomial_null)
(modes native)
(preprocess (pps bistro.ppx)))
...
...
@@ 9,3 +8,8 @@
(target multinomial_null.ml)
(deps multinomial_null.md)
(action (withstdoutto %{target} (run ocamlmdx pp %{deps}))))
(rule
(target multinomial_and_phylogeny.ml)
(deps multinomial_and_phylogeny.md)
(action (withstdoutto %{target} (run ocamlmdx pp %{deps}))))
notes/multinomial_and_phylogeny.md
0 → 100644
View file @
4d9215f4
# Multinomial and phylogeny
The multinomial method for detecting convergent substitutions
completely ignores phylogenetic inertia. A rather immediate
consequence is that it doesn't assess significance properly: its
pvalues are far from calibration. However in our tests it behaves
rather well in average precision. This notes shows a family of tree
topologies on which the multinomial method significantly underperforms
in comparison to a phylogenyaware method (tdg09).
```
ocaml
open
Core_kernel
;;
open
Phylogenetics
;;
open
Codepitk
;;
module
Top
=
Bistro_utils
.
Toplevel_eval
.
Make
(
struct
let
np
=
4
let
mem
=
4
end
)()
```
```
ocaml
let
tree_dfs_map
t
~
node
~
leaf
~
branch
~
init
=
let
rec
node_traversal
acc
=
function

Tree
.
Leaf
l
>
let
acc
,
l'
=
leaf
acc
l
in
Tree
.
Leaf
l'
,
acc

Node
n
>
let
acc
,
data
=
node
acc
n
.
data
in
let
children
,
acc
=
List1
.
fold
n
.
branches
~
init
:
([]
,
acc
)
~
f
:
(
fun
(
branch_acc
,
acc
)
b
>
let
b'
,
acc
=
branch_traversal
acc
b
in
b'
::
branch_acc
,
acc
)
in
Tree
.
node
data
(
List1
.
of_list_exn
(
List
.
rev
children
))
,
acc
and
branch_traversal
acc
(
Tree
.
Branch
b
)
=
let
acc
,
data
=
branch
acc
b
.
data
in
let
tip
,
acc
=
node_traversal
acc
b
.
tip
in
Tree
.
branch
data
tip
,
acc
in
fst
(
node_traversal
init
t
)
let
tree_renumber_leaves
t
=
tree_dfs_map
t
~
init
:
0
~
node
:
(
fun
i
d
>
i
,
d
)
~
leaf
:
(
fun
i
(
_
,
d
)
>
(
i
+
1
)
,
(
i
,
d
))
~
branch
:
(
fun
i
d
>
i
,
d
)
```
```
ocaml
let
bad_tree_for_multinomial
?
(
lambda
=
1
.
)
n
=
let
branch
length
condition
tip
=
Tree
.
branch
{
Convergence_tree
.
length
;
condition
}
tip
in
let
pitchfork
length
condition
=
Tree
.
node
()
(
List1
.
init
n
~
f
:
(
fun
_
>
branch
length
condition
(
Tree
.
leaf
(
0
,
condition
))))
in
let
indep_subtree
=
Codepitk
.
Tdg09
.
Pack
.
pair_tree
~
branch_length1
:
lambda
~
branch_length2
:
lambda
~
npairs
:
n
in
let
dep_subtree
=
let
p
=
0
.
99
*.
lambda
and
q
=
0
.
01
*.
lambda
in
Tree
.
binary_node
()
(
branch
p
`Ancestral
(
pitchfork
q
`Ancestral
))
(
branch
p
`Convergent
(
pitchfork
q
`Convergent
))
in
Tree
.
binary_node
()
(
branch
2
.
`Ancestral
dep_subtree
)
(
branch
2
.
`Ancestral
indep_subtree
)
>
tree_renumber_leaves
```
```
ocaml
let
string_of_tree
t
=
Tree
.
to_printbox
t
~
leaf
:
(
fun
(
i
,
cond
)
>
sprintf
"%d (%c)"
i
(
match
cond
with
`Convergent
>
'
C'

`Ancestral
>
'
A'
))
>
PrintBox_text
.
to_string
;;
string_of_tree
(
bad_tree_for_multinomial
4
);;
```
```
ocaml
let
wag
=
Bistro_unix
.
wget
"https://www.ebi.ac.uk/goldmansrv/WAG/wag.dat"
>
Top
.
path
>
Wag
.
parse
let
tdg09_site_simulation
?
alpha
:
(
alpha
=
1
.
)
?
(
scale
=
1
.
)
~
convergent_site
tree
=
let
open
Tdg09
.
Pack
in
let
exchangeability_matrix
=
wag
.
rate_matrix
in
let
stationary_distribution0
=
simulate_profile
alpha
in
let
stationary_distribution1
=
if
convergent_site
then
simulate_profile
alpha
else
stationary_distribution0
in
Model3
.
simulate_site
~
exchangeability_matrix
~
stationary_distribution0
~
stationary_distribution1
~
scale
tree
let
tdg09_simulation
?
alpha
tree
~
n_h0
~
n_ha
=
Array
.
init
(
n_h0
+
n_ha
)
~
f
:
(
fun
i
>
tdg09_site_simulation
tree
?
alpha
~
convergent_site
:
(
i
<
n_h0
)
)
let
tdg09_on_tdg09_simulation
tree
sim
=
let
open
Tdg09
.
Pack
in
let
site
=
Tree
.
leaves
sim
>
Array
.
of_list
in
let
_
,
_
,
lrt
=
Model3
.
lrt
~
mode
:
`sparse
wag
tree
site
in
lrt
.
pvalue
let
multinomial_on_tdg09_simulation
site
=
let
leaves1
,
leaves2
=
Convergence_tree
.
leaves
site
>
List
.
partition_map
~
f
:
(
function

aa
,
`Ancestral
>
Either
.
first
aa

aa
,
`Convergent
>
Either
.
second
aa
)
in
let
counts
x
=
(
Amino_acid
.
counts
(
Sequence
.
of_list
x
)
:>
int
array
)
in
let
x1
=
counts
leaves1
in
let
x2
=
counts
leaves2
in
let
d
=
Multinomial_test
.
data
~
x1
~
x2
in
let
r
=
Multinomial_test
.(
Permutation
.
test
~
statistic
:
LRT
.
likelihood_log_ratio
d
)
in
r
.
pvalue
let
tree_path
=
"data/besnard2009/besnard2009.nhx"
;;
let
load_tree
fn
=
Convergence_tree
.
from_file
fn
>
Rresult
.
R
.
failwith_error_msg
>
Tdg09
.
Pack
.
convergence_tree
```
```
ocaml
let
run
?
alpha
~
n_h0
~
n_ha
()
=
let
tree
=
bad_tree_for_multinomial
~
lambda
:
1
.
20
in
let
sites
=
tdg09_simulation
?
alpha
tree
~
n_h0
~
n_ha
in
let
run
f
=
Prc
.
Dataset
(
List
.
init
(
Array
.
length
sites
)
~
f
:
(
fun
i
>
f
sites
.
(
i
)
,
i
>=
n_h0
))
in
let
scores_tdg09
=
run
(
tdg09_on_tdg09_simulation
tree
)
in
let
scores_multinomial
=
run
multinomial_on_tdg09_simulation
in
[
"multinomial"
,
Prc
.
Precision_recall
.
auc_average_precision
scores_multinomial
;
"tdg09"
,
Prc
.
Precision_recall
.
auc_average_precision
scores_tdg09
;
]
let
_
=
run
~
alpha
:
0
.
1
~
n_h0
:
100
~
n_ha
:
100
()
```
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment