Commit 4d9215f4 authored by Philippe Veber's avatar Philippe Veber
Browse files

added new note on multinomial method and phylogeny

parent b041a8e1
(executable
(name multinomial_null)
(executables
(names multinomial_null multinomial_and_phylogeny)
(libraries codepi scirep)
(modules Multinomial_null)
(modes native)
(preprocess (pps bistro.ppx)))
......@@ -9,3 +8,8 @@
(target multinomial_null.ml)
(deps multinomial_null.md)
(action (with-stdout-to %{target} (run ocaml-mdx pp %{deps}))))
(rule
(target multinomial_and_phylogeny.ml)
(deps multinomial_and_phylogeny.md)
(action (with-stdout-to %{target} (run ocaml-mdx pp %{deps}))))
# Multinomial and phylogeny
The multinomial method for detecting convergent substitutions
completely ignores phylogenetic inertia. A rather immediate
consequence is that it doesn't assess significance properly: its
pvalues are far from calibration. However in our tests it behaves
rather well in average precision. This notes shows a family of tree
topologies on which the multinomial method significantly underperforms
in comparison to a phylogeny-aware method (tdg09).
```ocaml
open Core_kernel;;
open Phylogenetics;;
open Codepitk;;
module Top = Bistro_utils.Toplevel_eval.Make(struct let np = 4 let mem = 4 end)()
```
```ocaml
let tree_dfs_map t ~node ~leaf ~branch ~init =
let rec node_traversal acc = function
| Tree.Leaf l ->
let acc, l' = leaf acc l in
Tree.Leaf l', acc
| Node n ->
let acc, data = node acc n.data in
let children, acc = List1.fold n.branches ~init:([], acc) ~f:(fun (branch_acc, acc) b ->
let b', acc = branch_traversal acc b in
b' :: branch_acc, acc
)
in
Tree.node data (List1.of_list_exn (List.rev children)), acc
and branch_traversal acc (Tree.Branch b) =
let acc, data = branch acc b.data in
let tip, acc = node_traversal acc b.tip in
Tree.branch data tip, acc
in
fst (node_traversal init t)
let tree_renumber_leaves t =
tree_dfs_map t
~init:0
~node:(fun i d -> i, d)
~leaf:(fun i (_, d) -> (i + 1), (i, d))
~branch:(fun i d -> i, d)
```
```ocaml
let bad_tree_for_multinomial ?(lambda = 1.) n =
let branch length condition tip =
Tree.branch { Convergence_tree.length ; condition } tip
in
let pitchfork length condition =
Tree.node () (List1.init n ~f:(fun _ -> branch length condition (Tree.leaf (0, condition))))
in
let indep_subtree =
Codepitk.Tdg09.Pack.pair_tree
~branch_length1:lambda ~branch_length2:lambda ~npairs:n
in
let dep_subtree =
let p = 0.99 *. lambda and q = 0.01 *. lambda in
Tree.binary_node ()
(branch p `Ancestral (pitchfork q `Ancestral))
(branch p `Convergent (pitchfork q `Convergent))
in
Tree.binary_node ()
(branch 2. `Ancestral dep_subtree)
(branch 2. `Ancestral indep_subtree)
|> tree_renumber_leaves
```
```ocaml
let string_of_tree t =
Tree.to_printbox t
~leaf:(fun (i, cond) -> sprintf "%d (%c)" i (match cond with `Convergent -> 'C' | `Ancestral -> 'A'))
|> PrintBox_text.to_string
;;
string_of_tree (bad_tree_for_multinomial 4);;
```
```ocaml
let wag =
Bistro_unix.wget "https://www.ebi.ac.uk/goldman-srv/WAG/wag.dat"
|> Top.path
|> Wag.parse
let tdg09_site_simulation ?alpha:(alpha = 1.) ?(scale = 1.) ~convergent_site tree =
let open Tdg09.Pack in
let exchangeability_matrix = wag.rate_matrix in
let stationary_distribution0 = simulate_profile alpha in
let stationary_distribution1 = if convergent_site then simulate_profile alpha else stationary_distribution0 in
Model3.simulate_site ~exchangeability_matrix ~stationary_distribution0 ~stationary_distribution1 ~scale tree
let tdg09_simulation ?alpha tree ~n_h0 ~n_ha =
Array.init (n_h0 + n_ha) ~f:(fun i ->
tdg09_site_simulation tree ?alpha ~convergent_site:(i < n_h0)
)
let tdg09_on_tdg09_simulation tree sim =
let open Tdg09.Pack in
let site = Tree.leaves sim |> Array.of_list in
let _, _, lrt = Model3.lrt ~mode:`sparse wag tree site in
lrt.pvalue
let multinomial_on_tdg09_simulation site =
let leaves1, leaves2 =
Convergence_tree.leaves site
|> List.partition_map ~f:(function
| aa, `Ancestral -> Either.first aa
| aa, `Convergent -> Either.second aa
)
in
let counts x = (Amino_acid.counts (Sequence.of_list x) :> int array) in
let x1 = counts leaves1 in
let x2 = counts leaves2 in
let d = Multinomial_test.data ~x1 ~x2 in
let r = Multinomial_test.(
Permutation.test ~statistic:LRT.likelihood_log_ratio d
) in
r.pvalue
let tree_path = "data/besnard2009/besnard2009.nhx";;
let load_tree fn =
Convergence_tree.from_file fn
|> Rresult.R.failwith_error_msg
|> Tdg09.Pack.convergence_tree
```
```ocaml
let run ?alpha ~n_h0 ~n_ha () =
let tree = bad_tree_for_multinomial ~lambda:1. 20 in
let sites = tdg09_simulation ?alpha tree ~n_h0 ~n_ha in
let run f =
Prc.Dataset (List.init (Array.length sites) ~f:(fun i -> f sites.(i), i >= n_h0))
in
let scores_tdg09 = run (tdg09_on_tdg09_simulation tree) in
let scores_multinomial = run multinomial_on_tdg09_simulation in
[ "multinomial", Prc.Precision_recall.auc_average_precision scores_multinomial ;
"tdg09", Prc.Precision_recall.auc_average_precision scores_tdg09 ; ]
let _ = run ~alpha:0.1 ~n_h0:100 ~n_ha:100 ()
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment