Commit 4d9215f4 by Philippe Veber

### added new note on multinomial method and phylogeny

parent b041a8e1
 (executable (name multinomial_null) (executables (names multinomial_null multinomial_and_phylogeny) (libraries codepi scirep) (modules Multinomial_null) (modes native) (preprocess (pps bistro.ppx))) ... ... @@ -9,3 +8,8 @@ (target multinomial_null.ml) (deps multinomial_null.md) (action (with-stdout-to %{target} (run ocaml-mdx pp %{deps})))) (rule (target multinomial_and_phylogeny.ml) (deps multinomial_and_phylogeny.md) (action (with-stdout-to %{target} (run ocaml-mdx pp %{deps}))))
 # Multinomial and phylogeny The multinomial method for detecting convergent substitutions completely ignores phylogenetic inertia. A rather immediate consequence is that it doesn't assess significance properly: its pvalues are far from calibration. However in our tests it behaves rather well in average precision. This notes shows a family of tree topologies on which the multinomial method significantly underperforms in comparison to a phylogeny-aware method (tdg09). ```ocaml open Core_kernel;; open Phylogenetics;; open Codepitk;; module Top = Bistro_utils.Toplevel_eval.Make(struct let np = 4 let mem = 4 end)() ``` ```ocaml let tree_dfs_map t ~node ~leaf ~branch ~init = let rec node_traversal acc = function | Tree.Leaf l -> let acc, l' = leaf acc l in Tree.Leaf l', acc | Node n -> let acc, data = node acc n.data in let children, acc = List1.fold n.branches ~init:([], acc) ~f:(fun (branch_acc, acc) b -> let b', acc = branch_traversal acc b in b' :: branch_acc, acc ) in Tree.node data (List1.of_list_exn (List.rev children)), acc and branch_traversal acc (Tree.Branch b) = let acc, data = branch acc b.data in let tip, acc = node_traversal acc b.tip in Tree.branch data tip, acc in fst (node_traversal init t) let tree_renumber_leaves t = tree_dfs_map t ~init:0 ~node:(fun i d -> i, d) ~leaf:(fun i (_, d) -> (i + 1), (i, d)) ~branch:(fun i d -> i, d) ``` ```ocaml let bad_tree_for_multinomial ?(lambda = 1.) n = let branch length condition tip = Tree.branch { Convergence_tree.length ; condition } tip in let pitchfork length condition = Tree.node () (List1.init n ~f:(fun _ -> branch length condition (Tree.leaf (0, condition)))) in let indep_subtree = Codepitk.Tdg09.Pack.pair_tree ~branch_length1:lambda ~branch_length2:lambda ~npairs:n in let dep_subtree = let p = 0.99 *. lambda and q = 0.01 *. lambda in Tree.binary_node () (branch p `Ancestral (pitchfork q `Ancestral)) (branch p `Convergent (pitchfork q `Convergent)) in Tree.binary_node () (branch 2. `Ancestral dep_subtree) (branch 2. `Ancestral indep_subtree) |> tree_renumber_leaves ``` ```ocaml let string_of_tree t = Tree.to_printbox t ~leaf:(fun (i, cond) -> sprintf "%d (%c)" i (match cond with `Convergent -> 'C' | `Ancestral -> 'A')) |> PrintBox_text.to_string ;; string_of_tree (bad_tree_for_multinomial 4);; ``` ```ocaml let wag = Bistro_unix.wget "https://www.ebi.ac.uk/goldman-srv/WAG/wag.dat" |> Top.path |> Wag.parse let tdg09_site_simulation ?alpha:(alpha = 1.) ?(scale = 1.) ~convergent_site tree = let open Tdg09.Pack in let exchangeability_matrix = wag.rate_matrix in let stationary_distribution0 = simulate_profile alpha in let stationary_distribution1 = if convergent_site then simulate_profile alpha else stationary_distribution0 in Model3.simulate_site ~exchangeability_matrix ~stationary_distribution0 ~stationary_distribution1 ~scale tree let tdg09_simulation ?alpha tree ~n_h0 ~n_ha = Array.init (n_h0 + n_ha) ~f:(fun i -> tdg09_site_simulation tree ?alpha ~convergent_site:(i < n_h0) ) let tdg09_on_tdg09_simulation tree sim = let open Tdg09.Pack in let site = Tree.leaves sim |> Array.of_list in let _, _, lrt = Model3.lrt ~mode:`sparse wag tree site in lrt.pvalue let multinomial_on_tdg09_simulation site = let leaves1, leaves2 = Convergence_tree.leaves site |> List.partition_map ~f:(function | aa, `Ancestral -> Either.first aa | aa, `Convergent -> Either.second aa ) in let counts x = (Amino_acid.counts (Sequence.of_list x) :> int array) in let x1 = counts leaves1 in let x2 = counts leaves2 in let d = Multinomial_test.data ~x1 ~x2 in let r = Multinomial_test.( Permutation.test ~statistic:LRT.likelihood_log_ratio d ) in r.pvalue let tree_path = "data/besnard2009/besnard2009.nhx";; let load_tree fn = Convergence_tree.from_file fn |> Rresult.R.failwith_error_msg |> Tdg09.Pack.convergence_tree ``` ```ocaml let run ?alpha ~n_h0 ~n_ha () = let tree = bad_tree_for_multinomial ~lambda:1. 20 in let sites = tdg09_simulation ?alpha tree ~n_h0 ~n_ha in let run f = Prc.Dataset (List.init (Array.length sites) ~f:(fun i -> f sites.(i), i >= n_h0)) in let scores_tdg09 = run (tdg09_on_tdg09_simulation tree) in let scores_multinomial = run multinomial_on_tdg09_simulation in [ "multinomial", Prc.Precision_recall.auc_average_precision scores_multinomial ; "tdg09", Prc.Precision_recall.auc_average_precision scores_tdg09 ; ] let _ = run ~alpha:0.1 ~n_h0:100 ~n_ha:100 () ```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!