open Core_kernel open Phylogenetics module L = Lacaml.D module BI = Phylogenetics_convergence.Simulator.Branch_info type correlations = (string * string * float) list * String.Set.t let merge_correlations time_from_ancestor ((dist_l, l) : correlations) ((dist_r, r) : correlations) = let dist_lr = String.Set.fold l ~init:[] ~f:(fun acc e -> String.Set.fold r ~init:acc ~f:(fun acc f -> (e, f, time_from_ancestor) :: acc)) in (List.concat [ dist_l; dist_r; dist_lr ], String.Set.union l r) let correlations (t : Convergence_tree.u) : (string * string * float) list = let rec tree time_from_ancestor = function | Tree.Leaf l -> let l = Option.value_exn l.Newick.name in ([ (l, l, time_from_ancestor) ], String.Set.singleton l) | Node n -> List1.map n.branches ~f:(branch time_from_ancestor) |> List1.reduce ~f:(merge_correlations time_from_ancestor) and branch time_from_ancestor (Branch b) = tree (time_from_ancestor +. b.data.BI.length) b.tip in fst (tree 0. t) let design_matrix ~m ~n ~aa_at_site (al : Alignment.t) = L.Mat.init_rows m n (fun i j -> if Char.(al.sequences.(i).[j] = aa_at_site.(j)) then 1. else 0.) let solve ~y ~_X_ ~_T_ = let _Xtilde_ = L.Mat.mul _X_ _T_ in let ytilde = L.gemv _T_ y in L.gemm (L.getri (L.gemm ~transa:`T _Xtilde_ _Xtilde_)) (L.gemv ~trans:`T _Xtilde_ ytilde) let lrt_on_one_site ~alignment:al ~phenotypes:_ ~_C_ ~site = let m = Alignment.nrows al in let aa_at_site = Alignment.residues al ~column:site |> Char.Set.to_list |> Array.of_list in let n = Array.length aa_at_site in let _X_ = design_matrix ~m ~n ~aa_at_site al in () let phenotypes_of_tree t = Convergence_tree.leaves t |> List.map ~f:(fun (_, condition) -> match condition with `Ancestral -> 0. | `Convergent -> 1.) |> Array.of_list |> L.Vec.of_array let lrt ~alignment ~tree = let phenotypes = phenotypes_of_tree tree in Array.init (Alignment.ncols alignment) ~f:(fun site -> lrt_on_one_site ~alignment ~phenotypes ~site) |> assert false