Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 485292e8 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

Homemade LMM implementation, has matrix inversion issues

parent dc4e0ad3
......@@ -28,8 +28,27 @@ let correlations (t : Convergence_tree.u) : (string * string * float) list
in
fst (tree 0. t)
let index_correlations (t : Convergence_tree.u) cors =
let index = String.Table.create () in
let leaves = Tree.leaves t in
List.iteri leaves ~f:(fun idx l ->
String.Table.set index
~key:(Option.value_exn l.Newick.name)
~data:idx) ;
List.map cors ~f:(fun (l1, l2, c) ->
let f = String.Table.find index in
match (f l1, f l2) with
| Some id1, Some id2 -> (id1, id2, c)
| _ -> failwithf "Leaf not found : %s" l1 ())
let matrix_of_correlations n cor_assoc =
let mat = L.Mat.create n n in
List.iter cor_assoc ~f:(fun (i, j, c) -> mat.{i + 1, j + 1} <- c) ;
mat
let design_matrix ~m ~n ~aa_at_site (al : Alignment.t) =
L.Mat.init_rows m n (fun i j ->
let i = i - 1 and j = j - 1 in
if j = 0 then 1.
else if Char.(al.sequences.(i).[j - 1] = aa_at_site.(j - 1)) then 1.
else 0.)
......@@ -38,32 +57,43 @@ let predict_y ~_X_ ~theta = L.gemv _X_ theta
let squares_sum ~y_r ~y_q = L.Vec.sub y_r y_q |> L.Vec.sqr_nrm2
let f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q =
let f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q ~rank_r ~rank_q ~n =
let y_r = predict_y ~_X_:_X_r ~theta:theta_r in
let y_q = predict_y ~_X_:_X_q ~theta:theta_q in
let scm_rq = squares_sum ~y_r ~y_q in
let scr_r = squares_sum ~y_r ~y_q:y in
(* assuming X has full rank since we are solving using LM equations *)
let rank_r = Float.of_int (L.Mat.dim2 _X_r) in
let rank_q = 1. in
let n = Float.of_int (L.Vec.dim y) in
scm_rq /. (rank_r -. rank_q) /. (scr_r /. (n -. rank_r))
let in_place f x =
f x ;
x
let solve ~y ~_X_ ~_T_ =
let _Xtilde_ = L.Mat.mul _X_ _T_ in
let ytilde = L.gemv _T_ y in
L.gemm
(L.getri (L.gemm ~transa:`T _Xtilde_ _Xtilde_))
L.gemv
(in_place L.getri (L.gemm ~transa:`T _Xtilde_ _Xtilde_))
(L.gemv ~trans:`T _Xtilde_ ytilde)
let lrt_on_one_site ~alignment:al ~phenotypes:_ ~_C_ ~site =
let test_on_one_site ~alignment:al ~phenotypes:y ~_C_ ~site =
let m = Alignment.nrows al in
let aa_at_site =
Alignment.residues al ~column:site |> Char.Set.to_list |> Array.of_list
in
let n = Array.length aa_at_site in
let _X_ = design_matrix ~m ~n ~aa_at_site al in
()
let _X_r = design_matrix ~m ~n ~aa_at_site al in
let _X_q = design_matrix ~m ~n:1 ~aa_at_site al in
let theta_r = solve ~y ~_X_:_X_r ~_T_:_C_ in
let theta_q = solve ~y ~_X_:_X_q ~_T_:_C_ in
(* assuming X has full rank since we are solving using LM equations *)
let rank_r = Float.of_int (L.Mat.dim2 _X_r) in
let rank_q = 1. in
let n = Float.of_int (L.Vec.dim y) in
let _F_ = f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q ~rank_r ~rank_q ~n in
let nu1 = rank_r -. rank_q in
let nu2 = n -. rank_r in
Some (1. -. Gsl.Cdf.fdist_P ~x:_F_ ~nu1 ~nu2)
let phenotypes_of_tree t =
Convergence_tree.leaves t
......@@ -71,8 +101,13 @@ let phenotypes_of_tree t =
match condition with `Ancestral -> 0. | `Convergent -> 1.)
|> Array.of_list |> L.Vec.of_array
let lrt ~alignment ~tree =
let test ~alignment ~tree =
let phenotypes = phenotypes_of_tree tree in
let cors = correlations tree |> index_correlations tree in
let n = L.Vec.dim phenotypes in
let _C_ = in_place (L.potrf ~up:false) (matrix_of_correlations n cors) in
Array.init (Alignment.ncols alignment) ~f:(fun site ->
lrt_on_one_site ~alignment ~phenotypes ~site)
|> assert false
test_on_one_site ~alignment ~phenotypes ~_C_ ~site)
let result_table_of_test pvals =
Result_table.{ oracle = None; scores_per_meth = [ ("HomeLMM", pvals) ] }
......@@ -6,5 +6,7 @@ open Phylogenetics
H0: b = 0
*)
val lrt :
alignment:Alignment.t -> tree:Convergence_tree.u -> Result_table.t
val test :
alignment:Alignment.t -> tree:Convergence_tree.u -> float option array
val result_table_of_test : float option array -> Result_table.t
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment