Commit dc36ba8c authored by Philippe Veber's avatar Philippe Veber
Browse files

tk/Inhouse_lmm: fixed cholevsky decomposition and design matrix calculation

potrf function in lapack updates the lower or upper part of its
argument but leaves the rest "as is".

there was a serious bug in the calculation of the design matrix
(confusion between indices of row and columns), but also in principle
it was incorrect, because it was not full rank. One hot encoding means
the related columns sum to 1, that is the sum is equal to the offset

Finally when computing the degrees of freedom for the F-distribution
there were a few confusions.
parent 63ab4af5
......@@ -49,54 +49,56 @@ let matrix_of_correlations n cor_assoc =
) ;
let design_matrix ~m ~n ~aa_at_site (al : Alignment.t) =
let design_matrix ~m ~aa_at_site ~site (al : Alignment.t) =
let n = Int.max 1 (Array.length aa_at_site) in
L.Mat.init_rows m n (fun i j ->
let i = i - 1 and j = j - 1 in
if j = 0 then 1.
else if Char.(al.sequences.(i).[j - 1] = aa_at_site.(j - 1)) then 1.
else if Char.(al.sequences.(i).[site] = aa_at_site.(j)) then 1.
else 0.)
let predict_y ~_X_ ~theta = L.gemv _X_ theta
let squares_sum ~y_r ~y_q = L.Vec.sub y_r y_q |> L.Vec.sqr_nrm2
let f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q ~rank_r ~rank_q ~n =
let f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q =
let n = L.Mat.dim1 _X_r in
assert (n = L.Mat.dim1 _X_q) ;
let r = L.Mat.dim2 _X_r in
let q = L.Mat.dim2 _X_q in
let y_r = predict_y ~_X_:_X_r ~theta:theta_r in
let y_q = predict_y ~_X_:_X_q ~theta:theta_q in
let scm_rq = squares_sum ~y_r ~y_q in
let scr_r = squares_sum ~y_r ~y_q:y in
scm_rq /. (rank_r -. rank_q) /. (scr_r /. (n -. rank_r))
scm_rq /. float (r - q) /. (scr_r /. float (n - r))
let in_place f x =
f x ;
let solve ~y ~_X_ ~_T_ =
let _Xtilde_ = L.Mat.mul _X_ _T_ in
let ytilde = L.gemv _T_ y in
let solve ~y ~_X_ ~transform_matrix =
let _Xtilde_ = L.gemm transform_matrix _X_ in
let ytilde = L.gemv transform_matrix y in
(in_place L.getri (L.gemm ~transa:`T _Xtilde_ _Xtilde_))
(L.gemv ~trans:`T _Xtilde_ ytilde)
let test_on_one_site ~alignment:al ~phenotypes:y ~_C_ ~site =
let test_on_one_site ~alignment:al ~phenotypes:y ~transform_matrix ~site =
let m = Alignment.nrows al in
assert (m = L.Vec.dim y) ;
let aa_at_site =
Alignment.residues al ~column:site |> Char.Set.to_list |> Array.of_list
let n = Array.length aa_at_site in
let _X_r = design_matrix ~m ~n ~aa_at_site al in
let _X_q = design_matrix ~m ~n:1 ~aa_at_site al in
let theta_r = solve ~y ~_X_:_X_r ~_T_:_C_ in
let theta_q = solve ~y ~_X_:_X_q ~_T_:_C_ in
(* assuming X has full rank since we are solving using LM equations *)
let rank_r = Float.of_int (L.Mat.dim2 _X_r) in
let rank_q = 1. in
let n = Float.of_int (L.Vec.dim y) in
let _F_ = f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q ~rank_r ~rank_q ~n in
let nu1 = rank_r -. rank_q in
let nu2 = n -. rank_r in
Some (1. -. Gsl.Cdf.fdist_P ~x:_F_ ~nu1 ~nu2)
if Array.length aa_at_site <= 1 then None
let _X_r = design_matrix ~m ~aa_at_site ~site al in
let _X_q = design_matrix ~m ~aa_at_site:[||] ~site al in
let theta_r = solve ~y ~_X_:_X_r ~transform_matrix in
let theta_q = solve ~y ~_X_:_X_q ~transform_matrix in
let _F_ = f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q in
let nu1 = float L.Mat.(dim2 _X_r - dim2 _X_q) in
let nu2 = float L.Mat.(dim1 _X_r - dim2 _X_r) in
Some (1. -. Gsl.Cdf.fdist_P ~x:_F_ ~nu1 ~nu2)
let phenotypes_of_tree t =
Convergence_tree.leaves t
......@@ -104,13 +106,23 @@ let phenotypes_of_tree t =
match condition with `Ancestral -> 0. | `Convergent -> 1.)
|> Array.of_list |> L.Vec.of_array
let transform_matrix n cors =
let sigma () = matrix_of_correlations n cors in
let mat_C = in_place (L.potrf ~up:false) (sigma ()) in
for i = 1 to L.Mat.dim1 mat_C do
for j = i + 1 to L.Mat.dim2 mat_C do
mat_C.{i, j} <- 0.
done ;
in_place L.getri mat_C
let test ~alignment ~tree =
let phenotypes = phenotypes_of_tree tree in
let cors = correlations tree |> index_correlations tree in
let n = L.Vec.dim phenotypes in
let _C_ = in_place (L.potrf ~up:false) (matrix_of_correlations n cors) in
let transform_matrix = transform_matrix n cors in
Array.init (Alignment.ncols alignment) ~f:(fun site ->
test_on_one_site ~alignment ~phenotypes ~_C_ ~site)
test_on_one_site ~alignment ~phenotypes ~transform_matrix ~site)
let result_table_of_test pvals =
Result_table.{ oracle = None; scores_per_meth = [ ("HomeLMM", pvals) ] }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment