Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit dc36ba8c authored by Philippe Veber's avatar Philippe Veber
Browse files

tk/Inhouse_lmm: fixed cholevsky decomposition and design matrix calculation

potrf function in lapack updates the lower or upper part of its
argument but leaves the rest "as is".

there was a serious bug in the calculation of the design matrix
(confusion between indices of row and columns), but also in principle
it was incorrect, because it was not full rank. One hot encoding means
the related columns sum to 1, that is the sum is equal to the offset
column.

Finally when computing the degrees of freedom for the F-distribution
there were a few confusions.
parent 63ab4af5
...@@ -49,54 +49,56 @@ let matrix_of_correlations n cor_assoc = ...@@ -49,54 +49,56 @@ let matrix_of_correlations n cor_assoc =
) ; ) ;
mat mat
let design_matrix ~m ~n ~aa_at_site (al : Alignment.t) = let design_matrix ~m ~aa_at_site ~site (al : Alignment.t) =
let n = Int.max 1 (Array.length aa_at_site) in
L.Mat.init_rows m n (fun i j -> L.Mat.init_rows m n (fun i j ->
let i = i - 1 and j = j - 1 in let i = i - 1 and j = j - 1 in
if j = 0 then 1. if j = 0 then 1.
else if Char.(al.sequences.(i).[j - 1] = aa_at_site.(j - 1)) then 1. else if Char.(al.sequences.(i).[site] = aa_at_site.(j)) then 1.
else 0.) else 0.)
let predict_y ~_X_ ~theta = L.gemv _X_ theta let predict_y ~_X_ ~theta = L.gemv _X_ theta
let squares_sum ~y_r ~y_q = L.Vec.sub y_r y_q |> L.Vec.sqr_nrm2 let squares_sum ~y_r ~y_q = L.Vec.sub y_r y_q |> L.Vec.sqr_nrm2
let f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q ~rank_r ~rank_q ~n = let f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q =
let n = L.Mat.dim1 _X_r in
assert (n = L.Mat.dim1 _X_q) ;
let r = L.Mat.dim2 _X_r in
let q = L.Mat.dim2 _X_q in
let y_r = predict_y ~_X_:_X_r ~theta:theta_r in let y_r = predict_y ~_X_:_X_r ~theta:theta_r in
let y_q = predict_y ~_X_:_X_q ~theta:theta_q in let y_q = predict_y ~_X_:_X_q ~theta:theta_q in
let scm_rq = squares_sum ~y_r ~y_q in let scm_rq = squares_sum ~y_r ~y_q in
let scr_r = squares_sum ~y_r ~y_q:y in let scr_r = squares_sum ~y_r ~y_q:y in
scm_rq /. float (r - q) /. (scr_r /. float (n - r))
scm_rq /. (rank_r -. rank_q) /. (scr_r /. (n -. rank_r))
let in_place f x = let in_place f x =
f x ; f x ;
x x
let solve ~y ~_X_ ~_T_ = let solve ~y ~_X_ ~transform_matrix =
let _Xtilde_ = L.Mat.mul _X_ _T_ in let _Xtilde_ = L.gemm transform_matrix _X_ in
let ytilde = L.gemv _T_ y in let ytilde = L.gemv transform_matrix y in
L.gemv L.gemv
(in_place L.getri (L.gemm ~transa:`T _Xtilde_ _Xtilde_)) (in_place L.getri (L.gemm ~transa:`T _Xtilde_ _Xtilde_))
(L.gemv ~trans:`T _Xtilde_ ytilde) (L.gemv ~trans:`T _Xtilde_ ytilde)
let test_on_one_site ~alignment:al ~phenotypes:y ~_C_ ~site = let test_on_one_site ~alignment:al ~phenotypes:y ~transform_matrix ~site =
let m = Alignment.nrows al in let m = Alignment.nrows al in
assert (m = L.Vec.dim y) ;
let aa_at_site = let aa_at_site =
Alignment.residues al ~column:site |> Char.Set.to_list |> Array.of_list Alignment.residues al ~column:site |> Char.Set.to_list |> Array.of_list
in in
let n = Array.length aa_at_site in if Array.length aa_at_site <= 1 then None
let _X_r = design_matrix ~m ~n ~aa_at_site al in else
let _X_q = design_matrix ~m ~n:1 ~aa_at_site al in let _X_r = design_matrix ~m ~aa_at_site ~site al in
let theta_r = solve ~y ~_X_:_X_r ~_T_:_C_ in let _X_q = design_matrix ~m ~aa_at_site:[||] ~site al in
let theta_q = solve ~y ~_X_:_X_q ~_T_:_C_ in let theta_r = solve ~y ~_X_:_X_r ~transform_matrix in
(* assuming X has full rank since we are solving using LM equations *) let theta_q = solve ~y ~_X_:_X_q ~transform_matrix in
let rank_r = Float.of_int (L.Mat.dim2 _X_r) in let _F_ = f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q in
let rank_q = 1. in let nu1 = float L.Mat.(dim2 _X_r - dim2 _X_q) in
let n = Float.of_int (L.Vec.dim y) in let nu2 = float L.Mat.(dim1 _X_r - dim2 _X_r) in
let _F_ = f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q ~rank_r ~rank_q ~n in Some (1. -. Gsl.Cdf.fdist_P ~x:_F_ ~nu1 ~nu2)
let nu1 = rank_r -. rank_q in
let nu2 = n -. rank_r in
Some (1. -. Gsl.Cdf.fdist_P ~x:_F_ ~nu1 ~nu2)
let phenotypes_of_tree t = let phenotypes_of_tree t =
Convergence_tree.leaves t Convergence_tree.leaves t
...@@ -104,13 +106,23 @@ let phenotypes_of_tree t = ...@@ -104,13 +106,23 @@ let phenotypes_of_tree t =
match condition with `Ancestral -> 0. | `Convergent -> 1.) match condition with `Ancestral -> 0. | `Convergent -> 1.)
|> Array.of_list |> L.Vec.of_array |> Array.of_list |> L.Vec.of_array
let transform_matrix n cors =
let sigma () = matrix_of_correlations n cors in
let mat_C = in_place (L.potrf ~up:false) (sigma ()) in
for i = 1 to L.Mat.dim1 mat_C do
for j = i + 1 to L.Mat.dim2 mat_C do
mat_C.{i, j} <- 0.
done
done ;
in_place L.getri mat_C
let test ~alignment ~tree = let test ~alignment ~tree =
let phenotypes = phenotypes_of_tree tree in let phenotypes = phenotypes_of_tree tree in
let cors = correlations tree |> index_correlations tree in let cors = correlations tree |> index_correlations tree in
let n = L.Vec.dim phenotypes in let n = L.Vec.dim phenotypes in
let _C_ = in_place (L.potrf ~up:false) (matrix_of_correlations n cors) in let transform_matrix = transform_matrix n cors in
Array.init (Alignment.ncols alignment) ~f:(fun site -> Array.init (Alignment.ncols alignment) ~f:(fun site ->
test_on_one_site ~alignment ~phenotypes ~_C_ ~site) test_on_one_site ~alignment ~phenotypes ~transform_matrix ~site)
let result_table_of_test pvals = let result_table_of_test pvals =
Result_table.{ oracle = None; scores_per_meth = [ ("HomeLMM", pvals) ] } Result_table.{ oracle = None; scores_per_meth = [ ("HomeLMM", pvals) ] }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment