Commit dc36ba8c by Philippe Veber

### tk/Inhouse_lmm: fixed cholevsky decomposition and design matrix calculation

```potrf function in lapack updates the lower or upper part of its
argument but leaves the rest "as is".

there was a serious bug in the calculation of the design matrix
(confusion between indices of row and columns), but also in principle
it was incorrect, because it was not full rank. One hot encoding means
the related columns sum to 1, that is the sum is equal to the offset
column.

Finally when computing the degrees of freedom for the F-distribution
there were a few confusions.```
parent 63ab4af5
 ... @@ -49,54 +49,56 @@ let matrix_of_correlations n cor_assoc = ... @@ -49,54 +49,56 @@ let matrix_of_correlations n cor_assoc = ) ; ) ; mat mat let design_matrix ~m ~n ~aa_at_site (al : Alignment.t) = let design_matrix ~m ~aa_at_site ~site (al : Alignment.t) = let n = Int.max 1 (Array.length aa_at_site) in L.Mat.init_rows m n (fun i j -> L.Mat.init_rows m n (fun i j -> let i = i - 1 and j = j - 1 in let i = i - 1 and j = j - 1 in if j = 0 then 1. if j = 0 then 1. else if Char.(al.sequences.(i).[j - 1] = aa_at_site.(j - 1)) then 1. else if Char.(al.sequences.(i).[site] = aa_at_site.(j)) then 1. else 0.) else 0.) let predict_y ~_X_ ~theta = L.gemv _X_ theta let predict_y ~_X_ ~theta = L.gemv _X_ theta let squares_sum ~y_r ~y_q = L.Vec.sub y_r y_q |> L.Vec.sqr_nrm2 let squares_sum ~y_r ~y_q = L.Vec.sub y_r y_q |> L.Vec.sqr_nrm2 let f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q ~rank_r ~rank_q ~n = let f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q = let n = L.Mat.dim1 _X_r in assert (n = L.Mat.dim1 _X_q) ; let r = L.Mat.dim2 _X_r in let q = L.Mat.dim2 _X_q in let y_r = predict_y ~_X_:_X_r ~theta:theta_r in let y_r = predict_y ~_X_:_X_r ~theta:theta_r in let y_q = predict_y ~_X_:_X_q ~theta:theta_q in let y_q = predict_y ~_X_:_X_q ~theta:theta_q in let scm_rq = squares_sum ~y_r ~y_q in let scm_rq = squares_sum ~y_r ~y_q in let scr_r = squares_sum ~y_r ~y_q:y in let scr_r = squares_sum ~y_r ~y_q:y in scm_rq /. float (r - q) /. (scr_r /. float (n - r)) scm_rq /. (rank_r -. rank_q) /. (scr_r /. (n -. rank_r)) let in_place f x = let in_place f x = f x ; f x ; x x let solve ~y ~_X_ ~_T_ = let solve ~y ~_X_ ~transform_matrix = let _Xtilde_ = L.Mat.mul _X_ _T_ in let _Xtilde_ = L.gemm transform_matrix _X_ in let ytilde = L.gemv _T_ y in let ytilde = L.gemv transform_matrix y in L.gemv L.gemv (in_place L.getri (L.gemm ~transa:`T _Xtilde_ _Xtilde_)) (in_place L.getri (L.gemm ~transa:`T _Xtilde_ _Xtilde_)) (L.gemv ~trans:`T _Xtilde_ ytilde) (L.gemv ~trans:`T _Xtilde_ ytilde) let test_on_one_site ~alignment:al ~phenotypes:y ~_C_ ~site = let test_on_one_site ~alignment:al ~phenotypes:y ~transform_matrix ~site = let m = Alignment.nrows al in let m = Alignment.nrows al in assert (m = L.Vec.dim y) ; let aa_at_site = let aa_at_site = Alignment.residues al ~column:site |> Char.Set.to_list |> Array.of_list Alignment.residues al ~column:site |> Char.Set.to_list |> Array.of_list in in let n = Array.length aa_at_site in if Array.length aa_at_site <= 1 then None let _X_r = design_matrix ~m ~n ~aa_at_site al in else let _X_q = design_matrix ~m ~n:1 ~aa_at_site al in let _X_r = design_matrix ~m ~aa_at_site ~site al in let theta_r = solve ~y ~_X_:_X_r ~_T_:_C_ in let _X_q = design_matrix ~m ~aa_at_site:[||] ~site al in let theta_q = solve ~y ~_X_:_X_q ~_T_:_C_ in let theta_r = solve ~y ~_X_:_X_r ~transform_matrix in (* assuming X has full rank since we are solving using LM equations *) let theta_q = solve ~y ~_X_:_X_q ~transform_matrix in let rank_r = Float.of_int (L.Mat.dim2 _X_r) in let _F_ = f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q in let rank_q = 1. in let nu1 = float L.Mat.(dim2 _X_r - dim2 _X_q) in let n = Float.of_int (L.Vec.dim y) in let nu2 = float L.Mat.(dim1 _X_r - dim2 _X_r) in let _F_ = f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q ~rank_r ~rank_q ~n in Some (1. -. Gsl.Cdf.fdist_P ~x:_F_ ~nu1 ~nu2) let nu1 = rank_r -. rank_q in let nu2 = n -. rank_r in Some (1. -. Gsl.Cdf.fdist_P ~x:_F_ ~nu1 ~nu2) let phenotypes_of_tree t = let phenotypes_of_tree t = Convergence_tree.leaves t Convergence_tree.leaves t ... @@ -104,13 +106,23 @@ let phenotypes_of_tree t = ... @@ -104,13 +106,23 @@ let phenotypes_of_tree t = match condition with `Ancestral -> 0. | `Convergent -> 1.) match condition with `Ancestral -> 0. | `Convergent -> 1.) |> Array.of_list |> L.Vec.of_array |> Array.of_list |> L.Vec.of_array let transform_matrix n cors = let sigma () = matrix_of_correlations n cors in let mat_C = in_place (L.potrf ~up:false) (sigma ()) in for i = 1 to L.Mat.dim1 mat_C do for j = i + 1 to L.Mat.dim2 mat_C do mat_C.{i, j} <- 0. done done ; in_place L.getri mat_C let test ~alignment ~tree = let test ~alignment ~tree = let phenotypes = phenotypes_of_tree tree in let phenotypes = phenotypes_of_tree tree in let cors = correlations tree |> index_correlations tree in let cors = correlations tree |> index_correlations tree in let n = L.Vec.dim phenotypes in let n = L.Vec.dim phenotypes in let _C_ = in_place (L.potrf ~up:false) (matrix_of_correlations n cors) in let transform_matrix = transform_matrix n cors in Array.init (Alignment.ncols alignment) ~f:(fun site -> Array.init (Alignment.ncols alignment) ~f:(fun site -> test_on_one_site ~alignment ~phenotypes ~_C_ ~site) test_on_one_site ~alignment ~phenotypes ~transform_matrix ~site) let result_table_of_test pvals = let result_table_of_test pvals = Result_table.{ oracle = None; scores_per_meth = [ ("HomeLMM", pvals) ] } Result_table.{ oracle = None; scores_per_meth = [ ("HomeLMM", pvals) ] }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!