Commit 19ffabff authored by Louis Duchemin's avatar Louis Duchemin
Browse files

Inhouse LMM with AA properties as design matrix

parent 5ad509bf
......@@ -9,7 +9,7 @@ let correlation_method_dep = function
| Brownian -> [%workflow LMM.Brownian]
| Gemma matrix -> [%workflow LMM.Gemma [%path matrix]]
let test ?(correlation_method=Brownian) alignment tree =
let test ?(design_matrix=LMM.AA_onehot) ?(correlation_method=Brownian) alignment tree =
let f = fun%workflow dest ->
let alignment =
Alignment.from_fasta [%path alignment]
......@@ -20,7 +20,7 @@ let test ?(correlation_method=Brownian) alignment tree =
|> Result.get_ok
in
let cor_estimation = [%eval correlation_method_dep correlation_method] in
LMM.test ~cor_estimation ~alignment ~tree ()
LMM.test ~design_matrix ~cor_estimation ~alignment ~tree ()
|> LMM.result_table_of_test
|> Tk.Result_table.to_file ~output:dest
in
......
......@@ -4,6 +4,7 @@ open File_formats
type correlation_method = Brownian | Gemma of Gemma.relatedness_matrix file
val test :
?design_matrix:Codepitk.Inhouse_lmm.design_matrix ->
?correlation_method:correlation_method ->
aminoacid_fasta file ->
nhx file ->
......
......@@ -4,6 +4,30 @@ module L = Lacaml.D
type correlations = (string * string * float) list * String.Set.t
type design_matrix = AA_onehot | AA_chemical_properties
let volume_values = [|
31. ; 55. ; 54. ; 83. ; 132. ; 3. ; 96. ; 111. ; 119. ; 111. ; 105. ; 56. ; 32.5 ; 85. ; 124. ; 32. ; 61. ; 84. ; 170. ; 136. ;
(* A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
31. 124. 56. 54. 55. 85. 83. 3. 96. 111.
111. 119. 105. 132. 32.5 32. 61. 170. 136. 84.
*)
|]
let polarity_values = [|
8.1 ; 5.5 ; 13.0 ; 12.3 ; 5.2 ; 9.0 ; 10.4 ; 5.2 ; 11.3 ; 4.9 ; 5.7 ; 11.6 ; 5.5 ; 10.5 ; 10.5 ; 9.2 ; 8.6 ; 5.9 ; 5.4 ; 6.2 ;
(* A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
8.1 10.5 11.6 13.0 5.5 10.5 12.3 9.0 10.4 5.2
4.9 11.3 5.7 5.2 8.0 9.2 8.6 5.4 6.2 5.9 *)
|]
let composition_values = [|
0. ; 2.75 ; 1.38 ; 0.92 ; 0. ; 0.74 ; 0.58 ; 0. ; 0.33 ; 0. ; 0. ; 1.33 ; 0.39 ; 0.89 ; 0.65 ; 1.42 ; 0.71 ; 0. ; 0.13 ; 0.2 ;
(* A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
0.00 0.65 1.33 1.38 2.75 0.89 0.92 0.74 0.58 0.00
0.00 0.33 0.00 0.00 0.39 1.42 0.71 0.13 0.20 0.00 *)
|]
let cpt_tag = "HomeLMM"
let merge_correlations time_from_ancestor ((dist_l, l) : correlations)
......@@ -50,7 +74,7 @@ let matrix_of_correlations n cor_assoc =
mat
let design_matrix ~m ~aa_at_site ~site (al : Alignment.t) =
let aa_onehot_design_matrix ~m ~aa_at_site ~site (al : Alignment.t) =
let n = Int.max 1 (Array.length aa_at_site) in
L.Mat.init_rows m n (fun i j ->
let i = i - 1 and j = j - 1 in
......@@ -58,6 +82,25 @@ let design_matrix ~m ~aa_at_site ~site (al : Alignment.t) =
else if Char.(al.sequences.(i).[site] = aa_at_site.(j)) then 1.
else 0.)
let unsafe_int_of_aa_char x =
x
|> Amino_acid.of_char
|> Option.value_exn
|> Amino_acid.to_int
let aa_chemical_props_design_matrix ~m ~site (al: Alignment.t) =
let n = 4 in
let aa_props_indexes = [|volume_values;polarity_values;composition_values|] in
L.Mat.init_rows m n (fun i j ->
let i = i - 1 and j = j - 1 in
if j = 0 then 1.
else
let aa_index = unsafe_int_of_aa_char al.sequences.(i).[site] in
aa_props_indexes.(j-1).(aa_index)
)
let predict_y ~_X_ ~theta = L.gemv _X_ theta
let squares_sum ~y_r ~y_q = L.Vec.sub y_r y_q |> L.Vec.sqr_nrm2
......@@ -84,7 +127,7 @@ let solve ~y ~_X_ ~transform_matrix =
(in_place L.getri (L.gemm ~transa:`T _Xtilde_ _Xtilde_))
(L.gemv ~trans:`T _Xtilde_ ytilde)
let test_on_one_site ~alignment:al ~phenotypes:y ~transform_matrix ~site =
let test_on_one_site ~design_matrix:dm ~alignment:al ~phenotypes:y ~transform_matrix ~site =
let m = Alignment.nrows al in
assert (m = L.Vec.dim y) ;
let aa_at_site =
......@@ -92,8 +135,12 @@ let test_on_one_site ~alignment:al ~phenotypes:y ~transform_matrix ~site =
in
if Array.length aa_at_site <= 1 then None
else
let _X_r = design_matrix ~m ~aa_at_site ~site al in
let _X_q = design_matrix ~m ~aa_at_site:[||] ~site al in
let design_matrix = match dm with
| AA_onehot -> aa_onehot_design_matrix ~aa_at_site
| AA_chemical_properties -> aa_chemical_props_design_matrix
in
let _X_r = design_matrix ~m ~site al in
let _X_q = aa_onehot_design_matrix ~m ~aa_at_site:[||] ~site al in
let theta_r = solve ~y ~_X_:_X_r ~transform_matrix in
let theta_q = solve ~y ~_X_:_X_q ~transform_matrix in
let _F_ = f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q in
......@@ -126,18 +173,18 @@ let load_relatedness_matrix filename =
|> List.mapi ~f:(fun i row ->
String.split row ~on:'\t'
|> List.mapi ~f:(fun j value ->
(* small addition on the diagonal to ensure eigen values are strictly positive *)
let m_ij = float_of_string value +. if i = j then 0.000001 else 0. in
i, j, m_ij
)
) in
(* small addition on the diagonal to ensure eigen values are strictly positive *)
let m_ij = float_of_string value +. if i = j then 0.000001 else 0. in
i, j, m_ij
)
) in
transform_matrix (List.length rowwise_matrix) (List.concat rowwise_matrix)
type correlation_method = Brownian | Gemma of string
let test ?(cor_estimation=Brownian) ~alignment ~tree () =
let test ?(cor_estimation=Brownian) ~design_matrix ~alignment ~tree () =
let phenotypes = phenotypes_of_tree tree in
let transform_matrix = match cor_estimation with
| Brownian ->
......@@ -148,7 +195,7 @@ let test ?(cor_estimation=Brownian) ~alignment ~tree () =
load_relatedness_matrix matrix_file
in
Array.init (Alignment.ncols alignment) ~f:(fun site ->
test_on_one_site ~alignment ~phenotypes ~transform_matrix ~site)
test_on_one_site ~design_matrix ~alignment ~phenotypes ~transform_matrix ~site)
let result_table_of_test pvals =
Result_table.{
......
......@@ -6,12 +6,15 @@ open Phylogenetics
H0: b = 0
*)
type design_matrix = AA_onehot | AA_chemical_properties
type correlation_method = Brownian | Gemma of string
val test :
?cor_estimation:correlation_method ->
alignment:Alignment.t ->
tree:Convergence_tree.t ->
?cor_estimation : correlation_method ->
design_matrix : design_matrix ->
alignment : Alignment.t ->
tree : Convergence_tree.t ->
unit ->
float option array
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment