Commit b412ed30 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

First attempt at LMM with AA chemical properties

Error when solving matrix system due to colinearities
parent 19ffabff
......@@ -58,6 +58,8 @@ module type S = sig
val inhouse_lmm : query -> cpt file
val lmm_chem_props : query -> cpt file
val lmm_with_gemma_matrix : query ->
relatedness_mode:[ `Centered | `Standardized ] ->
cpt file
......@@ -202,6 +204,12 @@ module Make (Q : Query) = struct
let tree = tree ~branch_length_unit:`Amino_acid q in
Inhouse_lmm.test alignment tree
let lmm_chem_props q =
let alignment = amino_acid_alignment q in
let tree = tree ~branch_length_unit:`Amino_acid q in
Inhouse_lmm.test ~design_matrix:Codepitk.Inhouse_lmm.AA_chemical_properties alignment tree
let lmm_with_gemma_matrix q ~relatedness_mode =
let alignment = amino_acid_alignment q in
let tree = tree ~branch_length_unit:`Amino_acid q in
......
......@@ -58,6 +58,8 @@ module type S = sig
val inhouse_lmm : query -> cpt file
val lmm_chem_props : query -> cpt file
val lmm_with_gemma_matrix : query ->
relatedness_mode:[ `Centered | `Standardized ] ->
cpt file
......
......@@ -11,6 +11,7 @@ let correlation_method_dep = function
let test ?(design_matrix=LMM.AA_onehot) ?(correlation_method=Brownian) alignment tree =
let f = fun%workflow dest ->
let design_matrix = [%param design_matrix] in
let alignment =
Alignment.from_fasta [%path alignment]
|> Rresult.R.get_ok
......
......@@ -28,6 +28,15 @@ let composition_values = [|
0.00 0.33 0.00 0.00 0.39 1.42 0.71 0.13 0.20 0.00 *)
|]
let aa_props_indexes = [|volume_values;polarity_values;composition_values|]
(* let aa_normalized_indexes = Array.mapi aa_props_indexes ~f:(fun props ->
let min_val = Array.min_elt props ~compare:Float.compare
and max_val = Array.max_elt props ~compare:Float.compare in
match min_val, max_val with
| Some min_val, Some max_val -> Array.map props ~f:(fun p -> (p -. min_val) /. (max_val -. min_val) )
| _, _ -> failwith "Could not compute min and/or max value of chemical property array"
) *)
let cpt_tag = "HomeLMM"
let merge_correlations time_from_ancestor ((dist_l, l) : correlations)
......@@ -89,15 +98,30 @@ let unsafe_int_of_aa_char x =
|> Amino_acid.to_int
let aa_chemical_props_design_matrix ~m ~site (al: Alignment.t) =
let n = 4 in
let aa_props_indexes = [|volume_values;polarity_values;composition_values|] in
L.Mat.init_rows m n (fun i j ->
let i = i - 1 and j = j - 1 in
if j = 0 then 1.
else
let aa_index = unsafe_int_of_aa_char al.sequences.(i).[site] in
aa_props_indexes.(j-1).(aa_index)
)
let n = 1 + Array.length aa_props_indexes in
let columns =
Array.init n ~f:(fun j ->
L.Vec.init m (fun i ->
let i = i -1 in
if j = 0 then 1.
else
let aa_index = unsafe_int_of_aa_char al.sequences.(i).[site] in
aa_props_indexes.(j-1).(aa_index)
)
)
|> Array.filteri ~f:(fun j column ->
j = 0 ||
Float.(<>.) (L.Vec.min column) (L.Vec.max column)
)
in
L.Mat.of_col_vecs columns
(* L.Mat.init_rows m n (fun i j ->
let i = i - 1 and j = j - 1 in
if j = 0 then 1.
else
let aa_index = unsafe_int_of_aa_char al.sequences.(i).[site] in
aa_props_indexes.(j-1).(aa_index)
) *)
......@@ -137,11 +161,20 @@ let test_on_one_site ~design_matrix:dm ~alignment:al ~phenotypes:y ~transform_ma
else
let design_matrix = match dm with
| AA_onehot -> aa_onehot_design_matrix ~aa_at_site
(* | AA_chemical_properties -> aa_onehot_design_matrix ~aa_at_site *)
| AA_chemical_properties -> aa_chemical_props_design_matrix
in
let _X_r = design_matrix ~m ~site al in
let _X_r = design_matrix ~m ~site al
in
(* Out_channel.with_file "delme" ~f:(fun oc -> L.pp_mat (Format.formatter_of_out_channel oc) _X_r); *)
let _X_q = aa_onehot_design_matrix ~m ~aa_at_site:[||] ~site al in
let theta_r = solve ~y ~_X_:_X_r ~transform_matrix in
let theta_r = try
solve ~y ~_X_:_X_r ~transform_matrix
with _ -> (
Out_channel.with_file "delme" ~f:(fun oc -> L.pp_mat (Format.formatter_of_out_channel oc) (aa_onehot_design_matrix ~aa_at_site ~m ~site al));
failwith "kanar";
) in
let theta_q = solve ~y ~_X_:_X_q ~transform_matrix in
let _F_ = f_stat ~y ~_X_r ~theta_r ~_X_q ~theta_q in
let nu1 = float L.Mat.(dim2 _X_r - dim2 _X_q) in
......
......@@ -28,7 +28,7 @@ let _oneline_rodent = {
ne_s = 4., 4. ;
}
let rubisco = {
let _rubisco = {
label = "rubisco" ;
tree = Rubisco_dataset.(Path "data/rubisco" |> Query.tree ~branch_length_unit:`Amino_acid) ;
rooted = false ;
......@@ -89,13 +89,14 @@ module Under_mutsel = struct
{ f ; label ; requires_rooted_tree }
let methods = Pipeline.[
meth tdg09 "tdg09" ;
meth inhouse_tdg09 "inhouse_tdg09" ;
(* meth tdg09 "tdg09" ;
meth inhouse_tdg09 "inhouse_tdg09" ; *)
(* meth pcoc "pcoc" ; *)
(* meth pcoc_v2 ~col:3 "pcoc v2" ; *)
meth (gemma ~lmm_test:`Score ~relatedness_mode:`Standardized) "gemma" ;
meth inhouse_lmm "LMM" ;
meth (lmm_with_gemma_matrix ~relatedness_mode:`Centered) "LMM_Gemma";
meth (lmm_chem_props) "LMM chem props";
meth multinomial_asymptotic_lrt "multinomial" ;
meth topological "topological" ~requires_rooted_tree:true ;
]
......@@ -110,6 +111,11 @@ module Under_mutsel = struct
|> Pipeline.inhouse_lmm
|> Fn.flip hist_pval Codepitk.Inhouse_lmm.cpt_tag
let lmm_chem_props dataset =
query dataset
|> Pipeline.lmm_chem_props
|> Fn.flip hist_pval Codepitk.Inhouse_lmm.cpt_tag
let multinomial dataset =
query dataset
|> Pipeline.multinomial_simulation_lrt
......@@ -228,11 +234,12 @@ let target () =
let _multinomial_under_tdg09 = Under_tdg09.uniformity_test tree `Multinomial in
let tdg09_under_tdg09 = Under_tdg09.uniformity_test tree `Tdg09 in
let lmm_under_mutsel_besnard2009 = Under_mutsel.lmm besnard2009 in
let lmm_chem_props_mutsel_besnard2009 = Under_mutsel.lmm_chem_props besnard2009 in
let multinomial_under_mutsel_besnard2009 = Under_mutsel.multinomial besnard2009 in
let _tdg09_under_mutsel_besnard2009 = Under_mutsel.tdg09 besnard2009 in
let inhouse_tdg09_under_mutsel_besnard2009 = Under_mutsel.inhouse_tdg09 besnard2009 in
print_endline "Benchmark besnard";
let benchmark_besnard = Under_mutsel.(benchmark besnard2009 |> plot_benchmark) in
print_endline "Benchmark rubisco";
let benchmark_echolocation = Under_mutsel.(benchmark rubisco |> plot_benchmark) in
(* print_endline "Benchmark rubisco";
let benchmark_echolocation = Under_mutsel.(benchmark rubisco |> plot_benchmark) in *)
Report.pdflatex [%include_script "notes/calibration.tex"]
......@@ -31,6 +31,10 @@
\includegraphics[width=0.75\textwidth]{<<<Report.pdfdep lmm_under_mutsel_besnard2009>>>}
\end{frame}
\begin{frame}{LMM with chemical properties under Mutsel simulations}
\includegraphics[width=0.75\textwidth]{<<<Report.pdfdep lmm_chem_props_mutsel_besnard2009>>>}
\end{frame}
\begin{frame}{Tdg09@LBBE under Mutsel simulations}
\includegraphics[width=0.75\textwidth]{<<<Report.pdfdep inhouse_tdg09_under_mutsel_besnard2009>>>}
\end{frame}
......@@ -44,9 +48,9 @@
\includegraphics[width=0.75\textwidth]{<<<Report.pdfdep benchmark_besnard>>>}
\end{frame}
\begin{frame}{Benchmark Echolocation under Mutsel}
\includegraphics[width=0.75\textwidth]{<<<Report.pdfdep benchmark_echolocation>>>}
\end{frame}
% \begin{frame}{Benchmark Echolocation under Mutsel}
% \includegraphics[width=0.75\textwidth]{<<Report.pdfdep benchmark_echolocation>>}
% \end{frame}
\subsection{Mutsel model with gBGC}
% for each topology
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment