Commit 8f09c49e authored by Louis Duchemin's avatar Louis Duchemin
Browse files

LMM : benchmark for chemical props + API refactoring

parent e33062cb
......@@ -65,8 +65,10 @@ let methods = Pipeline.[
(* meth pcoc "pcoc" ; *)
(* meth pcoc_v2 ~col:3 "pcoc v2" ; *)
meth (gemma ~lmm_test:`Score ~relatedness_mode:`Standardized) "gemma" ;
meth inhouse_lmm "LMM" ;
meth (lmm_with_gemma_matrix ~relatedness_mode:`Centered) "LMM_Gemma";
meth (lmm ~design_matrix:`AA_onehot ~correlation_method:`Brownian) "LMM" ;
meth (lmm ~design_matrix:`AA_onehot ~correlation_method:(`Gemma `Centered)) "LMM_Gemma";
meth (lmm ~design_matrix:`AA_chemical_properties ~correlation_method:`Brownian) "LMM_chem_props" ;
meth (lmm ~design_matrix:`AA_chemical_properties ~correlation_method:(`Gemma `Centered)) "LMM_chem_props_Gemma";
meth multinomial_asymptotic_lrt "multinomial" ;
meth topological "topological" ~requires_rooted_tree:true ;
]
......
open Bistro
open File_formats
open Core_kernel
module type Query = sig
type t
......@@ -56,11 +57,16 @@ module type S = sig
relatedness_mode:[ `Centered | `Standardized ] ->
cpt file
val inhouse_lmm : query -> cpt file
val lmm_with_gemma_matrix : query ->
relatedness_mode:[ `Centered | `Standardized ] ->
cpt file
val lmm :
?design_matrix:[
| `AA_onehot
| `AA_chemical_properties
] ->
?correlation_method:[
| `Brownian
| `Gemma of [`Centered | `Standardized]
] ->
query -> cpt file
val diffsel : query -> cpt file
......@@ -197,23 +203,30 @@ module Make (Q : Query) = struct
~relatedness_matrix
|> Gemma.result_table_of_output alignment
let inhouse_lmm q =
let alignment = amino_acid_alignment q in
let tree = tree ~branch_length_unit:`Amino_acid q in
Inhouse_lmm.test alignment tree
let lmm_with_gemma_matrix q ~relatedness_mode =
let lmm ?design_matrix ?correlation_method q =
let alignment = amino_acid_alignment q in
let tree = tree ~branch_length_unit:`Amino_acid q in
let genotype = Gemma.genotype_of_fasta alignment in
let phenotype = Gemma.phenotype_of_tree tree in
let relatedness_matrix =
Gemma.calculate_relatedness_matrix ~mode:relatedness_mode ~genotype
~phenotype
let design_matrix =
Option.map design_matrix ~f:(function
| `AA_onehot -> Codepitk.Inhouse_lmm.AA_onehot
| `AA_chemical_properties-> Codepitk.Inhouse_lmm.AA_chemical_properties
)
in
Inhouse_lmm.test ~correlation_method:(Inhouse_lmm.Gemma relatedness_matrix)
alignment tree
let correlation_method =
Option.map correlation_method ~f:(function
| `Gemma relatedness_mode ->
let genotype = Gemma.genotype_of_fasta alignment in
let phenotype = Gemma.phenotype_of_tree tree in
let relatedness_matrix =
Gemma.calculate_relatedness_matrix ~mode:relatedness_mode ~genotype
~phenotype
in
Inhouse_lmm.Gemma relatedness_matrix
| `Brownian -> Inhouse_lmm.Brownian
)
in
Inhouse_lmm.test ?design_matrix ?correlation_method alignment tree
let diffseltree d =
Tree_dataset.prepare_diffsel_tree
......@@ -261,7 +274,7 @@ module Make (Q : Query) = struct
let convergent_species = [%param convergent_species] in
let site_pos = [%param site_pos] in
let condition s =
if List.mem s convergent_species then `Convergent else `Ancestral
if List.mem convergent_species s ~equal:String.equal then `Convergent else `Ancestral
in
let tree = Phylogenetics.Newick.from_file_exn tree_path in
let alignment =
......
......@@ -56,11 +56,16 @@ module type S = sig
relatedness_mode:[ `Centered | `Standardized ] ->
cpt file
val inhouse_lmm : query -> cpt file
val lmm_with_gemma_matrix : query ->
relatedness_mode:[ `Centered | `Standardized ] ->
cpt file
val lmm :
?design_matrix:[
| `AA_onehot
| `AA_chemical_properties
] ->
?correlation_method:[
| `Brownian
| `Gemma of [`Centered | `Standardized]
] ->
query -> cpt file
val diffsel : query -> cpt file
......
......@@ -21,7 +21,7 @@ let test ?(design_matrix=LMM.AA_onehot) ?(correlation_method=Brownian) alignment
in
let cor_estimation = [%eval correlation_method_dep correlation_method] in
LMM.test ~design_matrix ~cor_estimation ~alignment ~tree ()
|> LMM.result_table_of_test
|> LMM.result_table_of_test ~design_matrix ~cor_estimation
|> Tk.Result_table.to_file ~output:dest
in
Workflow.path_plugin ~descr:"inhouse_lmm.test" f
......@@ -6,7 +6,9 @@ type correlations = (string * string * float) list * String.Set.t
type design_matrix = AA_onehot | AA_chemical_properties
let volume_values = [|
type correlation_method = Brownian | Gemma of string
let _volume_values = [|
31. ; 55. ; 54. ; 83. ; 132. ; 3. ; 96. ; 111. ; 119. ; 111. ; 105. ; 56. ; 32.5 ; 85. ; 124. ; 32. ; 61. ; 84. ; 170. ; 136. ;
(* A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
31. 124. 56. 54. 55. 85. 83. 3. 96. 111.
......@@ -14,21 +16,39 @@ let volume_values = [|
*)
|]
let polarity_values = [|
let _polarity_values = [|
8.1 ; 5.5 ; 13.0 ; 12.3 ; 5.2 ; 9.0 ; 10.4 ; 5.2 ; 11.3 ; 4.9 ; 5.7 ; 11.6 ; 5.5 ; 10.5 ; 10.5 ; 9.2 ; 8.6 ; 5.9 ; 5.4 ; 6.2 ;
(* A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
8.1 10.5 11.6 13.0 5.5 10.5 12.3 9.0 10.4 5.2
4.9 11.3 5.7 5.2 8.0 9.2 8.6 5.4 6.2 5.9 *)
|]
let composition_values = [|
let _composition_values = [|
0. ; 2.75 ; 1.38 ; 0.92 ; 0. ; 0.74 ; 0.58 ; 0. ; 0.33 ; 0. ; 0. ; 1.33 ; 0.39 ; 0.89 ; 0.65 ; 1.42 ; 0.71 ; 0. ; 0.13 ; 0.2 ;
(* A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
0.00 0.65 1.33 1.38 2.75 0.89 0.92 0.74 0.58 0.00
0.00 0.33 0.00 0.00 0.39 1.42 0.71 0.13 0.20 0.00 *)
|]
let cpt_tag = "HomeLMM"
let pca_values = [|
0.07262027 ; 0.62598471 ; -1.35577941 ; -0.89992025 ; 1.39089352 ; -1.10991937 ;
-0.05146399 ; 1.52142208 ; 1.31912706 ; -0.87561066 ; 1.16656546 ; -1.11220456 ;
-1.21297673 ; -0.59246949 ; -0.63862259 ; -0.88654234 ; -0.33885002 ; 1.19782043 ;
1.21900822 ; 0.56091764 ;
(* Ala Arg Asn Asp Cys Gln Glu Gly His
0.07262027 -0.63862259 -1.11220456 -1.35577941 0.62598471 -0.59246949 -0.89992025 -1.10991937 -0.05146399
Ile Leu Lys Met Phe Pro Ser Thr Trp
1.52142208 1.31912706 -0.87561066 1.16656546 1.39089352 -1.21297673 -0.88654234 -0.33885002 1.21900822
Tyr Val
0.56091764 1.19782043 *)
|]
let cpt_tag ~design_matrix ~cor_estimation =
match design_matrix, cor_estimation with
| AA_onehot, Brownian -> "LMM one-hot"
| AA_onehot, Gemma _ -> "LMM one-hot Gemma"
| AA_chemical_properties, Brownian -> "LMM AA-props"
| AA_chemical_properties, Gemma _ -> "LMM AA-props Gemma"
let merge_correlations time_from_ancestor ((dist_l, l) : correlations)
((dist_r, r) : correlations) =
......@@ -84,17 +104,17 @@ let aa_onehot_design_matrix ~m ~aa_at_site ~site (al : Alignment.t) =
let unsafe_int_of_aa_char x =
x
|> Amino_acid.of_char
|> Amino_acid.of_char
|> Option.value_exn
|> Amino_acid.to_int
let aa_chemical_props_design_matrix ~m ~site (al: Alignment.t) =
let n = 4 in
let aa_props_indexes = [|volume_values;polarity_values;composition_values|] in
L.Mat.init_rows m n (fun i j ->
let aa_props_indexes = [|pca_values|] in
let n = Array.length aa_props_indexes + 1 in
L.Mat.init_rows m n (fun i j ->
let i = i - 1 and j = j - 1 in
if j = 0 then 1.
else
else
let aa_index = unsafe_int_of_aa_char al.sequences.(i).[site] in
aa_props_indexes.(j-1).(aa_index)
)
......@@ -154,7 +174,7 @@ let phenotypes_of_tree t =
match condition with `Ancestral -> 0. | `Convergent -> 1.)
|> Array.of_list |> L.Vec.of_array
(* [transform_matrix n cors] computes the cholevski decomposition of a
(* [transform_matrix n cors] computes the cholevski decomposition of a
correlation matrix, provided in sparse representation, [n] being
the dimension of the (square) matrix and [cors] the list of non-zero coefficients *)
let transform_matrix n cors =
......@@ -167,27 +187,27 @@ let transform_matrix n cors =
done ;
in_place L.getri mat_C
let load_relatedness_matrix filename =
let rowwise_matrix =
let load_relatedness_matrix filename =
let rowwise_matrix =
In_channel.read_lines filename
|> List.mapi ~f:(fun i row ->
|> List.mapi ~f:(fun i row ->
String.split row ~on:'\t'
|> List.mapi ~f:(fun j value ->
|> List.mapi ~f:(fun j value ->
(* small addition on the diagonal to ensure eigen values are strictly positive *)
let m_ij = float_of_string value +. if i = j then 0.000001 else 0. in
i, j, m_ij
)
) in
) in
transform_matrix (List.length rowwise_matrix) (List.concat rowwise_matrix)
type correlation_method = Brownian | Gemma of string
let test ?(cor_estimation=Brownian) ~design_matrix ~alignment ~tree () =
let phenotypes = phenotypes_of_tree tree in
let transform_matrix = match cor_estimation with
| Brownian ->
| Brownian ->
let cors = correlations tree |> index_correlations tree in
let n = L.Vec.dim phenotypes in
transform_matrix n cors
......@@ -197,8 +217,8 @@ let test ?(cor_estimation=Brownian) ~design_matrix ~alignment ~tree () =
Array.init (Alignment.ncols alignment) ~f:(fun site ->
test_on_one_site ~design_matrix ~alignment ~phenotypes ~transform_matrix ~site)
let result_table_of_test pvals =
Result_table.{
oracle = None;
scores_per_meth = [ (cpt_tag, Array.map pvals ~f:(Option.map ~f:(( -. ) 1.))) ]
let result_table_of_test pvals ~design_matrix ~cor_estimation =
Result_table.{
oracle = None;
scores_per_meth = [ (cpt_tag ~design_matrix ~cor_estimation, Array.map pvals ~f:(Option.map ~f:(( -. ) 1.))) ]
}
......@@ -10,14 +10,21 @@ type design_matrix = AA_onehot | AA_chemical_properties
type correlation_method = Brownian | Gemma of string
val cpt_tag :
design_matrix:design_matrix ->
cor_estimation:correlation_method ->
string
val test :
?cor_estimation : correlation_method ->
?cor_estimation : correlation_method ->
design_matrix : design_matrix ->
alignment : Alignment.t ->
tree : Convergence_tree.t ->
alignment : Alignment.t ->
tree : Convergence_tree.t ->
unit ->
float option array
val result_table_of_test : float option array -> Result_table.t
val cpt_tag : string
\ No newline at end of file
val result_table_of_test :
float option array ->
design_matrix:design_matrix ->
cor_estimation:correlation_method ->
Result_table.t
......@@ -94,8 +94,8 @@ module Under_mutsel = struct
(* meth pcoc "pcoc" ; *)
(* meth pcoc_v2 ~col:3 "pcoc v2" ; *)
meth (gemma ~lmm_test:`Score ~relatedness_mode:`Standardized) "gemma" ;
meth inhouse_lmm "LMM" ;
meth (lmm_with_gemma_matrix ~relatedness_mode:`Centered) "LMM_Gemma";
meth (lmm ~design_matrix:`AA_onehot ~correlation_method:`Brownian) "LMM" ;
meth (lmm ~design_matrix:`AA_onehot ~correlation_method:(`Gemma `Centered)) "LMM_Gemma";
meth multinomial_asymptotic_lrt "multinomial" ;
meth topological "topological" ~requires_rooted_tree:true ;
]
......@@ -106,9 +106,10 @@ module Under_mutsel = struct
~n_h0:1000 ~n_ha:0 ()
let lmm dataset =
let module LMM = Codepitk.Inhouse_lmm in
query dataset
|> Pipeline.inhouse_lmm
|> Fn.flip hist_pval Codepitk.Inhouse_lmm.cpt_tag
|> Pipeline.lmm ~design_matrix:`AA_onehot ~correlation_method:`Brownian
|> Fn.flip hist_pval (LMM.cpt_tag ~design_matrix:LMM.AA_onehot ~cor_estimation:LMM.Brownian)
let multinomial dataset =
query dataset
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment