Commit 6b3eecf2 authored by Philippe Veber's avatar Philippe Veber
Browse files

tk/Multinomial_test: added a degree of freedom correction heuristic for lrt

it is based on the assumption of Wilks theorem that the maximum
likelihood estimate shouldn't lie at the border of the parameter
space.
parent bc7ef385
......@@ -9,7 +9,7 @@ let multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:aminoacid_fas
let module MT = Codepitk.Multinomial_test in
let meth = [%param meth] in
let test = match meth with
| `Asymptotic_LRT -> MT.LRT.asymptotic_test
| `Asymptotic_LRT -> MT.LRT.asymptotic_test ~df_correction:true
| `Simulation_LRT -> MT.LRT.simulation_test ~sample_size:10_000
| `Asymptotic_sparse -> MT.Sparse.asymptotic_test
| `Simulation_sparse -> MT.Sparse.simulation_test ~sample_size:10_000
......
......@@ -12,6 +12,11 @@ let data ~x1 ~x2 =
if k1 <> k2 then raise (Invalid_argument "Multinomial_test.data: the two arrays should have the same size") ;
{ k = k1 ; x1 ; x2 }
let data_nz d =
Array.counti d.x1 ~f:(fun i _ ->
d.x1.(i) <> 0 && d.x2.(i) <> 0
)
type result = {
_T_ : float ;
pvalue : float ;
......@@ -67,9 +72,11 @@ module LRT = struct
in
-2. *. (log_prod0 -. log_prod1)
let asymptotic_test d =
let asymptotic_test ?(df_correction = false) d =
let _T_ = likelihood_log_ratio d in
let pvalue = 1. -. Owl.Stats.chi2_cdf _T_ ~df:(float (d.k - 1)) in
let df = if df_correction then data_nz d else d.k - 1 in
let nu = float df in
let pvalue = Gsl.Cdf.chisq_Q ~x:_T_ ~nu in
{ _T_ ; pvalue }
let simulation_test = simulation_test ~likelihood_log_ratio
......@@ -124,10 +131,10 @@ let counts n p =
let h0_sample ~n1 ~n2 p =
counts n1 p, counts n2 p
let uniformity_test ~k ~n1 ~n2 test =
let uniformity_test ?(alpha = 10.) ~k ~n1 ~n2 test =
let p_values =
Array.init 1_000 ~f:(fun _ ->
let p = random_discrete_probability k ~alpha:10. in
Array.init 10_000 ~f:(fun _ ->
let p = random_discrete_probability k ~alpha in
let x1, x2 = h0_sample ~n1 ~n2 p in
(test { k ; x1 ; x2 }).pvalue
)
......
......@@ -14,7 +14,7 @@ type result = {
module LRT : sig
val likelihood_log_ratio : data -> float
val asymptotic_test : data -> result
val asymptotic_test : ?df_correction:bool -> data -> result
val simulation_test : ?sample_size:int -> data -> result
end
......@@ -25,6 +25,7 @@ module Sparse : sig
end
val uniformity_test :
?alpha:float ->
k:int ->
n1:int ->
n2:int ->
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment