Commit 745165b2 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

Code to generate methods calibration report

parent 186a5191
......@@ -2,6 +2,8 @@ open Rresult
open Core_kernel
open Let_syntax.Result
(* FIXME: rewrite based on Dataframe? *)
type t = {
nrows : int ;
cols : (string * float option array) list ;
......@@ -30,6 +32,8 @@ let of_file fn =
in
{ nrows ; cols }
let of_file_exn fn = of_file fn |> Rresult.R.failwith_error_msg
let columns x = x.cols
let make = function
......@@ -58,3 +62,9 @@ let to_file table ~output =
write_row oc ("Sites" :: List.map table.cols ~f:fst) ;
write_columns oc table
)
let get_col cpt colname =
List.Assoc.find ~equal:String.equal cpt.cols colname
let get_col_exn cpt colname =
get_col cpt colname |> Option.value_exn
......@@ -3,6 +3,7 @@
type t
val of_file : string -> (t, [> `Msg of string]) result
val of_file_exn : string -> t
val to_file : t -> output:string -> unit
val make :
......@@ -10,3 +11,5 @@ val make :
(t, [> `Msg of string]) result
val columns : t -> (string * float option array) list
val get_col : t -> string -> float option array option
val get_col_exn : t -> string -> float option array
\ No newline at end of file
......@@ -4,6 +4,8 @@ module L = Lacaml.D
type correlations = (string * string * float) list * String.Set.t
let cpt_tag = "HomeLMM"
let merge_correlations time_from_ancestor ((dist_l, l) : correlations)
((dist_r, r) : correlations) =
let dist_lr =
......@@ -151,5 +153,5 @@ let test ?(cor_estimation=Brownian) ~alignment ~tree () =
let result_table_of_test pvals =
Result_table.{
oracle = None;
scores_per_meth = [ ("HomeLMM", Array.map pvals ~f:(Option.map ~f:(( -. ) 1.))) ]
scores_per_meth = [ (cpt_tag, Array.map pvals ~f:(Option.map ~f:(( -. ) 1.))) ]
}
......@@ -16,3 +16,5 @@ val test :
float option array
val result_table_of_test : float option array -> Result_table.t
val cpt_tag : string
\ No newline at end of file
open Core_kernel
open Bistro
open Codepi
open Codepitk
open Codepi.File_formats
type dataset = {
label : string ;
tree : nhx file ;
rooted : bool ;
branch_scale : float ;
ne_s : float * float ;
}
let besnard2009 = {
label = "besnard2009" ;
tree = Bistro.Workflow.input "data/besnard2009/besnard2009.nhx" ;
rooted = true ;
branch_scale = 1. ;
ne_s = 4., 4. ;
}
let _oneline_rodent = {
label = "online_rodent" ;
tree = Bistro.Workflow.input "data/online_rodent/online_rodent.nhx" ;
rooted = true ;
branch_scale = 1. ;
ne_s = 4., 4. ;
}
let _rubisco = {
label = "rubisco" ;
tree = Rubisco_dataset.(Path "data/rubisco" |> Query.tree ~branch_length_unit:`Amino_acid) ;
rooted = false ;
branch_scale = 1. ;
ne_s = 4., 4. ;
}
let _orthomam_echolocation () = {
label = "orthomam_echolocation" ;
tree = (
Orthomam.tree_of_db
~branch_length_unit:`Amino_acid
~convergent_species:Orthomam.species_with_echolocation
(Codepitk.Orthomam_db.make "_runs/omm")
) ;
rooted = false ;
branch_scale = 1. ;
ne_s = 4., 4. ;
}
let hist_pval (convergence_table : cpt file) : pdf file =
let f = fun%workflow dest ->
let cpt = Cpt.of_file_exn [%path convergence_table] in
let pvals =
Cpt.get_col_exn cpt Codepitk.Inhouse_lmm.cpt_tag
|> Array.filter_opt
in
OCamlR_grDevices.pdf dest;
OCamlR_graphics.hist ~breaks:(`n 60) pvals
|> fun _ -> OCamlR_grDevices.dev_off ()
in
Bistro.Workflow.path_plugin ~descr:"Calibration.hist_pval" f
module Under_mutsel = struct
module Pipeline = Codepi.Simulation_pipeline.Mutsel
let query ?(seed = 42) { tree = t ; ne_s ; branch_scale ; _ } =
Pipeline.query ~seed ~tree:(NHX t) ~branch_scale ~ne_s
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
~n_h0:1000 ~n_ha:0 ()
let lmm d =
Pipeline.inhouse_lmm (query d)
|> hist_pval
end
module Under_tdg09 = struct
let site_simulation ?(alpha = 1.) ?(scale = 1.) (wag : Phylogenetics.Wag.t) tree =
let open Codepitk.Tdg09.Pack in
let exchangeability_matrix = wag.rate_matrix in
let stationary_distribution = simulate_profile alpha in
let param = scale in
Model1.simulate_site ~exchangeability_matrix ~stationary_distribution ~param tree
let simulation ?alpha wag tree nsites =
Array.init nsites ~f:(fun _ ->
site_simulation wag ?alpha tree
)
let load_tree fn =
Convergence_tree.from_file fn
|> Rresult.R.failwith_error_msg
|> Codepitk.Tdg09.Pack.convergence_tree
let run_multinomial site =
let leaves1, leaves2 =
Convergence_tree.leaves site
|> List.partition_map ~f:(function
| aa, `Ancestral -> Either.first aa
| aa, `Convergent -> Either.second aa
)
in
let counts x = (Phylogenetics.Amino_acid.counts (Sequence.of_list x) :> int array) in
let x1 = counts leaves1 in
let x2 = counts leaves2 in
let d = Multinomial_test.data ~x1 ~x2 in
let r = Multinomial_test.(
Permutation.test ~statistic:LRT.likelihood_log_ratio d
) in
r.pvalue
let wag =
Bistro_unix.wget "https://www.ebi.ac.uk/goldman-srv/WAG/wag.dat"
let run_tdg09 wag tree sim =
let open Codepitk.Tdg09.Pack in
let site = Phylogenetics.Tree.leaves sim |> Array.of_list in
let _, _, lrt = Model3.lrt ~mode:`sparse wag tree site in
lrt.pvalue
let uniformity_test ?alpha tree meth =
let f = fun%workflow dest ->
let wag = Phylogenetics.Wag.parse [%path wag] in
let tree = load_tree [%path tree] in
let f = match [%param meth] with
| `Tdg09 -> run_tdg09 wag tree
| `Multinomial -> run_multinomial
in
let sites = simulation ?alpha wag tree 1_000 in
OCamlR_grDevices.pdf dest ;
Array.map sites ~f
|> OCamlR_graphics.hist ~breaks:(`n 60)
|> fun _ -> OCamlR_grDevices.dev_off ()
in
Workflow.path_plugin ~descr:"tdg09_uniformity_test" f
end
let target () =
let tree = Workflow.input "data/besnard2009/besnard2009.nhx" in
let multinomial_under_tdg09 = Under_tdg09.uniformity_test tree `Tdg09 in
let tdg09_under_tdg09 = Under_tdg09.uniformity_test tree `Tdg09 in
let lmm_under_mutsel_besnard2009 = Under_mutsel.lmm besnard2009 in
Report.pdflatex [%include_script "notes/calibration.tex"]
open Bistro
val target : unit -> pdf file
\ No newline at end of file
\documentclass{beamer}
\title{}
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}{Multinomial under tdg09 simulations}
\includegraphics{<<<Report.pdfdep multinomial_under_tdg09>>>}
\end{frame}
\begin{frame}{Multinomial under tdg09 simulations}
\includegraphics{<<<Report.pdfdep tdg09_under_tdg09>>>}
\end{frame}
\begin{frame}{LMM under Mutsel simulations}
\includegraphics{<<<Report.pdfdep lmm_under_mutsel_besnard2009>>>}
\end{frame}
\end{document}
\ No newline at end of file
......@@ -2,4 +2,4 @@
(name codepi_notes)
(libraries codepi)
(preprocess (pps bistro.ppx ppx_jane))
(preprocessor_deps multinomial_and_phylogeny.tex))
(preprocessor_deps calibration.tex multinomial_and_phylogeny.tex))
......@@ -29,4 +29,5 @@ let make_command f topic =
let command =
Command.group ~summary:"codepi report generator" [
"multinomial-and-phylogeny", make_command Multinomial_and_phylogeny.target "Multinomial and phylogeny" ;
"calibration", make_command Calibration.target "Calibration tests";
]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment