Commit e33062cb authored by Philippe Veber's avatar Philippe Veber
Browse files

notes/calibration: enhanced benchmark figure

parent 9e9674d1
......@@ -39,7 +39,7 @@ let simulation ?branch_scale ~rng ~n_h0 ~n_ha
~rng ~n_h0 ~n_ha ~gBGC:(gBGC0, gBGC1)
~ne_s:(ne_s0, ne_s1) ~tree ~fitness_profiles ()
in
Workflow.plugin ~descr:"simulator.simulation" f
Workflow.plugin ~descr:"simulator.simulation" f ~version:4
let alignment_of_simulation sim =
let f = fun%workflow dest ->
......
......@@ -20,7 +20,7 @@ let besnard2009 = {
ne_s = 4., 4. ;
}
let _oneline_rodent = {
let online_rodent = {
label = "online_rodent" ;
tree = Bistro.Workflow.input "data/online_rodent/online_rodent.nhx" ;
rooted = true ;
......@@ -36,7 +36,7 @@ let rubisco = {
ne_s = 4., 4. ;
}
let _orthomam_echolocation = {
let orthomam_echolocation = {
label = "orthomam_echolocation" ;
tree = (
Orthomam.tree_of_db
......@@ -49,25 +49,25 @@ let _orthomam_echolocation = {
ne_s = 4., 4. ;
}
let pvals_of_cpt convergence_table pvalue_column =
let pvals_of_cpt convergence_table pvalue_column =
let cpt = Cpt.of_file_exn convergence_table in
Cpt.get_col_exn cpt pvalue_column
|> Array.filter_opt
|> Array.map ~f:(fun p -> 1. -. p)
let hist_pval (convergence_table : cpt file) (pvalue_column:string): pdf file =
let hist_pval (convergence_table : cpt file) (pvalue_column:string): pdf file =
let f = fun%workflow dest ->
let pvals = pvals_of_cpt [%path convergence_table] pvalue_column in
OCamlR_grDevices.pdf dest;
OCamlR_graphics.hist ~breaks:(`n 60) pvals
|> fun _ -> OCamlR_grDevices.dev_off ()
in
in
Bistro.Workflow.path_plugin ~descr:"Calibration.hist_pval" f
(* let quantiles_pval convergence_table pvalue_column =
(* let quantiles_pval convergence_table pvalue_column =
let module R = Bistro_utils.R_script in
let f = fun%workflow dest ->
let pvals = pvals_of_cpt [%path convergence_table] pvalue_column
let f = fun%workflow dest ->
let pvals = pvals_of_cpt [%path convergence_table] pvalue_column
|> Array.to_list in
[%script {|
library(tidyverse)
......@@ -110,14 +110,14 @@ module Under_mutsel = struct
|> Pipeline.inhouse_lmm
|> Fn.flip hist_pval Codepitk.Inhouse_lmm.cpt_tag
let multinomial dataset =
query dataset
|> Pipeline.multinomial_simulation_lrt
let multinomial dataset =
query dataset
|> Pipeline.multinomial_simulation_lrt
|> Fn.flip hist_pval "Multinomial_1mp"
let tdg09 dataset =
query dataset
|> Pipeline.tdg09
let tdg09 dataset =
query dataset
|> Pipeline.tdg09
|> Fn.flip hist_pval "Tdg09_1MinusLRT"
let inhouse_tdg09 dataset =
......@@ -125,21 +125,23 @@ module Under_mutsel = struct
|> Pipeline.inhouse_tdg09
|> Fn.flip hist_pval "inhouse_tdg09_1mpval"
let query_benchmark ?(seed = 42) { tree = t ; ne_s ; branch_scale ; _ } =
let query_benchmark ?(seed = 42) ?(cpg_hypermutability = false) ?(gBGC = false) { tree = t ; ne_s ; branch_scale ; _ } =
let rate_CpG = if cpg_hypermutability then Some 10. else None in
let gBGC = if gBGC then Some (0., 10.) else None in
Pipeline.query ~seed ~tree:(NHX t) ~branch_scale ~ne_s
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
~n_h0:9000 ~n_ha:1000 ()
~n_h0:9000 ~n_ha:1000 ?rate_CpG ?gBGC ()
let benchmark dataset ?(query_builder = query_benchmark) () =
let q = query_builder dataset in
let methods = List.filter_map methods ~f:(fun m ->
let benchmark ?cpg_hypermutability ?gBGC dataset =
let q = query_benchmark ?cpg_hypermutability ?gBGC dataset in
let methods = List.filter_map methods ~f:(fun m ->
if not m.requires_rooted_tree || dataset.rooted then
Some m.f else None
) in
Pipeline.benchmark q methods
|> Pipeline.rds_of_benchmark
let plot_benchmark (rds:rds file) =
let _plot_benchmark (rds:rds file) =
let open Bistro.Shell_dsl in
let script = [%script {|
library(tidyverse)
......@@ -150,12 +152,37 @@ module Under_mutsel = struct
ggplot(aes(x=method, y=estimate)) +
geom_pointrange(aes(ymin=lower_bound, ymax=upper_bound)) +
ylab("AUC") +
theme_bw() +
theme_bw() +
theme(axis.text.x = element_text(angle=45, hjust=1))
dev.off()
|}] in
Bistro_utils.R_script.workflow ~descr:"calibration.under_mutsel.plot_benchmark" script
let plot_benchmark3 (rds1 : rds file) (rds2 : rds file) (rds3 : rds file) =
let open Bistro.Shell_dsl in
let labels = ["normal";"with CpG hypermutability";"with gBGC"] in
let labels = list ~sep:"," (fun s -> quote ~using:'"' (string s)) labels in
let script = [%script {|
library(tidyverse)
library(grDevices)
rds1 = readRDS("<<< dep rds1 >>>")
rds2 = readRDS("<<< dep rds2 >>>")
rds3 = readRDS("<<< dep rds3 >>>")
pdf("<<< dest >>>", width=12)
bind_rows(rds1$auc_estimates,rds2$auc_estimates,rds3$auc_estimates, .id = "simulation") %>%
arrange(desc(estimate)) %>%
mutate(method = factor(method, levels=unique(method))) %>%
ggplot(aes(x=method, y=estimate)) +
geom_pointrange(aes(ymin=lower_bound, ymax=upper_bound, color=simulation),
position = position_dodge(0.3), width = 0.2) +
ylab("PRAUC") +
theme_bw(base_size = 14) +
theme(axis.text.x = element_text(angle=45, hjust=1)) +
scale_color_manual(values = c("#00AFBB","#E7B800","#5AD000"), labels = c(<<<labels>>>), name = "Simulation")
dev.off()
|}] in
Bistro_utils.R_script.workflow ~descr:"calibration.under_mutsel.plot_benchmark3" script
end
module Under_tdg09 = struct
......@@ -172,7 +199,7 @@ module Under_tdg09 = struct
site_simulation ~rng wag ?alpha tree
)
let load_tree fn =
let load_tree fn =
Convergence_tree.from_file fn
|> Rresult.R.failwith_error_msg
|> Codepitk.Tdg09.Pack.convergence_tree
......@@ -218,11 +245,9 @@ module Under_tdg09 = struct
|> OCamlR_graphics.hist ~breaks:(`n 60)
|> fun _ -> OCamlR_grDevices.dev_off ()
in
Workflow.path_plugin ~descr:"tdg09_uniformity_test" f
Workflow.path_plugin ~descr:"tdg09_uniformity_test" f
end
let target () =
let tree = Workflow.input "data/besnard2009/besnard2009.nhx" in
let _multinomial_under_tdg09 = Under_tdg09.uniformity_test tree `Multinomial in
......@@ -231,20 +256,16 @@ let target () =
let multinomial_under_mutsel_besnard2009 = Under_mutsel.multinomial besnard2009 in
let _tdg09_under_mutsel_besnard2009 = Under_mutsel.tdg09 besnard2009 in
let inhouse_tdg09_under_mutsel_besnard2009 = Under_mutsel.inhouse_tdg09 besnard2009 in
print_endline "Benchmark besnard";
let benchmark_besnard = Under_mutsel.(benchmark besnard2009 ()|> plot_benchmark) in
print_endline "Benchmark besnard with CpG";
let query_benchmark_CpG ?(seed = 42) { tree = t ; ne_s ; branch_scale ; _ } =
let module Pipeline = Codepi.Simulation_pipeline.Mutsel in
Pipeline.query ~seed ~tree:(NHX t) ~branch_scale ~ne_s
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
~n_h0:9000 ~n_ha:1000 ~rate_CpG:10. () in
let benchmark_besnard_CpG = Under_mutsel.(
benchmark besnard2009 ~query_builder:query_benchmark_CpG ()|> plot_benchmark
) in
print_endline "Benchmark rubisco";
let benchmark_echolocation = Under_mutsel.(benchmark rubisco () |> plot_benchmark) in
let benchmark_fig dataset =
Under_mutsel.(
plot_benchmark3
(benchmark dataset)
(benchmark dataset ~cpg_hypermutability:true)
(benchmark dataset ~gBGC:true)
)
in
let benchmark_besnard = benchmark_fig besnard2009 in
let benchmark_rubisco = benchmark_fig rubisco in
let benchmark_orthomam_echolocation = benchmark_fig orthomam_echolocation in
let benchmark_online_rodent = benchmark_fig online_rodent in
Report.pdflatex [%include_script "notes/calibration.tex"]
\documentclass{beamer}
\AtBeginSubsection[]
{
\begin{frame}
\frametitle{Table of Contents}
\tableofcontents[currentsection,currentsubsection,subsectionstyle=show/shaded/hide]
\end{frame}
}
\title{Detection models calibration}
\begin{document}
......@@ -37,24 +45,19 @@
\section{Detection performance assessment}
\subsection{Mutsel model}
% for each topology
\begin{frame}{Benchmark Besnard2009 under Mutsel}
\includegraphics[width=0.75\textwidth]{<<<Report.pdfdep benchmark_besnard>>>}
\includegraphics[width=\textwidth]{<<<Report.pdfdep benchmark_besnard>>>}
\end{frame}
% \begin{frame}{Benchmark Echolocation under Mutsel}
% \includegraphics[width=0.75\textwidth]{<<<Report.pdfdep benchmark_echolocation>>>}
% \end{frame}
\subsection{Mutsel model with gBGC}
% for each topology
\begin{frame}{Benchmark echolocation under Mutsel}
\includegraphics[width=\textwidth]{<<<Report.pdfdep benchmark_orthomam_echolocation>>>}
\end{frame}
\subsection{Mutsel model with CpG hypermutability}
% for each topology
\begin{frame}{Benchmark Besnard2009 under Mutsel}
\includegraphics[width=0.75\textwidth]{<<<Report.pdfdep benchmark_besnard_CpG>>>}
\begin{frame}{Benchmark rodents under Mutsel}
\includegraphics[width=\textwidth]{<<<Report.pdfdep benchmark_online_rodent>>>}
\end{frame}
\begin{frame}{Benchmark rubisco under Mutsel}
\includegraphics[width=\textwidth]{<<<Report.pdfdep benchmark_rubisco>>>}
\end{frame}
\end{document}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment