Commit e33062cb authored by Philippe Veber's avatar Philippe Veber
Browse files

notes/calibration: enhanced benchmark figure

parent 9e9674d1
...@@ -39,7 +39,7 @@ let simulation ?branch_scale ~rng ~n_h0 ~n_ha ...@@ -39,7 +39,7 @@ let simulation ?branch_scale ~rng ~n_h0 ~n_ha
~rng ~n_h0 ~n_ha ~gBGC:(gBGC0, gBGC1) ~rng ~n_h0 ~n_ha ~gBGC:(gBGC0, gBGC1)
~ne_s:(ne_s0, ne_s1) ~tree ~fitness_profiles () ~ne_s:(ne_s0, ne_s1) ~tree ~fitness_profiles ()
in in
Workflow.plugin ~descr:"simulator.simulation" f Workflow.plugin ~descr:"simulator.simulation" f ~version:4
let alignment_of_simulation sim = let alignment_of_simulation sim =
let f = fun%workflow dest -> let f = fun%workflow dest ->
......
...@@ -20,7 +20,7 @@ let besnard2009 = { ...@@ -20,7 +20,7 @@ let besnard2009 = {
ne_s = 4., 4. ; ne_s = 4., 4. ;
} }
let _oneline_rodent = { let online_rodent = {
label = "online_rodent" ; label = "online_rodent" ;
tree = Bistro.Workflow.input "data/online_rodent/online_rodent.nhx" ; tree = Bistro.Workflow.input "data/online_rodent/online_rodent.nhx" ;
rooted = true ; rooted = true ;
...@@ -36,7 +36,7 @@ let rubisco = { ...@@ -36,7 +36,7 @@ let rubisco = {
ne_s = 4., 4. ; ne_s = 4., 4. ;
} }
let _orthomam_echolocation = { let orthomam_echolocation = {
label = "orthomam_echolocation" ; label = "orthomam_echolocation" ;
tree = ( tree = (
Orthomam.tree_of_db Orthomam.tree_of_db
...@@ -49,25 +49,25 @@ let _orthomam_echolocation = { ...@@ -49,25 +49,25 @@ let _orthomam_echolocation = {
ne_s = 4., 4. ; ne_s = 4., 4. ;
} }
let pvals_of_cpt convergence_table pvalue_column = let pvals_of_cpt convergence_table pvalue_column =
let cpt = Cpt.of_file_exn convergence_table in let cpt = Cpt.of_file_exn convergence_table in
Cpt.get_col_exn cpt pvalue_column Cpt.get_col_exn cpt pvalue_column
|> Array.filter_opt |> Array.filter_opt
|> Array.map ~f:(fun p -> 1. -. p) |> Array.map ~f:(fun p -> 1. -. p)
let hist_pval (convergence_table : cpt file) (pvalue_column:string): pdf file = let hist_pval (convergence_table : cpt file) (pvalue_column:string): pdf file =
let f = fun%workflow dest -> let f = fun%workflow dest ->
let pvals = pvals_of_cpt [%path convergence_table] pvalue_column in let pvals = pvals_of_cpt [%path convergence_table] pvalue_column in
OCamlR_grDevices.pdf dest; OCamlR_grDevices.pdf dest;
OCamlR_graphics.hist ~breaks:(`n 60) pvals OCamlR_graphics.hist ~breaks:(`n 60) pvals
|> fun _ -> OCamlR_grDevices.dev_off () |> fun _ -> OCamlR_grDevices.dev_off ()
in in
Bistro.Workflow.path_plugin ~descr:"Calibration.hist_pval" f Bistro.Workflow.path_plugin ~descr:"Calibration.hist_pval" f
(* let quantiles_pval convergence_table pvalue_column = (* let quantiles_pval convergence_table pvalue_column =
let module R = Bistro_utils.R_script in let module R = Bistro_utils.R_script in
let f = fun%workflow dest -> let f = fun%workflow dest ->
let pvals = pvals_of_cpt [%path convergence_table] pvalue_column let pvals = pvals_of_cpt [%path convergence_table] pvalue_column
|> Array.to_list in |> Array.to_list in
[%script {| [%script {|
library(tidyverse) library(tidyverse)
...@@ -110,14 +110,14 @@ module Under_mutsel = struct ...@@ -110,14 +110,14 @@ module Under_mutsel = struct
|> Pipeline.inhouse_lmm |> Pipeline.inhouse_lmm
|> Fn.flip hist_pval Codepitk.Inhouse_lmm.cpt_tag |> Fn.flip hist_pval Codepitk.Inhouse_lmm.cpt_tag
let multinomial dataset = let multinomial dataset =
query dataset query dataset
|> Pipeline.multinomial_simulation_lrt |> Pipeline.multinomial_simulation_lrt
|> Fn.flip hist_pval "Multinomial_1mp" |> Fn.flip hist_pval "Multinomial_1mp"
let tdg09 dataset = let tdg09 dataset =
query dataset query dataset
|> Pipeline.tdg09 |> Pipeline.tdg09
|> Fn.flip hist_pval "Tdg09_1MinusLRT" |> Fn.flip hist_pval "Tdg09_1MinusLRT"
let inhouse_tdg09 dataset = let inhouse_tdg09 dataset =
...@@ -125,21 +125,23 @@ module Under_mutsel = struct ...@@ -125,21 +125,23 @@ module Under_mutsel = struct
|> Pipeline.inhouse_tdg09 |> Pipeline.inhouse_tdg09
|> Fn.flip hist_pval "inhouse_tdg09_1mpval" |> Fn.flip hist_pval "inhouse_tdg09_1mpval"
let query_benchmark ?(seed = 42) { tree = t ; ne_s ; branch_scale ; _ } = let query_benchmark ?(seed = 42) ?(cpg_hypermutability = false) ?(gBGC = false) { tree = t ; ne_s ; branch_scale ; _ } =
let rate_CpG = if cpg_hypermutability then Some 10. else None in
let gBGC = if gBGC then Some (0., 10.) else None in
Pipeline.query ~seed ~tree:(NHX t) ~branch_scale ~ne_s Pipeline.query ~seed ~tree:(NHX t) ~branch_scale ~ne_s
~profiles:"example/aa_fitness/263SelectedProfiles.tsv" ~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
~n_h0:9000 ~n_ha:1000 () ~n_h0:9000 ~n_ha:1000 ?rate_CpG ?gBGC ()
let benchmark dataset ?(query_builder = query_benchmark) () = let benchmark ?cpg_hypermutability ?gBGC dataset =
let q = query_builder dataset in let q = query_benchmark ?cpg_hypermutability ?gBGC dataset in
let methods = List.filter_map methods ~f:(fun m -> let methods = List.filter_map methods ~f:(fun m ->
if not m.requires_rooted_tree || dataset.rooted then if not m.requires_rooted_tree || dataset.rooted then
Some m.f else None Some m.f else None
) in ) in
Pipeline.benchmark q methods Pipeline.benchmark q methods
|> Pipeline.rds_of_benchmark |> Pipeline.rds_of_benchmark
let plot_benchmark (rds:rds file) = let _plot_benchmark (rds:rds file) =
let open Bistro.Shell_dsl in let open Bistro.Shell_dsl in
let script = [%script {| let script = [%script {|
library(tidyverse) library(tidyverse)
...@@ -150,12 +152,37 @@ module Under_mutsel = struct ...@@ -150,12 +152,37 @@ module Under_mutsel = struct
ggplot(aes(x=method, y=estimate)) + ggplot(aes(x=method, y=estimate)) +
geom_pointrange(aes(ymin=lower_bound, ymax=upper_bound)) + geom_pointrange(aes(ymin=lower_bound, ymax=upper_bound)) +
ylab("AUC") + ylab("AUC") +
theme_bw() + theme_bw() +
theme(axis.text.x = element_text(angle=45, hjust=1)) theme(axis.text.x = element_text(angle=45, hjust=1))
dev.off() dev.off()
|}] in |}] in
Bistro_utils.R_script.workflow ~descr:"calibration.under_mutsel.plot_benchmark" script Bistro_utils.R_script.workflow ~descr:"calibration.under_mutsel.plot_benchmark" script
let plot_benchmark3 (rds1 : rds file) (rds2 : rds file) (rds3 : rds file) =
let open Bistro.Shell_dsl in
let labels = ["normal";"with CpG hypermutability";"with gBGC"] in
let labels = list ~sep:"," (fun s -> quote ~using:'"' (string s)) labels in
let script = [%script {|
library(tidyverse)
library(grDevices)
rds1 = readRDS("<<< dep rds1 >>>")
rds2 = readRDS("<<< dep rds2 >>>")
rds3 = readRDS("<<< dep rds3 >>>")
pdf("<<< dest >>>", width=12)
bind_rows(rds1$auc_estimates,rds2$auc_estimates,rds3$auc_estimates, .id = "simulation") %>%
arrange(desc(estimate)) %>%
mutate(method = factor(method, levels=unique(method))) %>%
ggplot(aes(x=method, y=estimate)) +
geom_pointrange(aes(ymin=lower_bound, ymax=upper_bound, color=simulation),
position = position_dodge(0.3), width = 0.2) +
ylab("PRAUC") +
theme_bw(base_size = 14) +
theme(axis.text.x = element_text(angle=45, hjust=1)) +
scale_color_manual(values = c("#00AFBB","#E7B800","#5AD000"), labels = c(<<<labels>>>), name = "Simulation")
dev.off()
|}] in
Bistro_utils.R_script.workflow ~descr:"calibration.under_mutsel.plot_benchmark3" script
end end
module Under_tdg09 = struct module Under_tdg09 = struct
...@@ -172,7 +199,7 @@ module Under_tdg09 = struct ...@@ -172,7 +199,7 @@ module Under_tdg09 = struct
site_simulation ~rng wag ?alpha tree site_simulation ~rng wag ?alpha tree
) )
let load_tree fn = let load_tree fn =
Convergence_tree.from_file fn Convergence_tree.from_file fn
|> Rresult.R.failwith_error_msg |> Rresult.R.failwith_error_msg
|> Codepitk.Tdg09.Pack.convergence_tree |> Codepitk.Tdg09.Pack.convergence_tree
...@@ -218,11 +245,9 @@ module Under_tdg09 = struct ...@@ -218,11 +245,9 @@ module Under_tdg09 = struct
|> OCamlR_graphics.hist ~breaks:(`n 60) |> OCamlR_graphics.hist ~breaks:(`n 60)
|> fun _ -> OCamlR_grDevices.dev_off () |> fun _ -> OCamlR_grDevices.dev_off ()
in in
Workflow.path_plugin ~descr:"tdg09_uniformity_test" f Workflow.path_plugin ~descr:"tdg09_uniformity_test" f
end end
let target () = let target () =
let tree = Workflow.input "data/besnard2009/besnard2009.nhx" in let tree = Workflow.input "data/besnard2009/besnard2009.nhx" in
let _multinomial_under_tdg09 = Under_tdg09.uniformity_test tree `Multinomial in let _multinomial_under_tdg09 = Under_tdg09.uniformity_test tree `Multinomial in
...@@ -231,20 +256,16 @@ let target () = ...@@ -231,20 +256,16 @@ let target () =
let multinomial_under_mutsel_besnard2009 = Under_mutsel.multinomial besnard2009 in let multinomial_under_mutsel_besnard2009 = Under_mutsel.multinomial besnard2009 in
let _tdg09_under_mutsel_besnard2009 = Under_mutsel.tdg09 besnard2009 in let _tdg09_under_mutsel_besnard2009 = Under_mutsel.tdg09 besnard2009 in
let inhouse_tdg09_under_mutsel_besnard2009 = Under_mutsel.inhouse_tdg09 besnard2009 in let inhouse_tdg09_under_mutsel_besnard2009 = Under_mutsel.inhouse_tdg09 besnard2009 in
let benchmark_fig dataset =
print_endline "Benchmark besnard"; Under_mutsel.(
let benchmark_besnard = Under_mutsel.(benchmark besnard2009 ()|> plot_benchmark) in plot_benchmark3
(benchmark dataset)
print_endline "Benchmark besnard with CpG"; (benchmark dataset ~cpg_hypermutability:true)
let query_benchmark_CpG ?(seed = 42) { tree = t ; ne_s ; branch_scale ; _ } = (benchmark dataset ~gBGC:true)
let module Pipeline = Codepi.Simulation_pipeline.Mutsel in )
Pipeline.query ~seed ~tree:(NHX t) ~branch_scale ~ne_s in
~profiles:"example/aa_fitness/263SelectedProfiles.tsv" let benchmark_besnard = benchmark_fig besnard2009 in
~n_h0:9000 ~n_ha:1000 ~rate_CpG:10. () in let benchmark_rubisco = benchmark_fig rubisco in
let benchmark_besnard_CpG = Under_mutsel.( let benchmark_orthomam_echolocation = benchmark_fig orthomam_echolocation in
benchmark besnard2009 ~query_builder:query_benchmark_CpG ()|> plot_benchmark let benchmark_online_rodent = benchmark_fig online_rodent in
) in
print_endline "Benchmark rubisco";
let benchmark_echolocation = Under_mutsel.(benchmark rubisco () |> plot_benchmark) in
Report.pdflatex [%include_script "notes/calibration.tex"] Report.pdflatex [%include_script "notes/calibration.tex"]
\documentclass{beamer} \documentclass{beamer}
\AtBeginSubsection[]
{
\begin{frame}
\frametitle{Table of Contents}
\tableofcontents[currentsection,currentsubsection,subsectionstyle=show/shaded/hide]
\end{frame}
}
\title{Detection models calibration} \title{Detection models calibration}
\begin{document} \begin{document}
...@@ -37,24 +45,19 @@ ...@@ -37,24 +45,19 @@
\section{Detection performance assessment} \section{Detection performance assessment}
\subsection{Mutsel model}
% for each topology
\begin{frame}{Benchmark Besnard2009 under Mutsel} \begin{frame}{Benchmark Besnard2009 under Mutsel}
\includegraphics[width=0.75\textwidth]{<<<Report.pdfdep benchmark_besnard>>>} \includegraphics[width=\textwidth]{<<<Report.pdfdep benchmark_besnard>>>}
\end{frame} \end{frame}
% \begin{frame}{Benchmark Echolocation under Mutsel} \begin{frame}{Benchmark echolocation under Mutsel}
% \includegraphics[width=0.75\textwidth]{<<<Report.pdfdep benchmark_echolocation>>>} \includegraphics[width=\textwidth]{<<<Report.pdfdep benchmark_orthomam_echolocation>>>}
% \end{frame} \end{frame}
\subsection{Mutsel model with gBGC}
% for each topology
\subsection{Mutsel model with CpG hypermutability} \begin{frame}{Benchmark rodents under Mutsel}
% for each topology \includegraphics[width=\textwidth]{<<<Report.pdfdep benchmark_online_rodent>>>}
\begin{frame}{Benchmark Besnard2009 under Mutsel}
\includegraphics[width=0.75\textwidth]{<<<Report.pdfdep benchmark_besnard_CpG>>>}
\end{frame} \end{frame}
\begin{frame}{Benchmark rubisco under Mutsel}
\includegraphics[width=\textwidth]{<<<Report.pdfdep benchmark_rubisco>>>}
\end{frame}
\end{document} \end{document}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment