Commit d4686c3b authored by Philippe Veber's avatar Philippe Veber
Browse files

evaluation by average precision

parent 12b017f6
......@@ -14,21 +14,21 @@ let besnard2009 = {
tree = Bistro.Workflow.input "data/besnard2009/besnard2009.nhx" ;
rooted = true ;
branch_scale = 3. ;
ne_s = 2., 2. ;
ne_s = 4., 4. ;
}
let oneline_rodent = {
tree = Bistro.Workflow.input "data/online_rodent/online_rodent.nhx" ;
rooted = true ;
branch_scale = 3. ;
ne_s = 2., 2. ;
ne_s = 4., 4. ;
}
let rubisco = {
tree = Rubisco_dataset.(Path "data/rubisco" |> Query.tree ~branch_length_unit:`Amino_acid) ;
rooted = false ;
branch_scale = 3. ;
ne_s = 2., 2. ;
ne_s = 4., 4. ;
}
let orthomam_echolocation = {
......@@ -56,7 +56,7 @@ let meth ?(col = 1) ?(requires_rooted_tree = false) result label =
let methods = Simulation_dataset.[
meth tdg09 "tdg09" ;
meth pcoc ~col:3 "pcoc" ;
meth pcoc_v2 ~col:3 "pcoc v2" ;
(* meth pcoc_v2 ~col:3 "pcoc v2" ; *)
meth (gemma ~lmm_test:`Score ~relatedness_mode:`Standardized) "gemma" ;
meth inhouse_lmm "LMM" ;
meth multinomial_asymptotic_lrt "multinomial" ;
......@@ -66,7 +66,7 @@ let methods = Simulation_dataset.[
let benchmark { tree = t ; rooted ; ne_s ; branch_scale } =
let open Simulation_dataset in
let sim =
convdet_simulation ~tree:(NHX t) ~branch_scale ~ne_s
convdet_simulation ~seed:3 ~tree:(NHX t) ~branch_scale ~ne_s
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
~n_h0:900 ~n_ha:100 ()
in
......@@ -78,7 +78,7 @@ let benchmark { tree = t ; rooted ; ne_s ; branch_scale } =
)
|> List.unzip
in
Utils.recall_precision_curve ~oracle:(oracle sim) ~labels ~results
Utils.average_precision_plot ~oracle:(oracle sim) ~labels ~results
let report =
let module H = Bistro_utils.Html_report in
......
(library
(name codepi)
(libraries core biotk biotope bistro.utils codepitk containers gsl ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence)
(libraries core biotk biotope bistro.utils codepitk containers gsl
ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence
prc)
(preprocess
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
......
......@@ -76,6 +76,68 @@ let%pworkflow recall_precision_curve ~labels ~oracle ~results : pdf file =
OCamlR_graphics.legend ~pch:[|19|] ~col:colors `topright (Array.of_list labels) ;
OCamlR_grDevices.dev_off ()
let%pworkflow average_precision_plot ~labels ~oracle ~results : pdf file =
let open Codepitk in
let n = List.length labels in
assert (n = List.length results) ;
let labels = [%param labels] in
let load_results fn col =
let df = Dataframe.from_file fn |> Rresult.R.failwith_error_msg in
match Dataframe.get_col df col with
| Some (Floats xs) -> Array.map xs ~f:Option.some
| Some (Float_opts xs) -> xs
| Some (Ints xs) -> Array.map xs ~f:(fun i -> Some (Float.of_int i))
| Some (Int_opts xs) -> Array.map xs ~f:(Option.map ~f:Float.of_int)
| _ -> failwith "expected a numeric column at pos 1"
in
let oracle =
load_results [%path oracle] 1
|> Array.map ~f:(Option.map ~f:Float.(( <> ) 0.))
in
let result_paths = [%eval Bistro.Workflow.path_list (List.map ~f:fst results)] in
let results = List.map2_exn result_paths (List.map ~f:snd results) ~f:load_results in
let make_classification_data x y =
Prc.Classification_data (
List.init (Array.length x) ~f:(fun i ->
match x.(i), y.(i) with
| Some x_i, Some y_i -> Some (x_i, y_i)
| None, _ | _, None -> None
)
|> List.filter_opt
)
in
let estimates, lower_bounds, upper_bounds =
List.map results ~f:(fun scores ->
let Prc.Classification_data xs as data = make_classification_data scores oracle in
let n = List.count xs ~f:snd in
let theta_hat = Prc.auc_trapezoidal_lt data in
let lb, ub = Prc.logit_confidence_interval ~alpha:0.05 ~theta_hat ~n in
theta_hat, lb, ub
)
|> List.unzip3
in
let open OCamlR_base in
let df = Dataframe.create [
"method", `Character (Character.of_list labels) ;
"estimate", `Numeric (Numeric.of_list estimates) ;
"lower_bound", `Numeric (Numeric.of_list lower_bounds) ;
"upper_bound", `Numeric (Numeric.of_list upper_bounds) ;
]
in
let eval s = ignore (OCamlR.eval_string s : OCamlR.sexp) in
eval {|library(ggplot2)|} ;
eval {|
average_precision <- function(df) {
print(ggplot(df, aes(x = method, y = estimate)) + geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound),width=0.05,size=0.5) + geom_point(shape = 20, size = 4) + theme_bw() + ylab("Average precision") + ylim(0,1))
}
|} ;
OCamlR_grDevices.pdf [%dest] ;
OCamlR.(
(call (symbol "average_precision") [ arg Dataframe.to_sexp df ] : OCamlR.sexp)
|> ignore
) ;
OCamlR_grDevices.dev_off ()
let%pworkflow [@version 2] newick_of_nhx (t : nhx file) : newick file =
let tree_file = [%path t] in
let open Phylogenetics in
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment