(**

References:

[1] The binormal assumption on precision-recall curves.
    Kay H. Brodersen, Cheng Soon Ong, Klaas E. Stephan and Joachim M. Buhmann

[2] Area Under the Precision-Recall Curve: Point Estimates and Confidence Intervals.
    Kendrick Boyd, Kevin H. Eng and C. David Page

[3] Precision-Recall-Gain Curves: PR Analysis Done Right.
    Peter A. Flach and Meelis Kull

[4] The Relationship Between Precision-Recall and ROC Curves.
    Jesse Davis and Mark Goadrich

[5] Realisable Classifiers: Improving Operating Performance on Variable Cost Problems.
    M.J.J. Scott, M. Niranjan, R.W. Prager

*)

type dataset = Dataset of (float * bool) list
(** Binary prediction scores with associated labels: each element of the
    list pairs a [float] score with the [bool] label of the same
    observation.

    NOTE(review): [true] presumably marks the positive class; confirm
    against the implementation. *)

(** Precision-recall quantities computed from a scored, labelled
    [dataset]. *)
module Precision_recall : sig
  val operating_points :
    dataset ->
    (float * float * float) list
  (** [operating_points d] computes the list of
      [(score threshold, recall, precision)] triplets for dataset [d],
      sorted by decreasing threshold. *)

  val auc_trapezoidal_lt : dataset -> float
  (** [auc_trapezoidal_lt d] is the AUC lower triangular estimator for
      dataset [d] (see reference [2] in the header of this file).

      NOTE(review): presumably the estimator that [2] calls the lower
      trapezoid estimator; confirm the naming. *)

  val auc_average_precision : dataset -> float
  (** [auc_average_precision d] is the AUC average precision estimator
      for dataset [d] (see reference [2] in the header of this file). *)

  val logit_confidence_interval :
    alpha:float ->
    theta_hat:float ->
    n:int ->
    float * float
  (** [logit_confidence_interval ~alpha ~theta_hat ~n] computes an
      asymptotically valid confidence interval at level 1 - [alpha], when
      the estimate [theta_hat] was obtained from a sample of [n]
      observations.

      NOTE(review): the returned pair is presumably
      [(lower bound, upper bound)], and the construction is presumably the
      logit interval of reference [2]; confirm both against the
      implementation. *)
end

(** Binormal model

   A Gaussian mixture model for which the precision-recall curve can
   be computed explicitly (see reference [1] in the header of this file).
 *)
module Binormal_model : sig
  (** Parameters of the model.

      NOTE(review): the per-field descriptions below are inferred from
      the field names only; confirm against the implementation. *)
  type t = {
    mu_pos : float ;
    (** Presumably the mean of the score distribution of positive
        examples. *)
    sigma_pos : float ;
    (** Presumably the standard deviation of the score distribution of
        positive examples. *)
    mu_neg : float ;
    (** Presumably the mean of the score distribution of negative
        examples. *)
    sigma_neg : float ;
    (** Presumably the standard deviation of the score distribution of
        negative examples. *)
    alpha : float ;
    (** Presumably the mixture weight, i.e. the proportion of positive
        examples. *)
  }

  val make :
    ?mu_pos:float ->
    ?sigma_pos:float ->
    ?mu_neg:float ->
    ?sigma_neg:float ->
    float ->
    t
  (** [make ?mu_pos ?sigma_pos ?mu_neg ?sigma_neg x] builds a model. The
      optional arguments are named after the corresponding fields of [t];
      their default values are chosen by the implementation and are not
      visible in this interface.

      NOTE(review): the positional argument [x] is presumably the value
      of the [alpha] field; confirm. *)

  val simulation :
    Gsl.Rng.t ->
    n:int ->
    t ->
    dataset
  (** [simulation rng ~n model] produces a [dataset] from [model], given
      the GSL random number generator [rng].

      NOTE(review): [n] is presumably the number of simulated
      observations; confirm. *)

  val curve :
    ?n:int ->
    t ->
    (float * float) array
  (** [curve ?n model] returns an array of pairs of floats computed from
      [model].

      NOTE(review): presumably [(recall, precision)] points of the
      model's precision-recall curve, with [n] controlling how many
      points are produced; confirm both, and the default of [n], against
      the implementation. *)

  val estimate : dataset -> t
  (** [estimate d] returns a model built from dataset [d].

      NOTE(review): presumably the model whose parameters are fitted to
      [d]; the estimation method is not visible here. *)

  val auc : t -> float
  (** [auc model] returns a [float] computed from [model].

      NOTE(review): presumably the area under the precision-recall curve
      of [model]; confirm. *)
end

(** Description and rendering of plot layers.

    NOTE(review): the option names [col], [lwd], [pch] and [main] match
    the R graphical parameters of the same names, which suggests an R
    backend; confirm against the implementation. *)
module Plot : sig
  (** One plot layer: a series of coordinates plus display attributes. *)
  type t = {
    x : float array ;
    (** Presumably the abscissas of the series. *)
    y : float array ;
    (** Presumably the ordinates of the series, index-aligned with [x]. *)
    col : string option ;
    (** Optional colour specification. *)
    ty : [`Lines of int | `Points of int] ;
    (** Kind of layer. NOTE(review): the [int] payload is presumably the
        line width for [`Lines] and the point symbol for [`Points];
        confirm. *)
    label : string option ;
    (** Optional label for the layer (presumably used in a legend). *)
  }

  val lines :
    ?col:string ->
    ?lwd:int ->
    ?label:string ->
    (float * float) array ->
    t
  (** [lines ?col ?lwd ?label pts] builds a layer from the pairs in [pts].

      NOTE(review): presumably a [`Lines] layer whose payload is [lwd],
      with each pair read as [(x, y)]; the default of [lwd] is not
      visible here. *)

  val points :
    ?col:string ->
    ?pch:int ->
    ?label:string ->
    (float * float) array ->
    t
  (** [points ?col ?pch ?label pts] builds a layer from the pairs in
      [pts].

      NOTE(review): presumably a [`Points] layer whose payload is [pch],
      with each pair read as [(x, y)]; the default of [pch] is not
      visible here. *)

  val recall_precision_plot :
    ?main:string ->
    t list ->
    unit
  (** [recall_precision_plot ?main layers] renders [layers] for its side
      effect and returns [unit].

      NOTE(review): presumably draws all layers on a single
      recall/precision plot titled [main]; the output device is not
      visible in this interface. *)
end

(** Simulation-based checks.

    Both entry points take only optional parameters followed by [unit]
    and return [unit], so they are run for their side effects.

    NOTE(review): what is checked and how the result is reported
    (printed, plotted, ...) is not visible in this interface; confirm
    against the implementation. *)
module Check : sig
  val binormal_simulation :
    ?sigma:float ->
    ?alpha:float ->
    ?sample_size:int ->
    unit ->
    unit
  (** [binormal_simulation ?sigma ?alpha ?sample_size ()] runs a check
      based on a binormal simulation.

      NOTE(review): [sigma] and [alpha] presumably parameterise the
      simulated binormal model and [sample_size] the number of simulated
      observations; the defaults are set in the implementation. *)

  val discrete_simulation :
    ?sample_size:int ->
    ?support:int ->
    ?alpha:float ->
    unit ->
    unit
  (** [discrete_simulation ?sample_size ?support ?alpha ()] runs a check
      based on a discrete simulation.

      NOTE(review): [support] is presumably the number of distinct score
      values and [alpha] the proportion of positives; the defaults are
      set in the implementation. *)
end