Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
12b017f6
Commit
12b017f6
authored
Nov 11, 2020
by
Philippe Veber
Browse files
implemented precision recall curve and auc calculations
parent
281111f8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
214 additions
and
0 deletions
+214
-0
lib/prc/dune
lib/prc/dune
+4
-0
lib/prc/prc.ml
lib/prc/prc.ml
+210
-0
No files found.
lib/prc/dune
0 → 100644
View file @
12b017f6
(library
(name prc)
(public_name codepi.prc)
(libraries core_kernel gsl ocaml-r.graphics))
lib/prc/prc.ml
0 → 100644
View file @
12b017f6
open
Core_kernel
open
Gsl
type
classification_data
=
Classification_data
of
(
float
*
bool
)
list
let
empirical_curve
(
Classification_data
xs
)
=
let
npos
=
List
.
count
xs
~
f
:
snd
in
List
.
sort
xs
~
compare
:
(
Tuple
.
T2
.
compare
~
cmp1
:
Float
.
descending
~
cmp2
:
Bool
.
descending
)
|>
List
.
fold
~
init
:
(
0
,
0
,
[
0
.,
1
.
])
~
f
:
(
fun
(
pos
,
neg
,
points
)
(
_
,
b
)
->
let
pos
,
neg
=
if
b
then
pos
+
1
,
neg
else
pos
,
neg
+
1
in
let
recall
=
float
pos
/.
float
npos
in
let
precision
=
float
pos
/.
float
(
pos
+
neg
)
in
(
pos
,
neg
,
(
recall
,
precision
)
::
points
)
)
|>
trd3
|>
List
.
rev
(**
The binormal assumption on precision-recall curves
Kay H. Brodersen ∗† , Cheng Soon Ong ∗ , Klaas E. Stephan † and Joachim M. Buhmann ∗
*)
type
binormal_params
=
{
mu_pos
:
float
;
sigma_pos
:
float
;
mu_neg
:
float
;
sigma_neg
:
float
;
alpha
:
float
;
}
let
binormal_params
?
(
mu_pos
=
1
.
)
?
(
sigma_pos
=
1
.
)
?
(
mu_neg
=
0
.
)
?
(
sigma_neg
=
1
.
)
alpha
=
{
mu_pos
;
mu_neg
;
sigma_pos
;
sigma_neg
;
alpha
}
(** Binormal simulator: Z ~ alpha N(mu+,sigma+) + (1 - alpha) N(mu-, sigma-) *)
let
binormal_simulation
rng
~
n
{
mu_pos
;
mu_neg
;
sigma_pos
;
sigma_neg
;
alpha
}
=
let
sample
=
List
.
init
n
~
f
:
(
fun
_
->
let
mu
,
sigma
,
label
=
match
Randist
.
bernoulli
rng
~
p
:
alpha
with
|
0
->
mu_neg
,
sigma_neg
,
false
|
1
->
mu_pos
,
sigma_pos
,
true
|
_
->
assert
false
in
mu
+.
Randist
.
gaussian
rng
~
sigma
,
label
)
in
Classification_data
sample
let
phi
~
mu
~
sigma
x
=
Cdf
.
gaussian_P
~
x
:
(
x
-.
mu
)
~
sigma
let
inv_phi
~
mu
~
sigma
p
=
Cdf
.
gaussian_Pinv
~
p
~
sigma
+.
mu
let
binormal_curve
~
n
p
=
let
phi_neg
=
phi
~
mu
:
p
.
mu_neg
~
sigma
:
p
.
sigma_neg
in
let
inv_phi_pos
=
inv_phi
~
mu
:
p
.
mu_pos
~
sigma
:
p
.
sigma_pos
in
let
alpha
=
p
.
alpha
in
let
xs
=
List
.
init
n
~
f
:
(
fun
i
->
float
i
/.
float
n
)
in
List
.
map
xs
~
f
:
(
fun
x_i
->
let
y_i
=
alpha
*.
x_i
/.
(
alpha
*.
x_i
+.
(
1
.
-.
alpha
)
*.
(
1
.
-.
phi_neg
(
inv_phi_pos
(
1
.
-.
x_i
))))
in
x_i
,
y_i
)
let
binormal_estimate
(
Classification_data
xs
)
=
let
select
label
=
List
.
filter_map
xs
~
f
:
(
fun
(
x
,
b
)
->
if
Bool
.(
b
=
label
)
then
Some
x
else
None
)
|>
Array
.
of_list
in
let
x
=
select
false
in
let
y
=
select
true
in
let
len
t
=
float
(
Array
.
length
t
)
in
Stats
.{
mu_pos
=
mean
y
;
mu_neg
=
mean
x
;
sigma_pos
=
sd
y
;
sigma_neg
=
sd
x
;
alpha
=
len
y
/.
(
len
x
+.
len
y
)
;
}
let
xy
xs
=
let
curve
=
Array
.
of_list
xs
in
let
x
=
Array
.
map
curve
~
f
:
fst
in
let
y
=
Array
.
map
curve
~
f
:
snd
in
x
,
y
let
plot_binormal_fit
?
(
seed
=
42
)
?
(
sigma
=
1
.
)
~
n
~
alpha
()
=
let
p
=
binormal_params
~
sigma_pos
:
sigma
~
sigma_neg
:
sigma
alpha
in
let
plot
?
lwd
?
col
points
=
let
x
,
y
=
xy
points
in
OCamlR_graphics
.
lines
?
lwd
?
col
~
x
~
y
()
in
let
rng
=
Rng
.(
make
(
default
()
))
in
Rng
.
set
rng
(
Nativeint
.
of_int
seed
)
;
OCamlR_graphics
.
plot
~
xlab
:
"Recall"
~
ylab
:
"Precision"
~
xlim
:
(
0
.,
1
.
)
~
ylim
:
(
0
.,
1
.
)
~
x
:
[
|
|
]
~
y
:
[
|
|
]
()
;
for
_
=
1
to
10
do
binormal_simulation
~
n
rng
p
|>
empirical_curve
|>
plot
~
col
:
"lightgrey"
done
;
let
sim
=
binormal_simulation
rng
~
n
p
in
let
estimation
=
binormal_estimate
sim
in
plot
~
col
:
"red"
(
empirical_curve
sim
)
;
plot
~
lwd
:
2
~
col
:
"black"
(
binormal_curve
~
n
:
100
p
)
;
plot
~
lwd
:
2
~
col
:
"green"
(
binormal_curve
~
n
:
100
estimation
)
;
OCamlR_graphics
.
legend
~
col
:
[
|
"lightgrey"
;
"black"
|
]
~
lwd
:
[
|
1
.
;
2
.
|
]
`topright
[
|
"Simulations"
;
"Theoretical curve"
|
]
(**
Area Under the Precision-Recall Curve:
Point Estimates and Confidence Intervals
Kendrick Boyd 1 , Kevin H. Eng 2 , and C. David Page
*)
let
sum
a
b
~
f
=
let
rec
loop
i
acc
=
if
i
>
b
then
acc
else
loop
(
i
+
1
)
(
acc
+.
f
i
)
in
loop
a
0
.
let
auc_trapezoidal_lt
classification
=
let
curve
=
empirical_curve
classification
in
let
n
,
r
,
pmin
,
pmax
=
let
data
=
List
.
group
curve
~
break
:
(
fun
(
r1
,_
)
(
r2
,
_
)
->
Float
.(
r1
<>
r2
))
|>
List
.
map
~
f
:
(
function
|
[]
->
assert
false
|
(
r
,
_
)
::
_
as
xs
->
r
,
List
.
map
xs
~
f
:
snd
)
|>
Array
.
of_list
in
Array
.
length
data
,
Array
.
map
data
~
f
:
fst
,
Array
.
map
data
~
f
:
(
fun
(
_
,
ps
)
->
List
.
reduce_exn
ps
~
f
:
Float
.
min
)
,
Array
.
map
data
~
f
:
(
fun
(
_
,
ps
)
->
List
.
reduce_exn
ps
~
f
:
Float
.
max
)
in
sum
0
(
n
-
2
)
~
f
:
(
fun
i
->
(
pmin
.
(
i
)
+.
pmax
.
(
i
+
1
))
/.
2
.
*.
(
r
.
(
i
+
1
)
-.
r
.
(
i
)))
let
auc_average_precision
(
Classification_data
xs
)
=
let
pos
,
_
,
sum
=
List
.
sort
xs
~
compare
:
(
Tuple
.
T2
.
compare
~
cmp1
:
Float
.
descending
~
cmp2
:
Bool
.
descending
)
|>
List
.
fold
~
init
:
(
0
,
0
,
0
.
)
~
f
:
(
fun
(
pos
,
neg
,
sum
)
(
_
,
b
)
->
let
pos
,
neg
=
if
b
then
pos
+
1
,
neg
else
pos
,
neg
+
1
in
let
sum
=
if
b
then
let
precision
=
float
pos
/.
float
(
pos
+
neg
)
in
precision
+.
sum
else
sum
in
(
pos
,
neg
,
sum
)
)
in
sum
/.
float
pos
let
plot_auc_boxplots
?
(
seed
=
42
)
?
(
sigma
=
1
.
)
~
n
~
alpha
()
=
let
open
OCamlR_base
in
let
p
=
binormal_params
~
sigma_pos
:
sigma
~
sigma_neg
:
sigma
alpha
in
let
rng
=
Rng
.(
make
(
default
()
))
in
Rng
.
set
rng
(
Nativeint
.
of_int
seed
)
;
let
samples
=
List
.
init
1_000
~
f
:
(
fun
_
->
binormal_simulation
~
n
rng
p
)
in
let
compute
f
=
List
.
map
samples
~
f
|>
Numeric
.
of_list
|>
Numeric
.
to_sexp
in
let
trapezoidal_lt
=
compute
auc_trapezoidal_lt
in
let
average_precision
=
compute
auc_average_precision
in
let
l
=
List_
.
create
[
Some
"trapezoidal"
,
trapezoidal_lt
;
Some
"average_precision"
,
average_precision
;
]
in
OCamlR_graphics
.
list_boxplot
l
let
logit
p
=
Float
.
log
(
p
/.
(
1
.
-.
p
))
let
sigmoid
x
=
let
exp_x
=
Float
.
exp
x
in
exp_x
/.
(
1
.
+.
exp_x
)
let
logit_confidence_interval
~
alpha
~
theta_hat
~
n
=
let
eta_hat
=
logit
theta_hat
in
let
tau_hat
=
(
float
n
*.
theta_hat
*.
(
1
.
-.
theta_hat
))
**
(
-
0
.
5
)
in
let
delta
=
tau_hat
*.
Cdf
.
gaussian_Pinv
~
sigma
:
1
.
~
p
:
(
1
.
-.
alpha
/.
2
.
)
in
sigmoid
(
eta_hat
-.
delta
)
,
sigmoid
(
eta_hat
+.
delta
)
let
logit_confidence_interval_coverage_test
?
(
seed
=
42
)
?
(
sigma
=
1
.
)
~
n
~
alpha
()
=
let
p
=
binormal_params
~
sigma_pos
:
sigma
~
sigma_neg
:
sigma
alpha
in
let
rng
=
Rng
.(
make
(
default
()
))
in
Rng
.
set
rng
(
Nativeint
.
of_int
seed
)
;
let
nsims
=
1_000
in
let
samples
=
List
.
init
nsims
~
f
:
(
fun
_
->
binormal_simulation
~
n
rng
p
)
in
let
npos
=
List
.
map
samples
~
f
:
(
fun
(
Classification_data
xs
)
->
List
.
count
xs
~
f
:
snd
)
in
let
estimates
=
List
.
map
samples
~
f
:
auc_trapezoidal_lt
in
let
confidence_intervals
=
List
.
map2_exn
estimates
npos
~
f
:
(
fun
theta_hat
n
->
logit_confidence_interval
~
alpha
:
0
.
05
~
theta_hat
~
n
)
in
let
best_estimate
=
Array
.
of_list
estimates
|>
Stats
.
mean
in
let
nsuccessess
=
List
.
count
confidence_intervals
~
f
:
(
fun
(
lb
,
ub
)
->
Float
.(
lb
<=
best_estimate
&&
best_estimate
<=
ub
))
in
float
nsuccessess
/.
float
nsims
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment