Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
d4686c3b
Commit
d4686c3b
authored
Nov 11, 2020
by
Philippe Veber
Browse files
evaluation by average precision
parent
12b017f6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
71 additions
and
7 deletions
+71
-7
app/lmm_benchmark.ml
app/lmm_benchmark.ml
+6
-6
lib/dune
lib/dune
+3
-1
lib/utils.ml
lib/utils.ml
+62
-0
No files found.
app/lmm_benchmark.ml
View file @
d4686c3b
...
...
@@ -14,21 +14,21 @@ let besnard2009 = {
tree
=
Bistro
.
Workflow
.
input
"data/besnard2009/besnard2009.nhx"
;
rooted
=
true
;
branch_scale
=
3
.
;
ne_s
=
2
.,
2
.
;
ne_s
=
4
.,
4
.
;
}
let
oneline_rodent
=
{
tree
=
Bistro
.
Workflow
.
input
"data/online_rodent/online_rodent.nhx"
;
rooted
=
true
;
branch_scale
=
3
.
;
ne_s
=
2
.,
2
.
;
ne_s
=
4
.,
4
.
;
}
let
rubisco
=
{
tree
=
Rubisco_dataset
.(
Path
"data/rubisco"
|>
Query
.
tree
~
branch_length_unit
:
`Amino_acid
)
;
rooted
=
false
;
branch_scale
=
3
.
;
ne_s
=
2
.,
2
.
;
ne_s
=
4
.,
4
.
;
}
let
orthomam_echolocation
=
{
...
...
@@ -56,7 +56,7 @@ let meth ?(col = 1) ?(requires_rooted_tree = false) result label =
let
methods
=
Simulation_dataset
.[
meth
tdg09
"tdg09"
;
meth
pcoc
~
col
:
3
"pcoc"
;
meth
pcoc_v2
~
col
:
3
"pcoc v2"
;
(*
meth pcoc_v2 ~col:3 "pcoc v2" ;
*)
meth
(
gemma
~
lmm_test
:
`Score
~
relatedness_mode
:
`Standardized
)
"gemma"
;
meth
inhouse_lmm
"LMM"
;
meth
multinomial_asymptotic_lrt
"multinomial"
;
...
...
@@ -66,7 +66,7 @@ let methods = Simulation_dataset.[
let
benchmark
{
tree
=
t
;
rooted
;
ne_s
;
branch_scale
}
=
let
open
Simulation_dataset
in
let
sim
=
convdet_simulation
~
tree
:
(
NHX
t
)
~
branch_scale
~
ne_s
convdet_simulation
~
seed
:
3
~
tree
:
(
NHX
t
)
~
branch_scale
~
ne_s
~
profiles
:
"example/aa_fitness/263SelectedProfiles.tsv"
~
n_h0
:
900
~
n_ha
:
100
()
in
...
...
@@ -78,7 +78,7 @@ let benchmark { tree = t ; rooted ; ne_s ; branch_scale } =
)
|>
List
.
unzip
in
Utils
.
recall
_precision_
curve
~
oracle
:
(
oracle
sim
)
~
labels
~
results
Utils
.
average
_precision_
plot
~
oracle
:
(
oracle
sim
)
~
labels
~
results
let
report
=
let
module
H
=
Bistro_utils
.
Html_report
in
...
...
lib/dune
View file @
d4686c3b
(library
(name codepi)
(libraries core biotk biotope bistro.utils codepitk containers gsl ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence)
(libraries core biotk biotope bistro.utils codepitk containers gsl
ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence
prc)
(preprocess
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
...
...
lib/utils.ml
View file @
d4686c3b
...
...
@@ -76,6 +76,68 @@ let%pworkflow recall_precision_curve ~labels ~oracle ~results : pdf file =
OCamlR_graphics
.
legend
~
pch
:
[
|
19
|
]
~
col
:
colors
`topright
(
Array
.
of_list
labels
)
;
OCamlR_grDevices
.
dev_off
()
let
%
pworkflow
average_precision_plot
~
labels
~
oracle
~
results
:
pdf
file
=
let
open
Codepitk
in
let
n
=
List
.
length
labels
in
assert
(
n
=
List
.
length
results
)
;
let
labels
=
[
%
param
labels
]
in
let
load_results
fn
col
=
let
df
=
Dataframe
.
from_file
fn
|>
Rresult
.
R
.
failwith_error_msg
in
match
Dataframe
.
get_col
df
col
with
|
Some
(
Floats
xs
)
->
Array
.
map
xs
~
f
:
Option
.
some
|
Some
(
Float_opts
xs
)
->
xs
|
Some
(
Ints
xs
)
->
Array
.
map
xs
~
f
:
(
fun
i
->
Some
(
Float
.
of_int
i
))
|
Some
(
Int_opts
xs
)
->
Array
.
map
xs
~
f
:
(
Option
.
map
~
f
:
Float
.
of_int
)
|
_
->
failwith
"expected a numeric column at pos 1"
in
let
oracle
=
load_results
[
%
path
oracle
]
1
|>
Array
.
map
~
f
:
(
Option
.
map
~
f
:
Float
.((
<>
)
0
.
))
in
let
result_paths
=
[
%
eval
Bistro
.
Workflow
.
path_list
(
List
.
map
~
f
:
fst
results
)]
in
let
results
=
List
.
map2_exn
result_paths
(
List
.
map
~
f
:
snd
results
)
~
f
:
load_results
in
let
make_classification_data
x
y
=
Prc
.
Classification_data
(
List
.
init
(
Array
.
length
x
)
~
f
:
(
fun
i
->
match
x
.
(
i
)
,
y
.
(
i
)
with
|
Some
x_i
,
Some
y_i
->
Some
(
x_i
,
y_i
)
|
None
,
_
|
_
,
None
->
None
)
|>
List
.
filter_opt
)
in
let
estimates
,
lower_bounds
,
upper_bounds
=
List
.
map
results
~
f
:
(
fun
scores
->
let
Prc
.
Classification_data
xs
as
data
=
make_classification_data
scores
oracle
in
let
n
=
List
.
count
xs
~
f
:
snd
in
let
theta_hat
=
Prc
.
auc_trapezoidal_lt
data
in
let
lb
,
ub
=
Prc
.
logit_confidence_interval
~
alpha
:
0
.
05
~
theta_hat
~
n
in
theta_hat
,
lb
,
ub
)
|>
List
.
unzip3
in
let
open
OCamlR_base
in
let
df
=
Dataframe
.
create
[
"method"
,
`Character
(
Character
.
of_list
labels
)
;
"estimate"
,
`Numeric
(
Numeric
.
of_list
estimates
)
;
"lower_bound"
,
`Numeric
(
Numeric
.
of_list
lower_bounds
)
;
"upper_bound"
,
`Numeric
(
Numeric
.
of_list
upper_bounds
)
;
]
in
let
eval
s
=
ignore
(
OCamlR
.
eval_string
s
:
OCamlR
.
sexp
)
in
eval
{
|
library
(
ggplot2
)
|
}
;
eval
{
|
average_precision
<-
function
(
df
)
{
print
(
ggplot
(
df
,
aes
(
x
=
method
,
y
=
estimate
))
+
geom_errorbar
(
aes
(
ymin
=
lower_bound
,
ymax
=
upper_bound
)
,
width
=
0
.
05
,
size
=
0
.
5
)
+
geom_point
(
shape
=
20
,
size
=
4
)
+
theme_bw
()
+
ylab
(
"Average precision"
)
+
ylim
(
0
,
1
))
}
|
}
;
OCamlR_grDevices
.
pdf
[
%
dest
]
;
OCamlR
.(
(
call
(
symbol
"average_precision"
)
[
arg
Dataframe
.
to_sexp
df
]
:
OCamlR
.
sexp
)
|>
ignore
)
;
OCamlR_grDevices
.
dev_off
()
let
%
pworkflow
[
@
version
2
]
newick_of_nhx
(
t
:
nhx
file
)
:
newick
file
=
let
tree_file
=
[
%
path
t
]
in
let
open
Phylogenetics
in
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment