Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
2079ac34
Commit
2079ac34
authored
Nov 09, 2020
by
Philippe Veber
Browse files
lmm benchmark
parent
eb0e6316
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
107 additions
and
43 deletions
+107
-43
app/diffseldsparse_benchmark.ml
app/diffseldsparse_benchmark.ml
+1
-1
app/lmm_benchmark.ml
app/lmm_benchmark.ml
+95
-20
lib/simulation_dataset.ml
lib/simulation_dataset.ml
+7
-19
lib/simulation_dataset.mli
lib/simulation_dataset.mli
+2
-1
lib/utils.ml
lib/utils.ml
+2
-2
No files found.
app/diffseldsparse_benchmark.ml
View file @
2079ac34
...
...
@@ -5,7 +5,7 @@ let main ~n_h0 ~n_ha ~seed:i () =
let
open
Simulation_dataset
in
let
sim
=
bppseqgen_mixed_simulation
~
tree
:
(
NHX
"example/trees_analyses/cyp_coding.Chrysithr_root.nhx"
)
~
tree
:
(
NHX
(
Bistro
.
Workflow
.
input
"example/trees_analyses/cyp_coding.Chrysithr_root.nhx"
)
)
~
profiles
:
"example/aa_fitness/263SelectedProfiles.tsv"
~
n_h0
~
n_ha
~
ne_s
:
4
.
~
seed
:
i
()
in
...
...
app/lmm_benchmark.ml
View file @
2079ac34
open
Core_kernel
open
Bistro
open
Codepi
open
Codepi
.
File_formats
let
benchmark
d
=
let
open
Simulation_dataset
in
Utils
.
recall_precision_curve
~
oracle
:
(
oracle
d
)
~
labels
:
[
|
"topological"
;
"multinomial"
;
"gemma_score"
;
"inhouse"
;
"pcoc"
;
(* "pcocv2" ; *)
"tdg09"
|
]
~
results
:
[
topological
d
,
1
;
multinomial_asymptotic_lrt
d
,
1
;
gemma
~
lmm_test
:
`Score
~
relatedness_mode
:
`Standardized
d
,
1
;
inhouse_lmm
d
,
1
;
pcoc
d
,
3
;
(* pcoc_v2 d ; *)
tdg09
d
,
1
;
]
type
dataset
=
{
tree
:
nhx
file
;
rooted
:
bool
;
branch_scale
:
float
;
ne_s
:
float
*
float
;
}
let
()
=
let
besnard2009
=
{
tree
=
Bistro
.
Workflow
.
input
"data/besnard2009/besnard2009.nhx"
;
rooted
=
true
;
branch_scale
=
3
.
;
ne_s
=
2
.,
2
.
;
}
let
oneline_rodent
=
{
tree
=
Bistro
.
Workflow
.
input
"data/online_rodent/online_rodent.nhx"
;
rooted
=
true
;
branch_scale
=
3
.
;
ne_s
=
2
.,
2
.
;
}
let
rubisco
=
{
tree
=
Rubisco_dataset
.(
Path
"data/rubisco"
|>
Query
.
tree
~
branch_length_unit
:
`Amino_acid
)
;
rooted
=
false
;
branch_scale
=
3
.
;
ne_s
=
2
.,
2
.
;
}
let
orthomam_echolocation
=
{
tree
=
(
Orthomam
.
tree_of_db
~
branch_length_unit
:
`Amino_acid
~
convergent_species
:
Orthomam
.
species_with_echolocation
(
Codepitk
.
Orthomam_db
.
make
"omm"
)
)
;
rooted
=
false
;
branch_scale
=
3
.
;
ne_s
=
4
.,
4
.
;
}
type
detection_method
=
{
result
:
Simulation_dataset
.
t
->
text
file
;
col
:
int
;
label
:
string
;
requires_rooted_tree
:
bool
;
}
let
meth
?
(
col
=
1
)
?
(
requires_rooted_tree
=
false
)
result
label
=
{
result
;
col
;
label
;
requires_rooted_tree
}
let
methods
=
Simulation_dataset
.[
meth
tdg09
"tdg09"
;
meth
pcoc
~
col
:
3
"pcoc"
;
meth
pcoc_v2
~
col
:
3
"pcoc v2"
;
meth
(
gemma
~
lmm_test
:
`Score
~
relatedness_mode
:
`Standardized
)
"gemma"
;
meth
inhouse_lmm
"LMM"
;
meth
multinomial_asymptotic_lrt
"multinomial"
;
meth
topological
"topological"
~
requires_rooted_tree
:
true
;
]
let
benchmark
{
tree
=
t
;
rooted
;
ne_s
;
branch_scale
}
=
let
open
Simulation_dataset
in
let
sim
=
let
open
Simulation_dataset
in
convdet_simulation
~
tree
:
(
NHX
"data/besnard2009/besnard2009.nhx"
)
convdet_simulation
~
tree
:
(
NHX
t
)
~
branch_scale
~
ne_s
~
profiles
:
"example/aa_fitness/263SelectedProfiles.tsv"
~
n_h0
:
900
~
n_ha
:
100
~
branch_scale
:
3
.
~
ne_s
:
(
2
.,
2
.
)
()
~
n_h0
:
900
~
n_ha
:
100
()
in
let
results
,
labels
=
List
.
filter_map
methods
~
f
:
(
fun
m
->
if
not
m
.
requires_rooted_tree
||
rooted
then
Some
((
m
.
result
sim
,
m
.
col
)
,
m
.
label
)
else
None
)
|>
List
.
unzip
in
let
w
=
benchmark
sim
in
Utils
.
recall_precision_curve
~
oracle
:
(
oracle
sim
)
~
labels
~
results
let
report
=
let
module
H
=
Bistro_utils
.
Html_report
in
H
.
make
~
title
:
"Codepi benchmark"
[
H
.
section
"Besnard 2009 dataset"
;
H
.
pdf
(
benchmark
besnard2009
)
;
H
.
section
"Rubisco dataset"
;
H
.
pdf
(
benchmark
rubisco
)
;
H
.
section
"Rodent dataset"
;
H
.
pdf
(
benchmark
oneline_rodent
)
;
H
.
section
"Orthomam/echolocation dataset"
;
H
.
pdf
(
benchmark
orthomam_echolocation
)
;
]
|>
H
.
render
let
()
=
let
w
=
report
in
let
loggers
=
[
Bistro_utils
.
Console_logger
.
create
()
]
in
Bistro_engine
.
Scheduler
.
simple_eval_exn
~
loggers
~
np
:
4
~
mem
:
(
`GB
4
)
(
Bistro
.
Workflow
.
path
w
)
|>
print_endline
lib/simulation_dataset.ml
View file @
2079ac34
open
Base
open
Printf
open
Bistro
open
File_formats
let
calc_fixed_seed
~
(
str
:
string
)
(
seed
:
int
)
:
int
=
let
str_hash
=
Hashtbl
.
hash
str
in
Hashtbl
.
hash
(
str_hash
+
seed
)
type
tree
=
|
NHX
of
string
|
NHX
of
nhx
file
|
Pair_tree
of
{
npairs
:
int
;
branch_length1
:
float
;
...
...
@@ -72,25 +73,12 @@ let convdet_simulation ?(branch_scale = 1.) ?(ne_s = 1., 1.) ?(gBGC = 0., 0.) ?(
seed
:
int
;
}
let
prefix_of_tree
=
function
|
NHX
path
->
Caml
.
Filename
.
chop_extension
path
|
Pair_tree
{
branch_length1
;
branch_length2
;
npairs
}
->
sprintf
"pair_tree(bl1=%g,bl2=%g,np=%d)"
branch_length1
branch_length2
npairs
let
tree_prefix
=
function
|
Bppseqgen_simulation
{
tree
;
_
}
|
Bppseqgen_mixed
{
tree
;
_
}
|
Convdet_simulation
{
tree
;
_
}
->
prefix_of_tree
tree
let
tree
~
branch_length_unit
:_
=
function
|
Bppseqgen_simulation
{
tree
;
_
}
|
Bppseqgen_mixed
{
tree
;
_
}
|
Convdet_simulation
{
tree
;
_
}
->
match
tree
with
|
NHX
path
->
Workflow
.
input
path
|
NHX
w
->
w
|
Pair_tree
{
branch_length1
;
branch_length2
;
npairs
}
->
...
...
@@ -110,7 +98,7 @@ let profile ~nb_sites ~profiles ~seed =
let
bppseqgen
sim
~
hypothesis
~
nb_sites
~
profiles
=
let
model_prefix
=
Convergence_hypothesis
.
string_of_model
hypothesis
in
let
descr
=
sprintf
".%s
.%s
"
model_prefix
(
tree_prefix
sim
)
in
let
descr
=
sprintf
".%s"
model_prefix
in
let
profile
=
profile
~
nb_sites
~
profiles
~
seed
:
(
seed
sim
)
in
let
profile_f
=
profile
.
profile_f
in
let
profile_c
=
profile
.
profile_c
in
...
...
@@ -162,7 +150,7 @@ let oracle d =
let
multinomial_benchmark
d
=
Utils
.
recall_precision_curve
~
oracle
:
(
oracle
d
)
~
labels
:
[
|
"LRT"
;
"LRTsim"
;
"sparse"
;
"sparse_sim"
|
]
~
labels
:
[
"LRT"
;
"LRTsim"
;
"sparse"
;
"sparse_sim"
]
~
results
:
[
multinomial_asymptotic_lrt
d
,
1
;
multinomial_simulation_lrt
d
,
1
;
...
...
@@ -197,7 +185,7 @@ let benchmark ?mode d =
let
benchmark2
d
=
Utils
.
recall_precision_curve
~
oracle
:
(
oracle
d
)
~
labels
:
[
|
"identical"
;
"topological"
;
"multinomial"
;
"pcoc"
;
"tdg09"
|
]
~
labels
:
[
"identical"
;
"topological"
;
"multinomial"
;
"pcoc"
;
"tdg09"
]
~
results
:
[
identical
d
,
1
;
topological
d
,
1
;
...
...
@@ -228,7 +216,7 @@ let explore_params ~(f: param_t -> _) =
let
simu_of_param
?
n_h0
:
(
n_h0
=
50
)
(
p
:
param_t
)
=
let
bf
,
gbgc
=
p
in
convdet_simulation
~
tree
:
(
NHX
"example/trees_analyses/C4AmaranthaceaePolyroot.nhx"
)
~
tree
:
(
NHX
(
Workflow
.
input
"example/trees_analyses/C4AmaranthaceaePolyroot.nhx"
)
)
~
profiles
:
"example/aa_fitness/263SelectedProfiles.tsv"
~
branch_scale
:
bf
~
gBGC
:
(
match
gbgc
with
...
...
lib/simulation_dataset.mli
View file @
2079ac34
open
Bistro
open
File_formats
type
tree
=
|
NHX
of
string
|
NHX
of
nhx
file
|
Pair_tree
of
{
npairs
:
int
;
branch_length1
:
float
;
...
...
lib/utils.ml
View file @
2079ac34
...
...
@@ -35,7 +35,7 @@ let%pworkflow fasta_cappend fa1 fa2 =
let
%
pworkflow
recall_precision_curve
~
labels
~
oracle
~
results
:
pdf
file
=
let
open
Codepitk
in
let
n
=
Array
.
length
labels
in
let
n
=
List
.
length
labels
in
assert
(
n
=
List
.
length
results
)
;
let
colors
=
Array
.
sub
[
|
"blue"
;
"green"
;
"red"
;
"tan1"
;
"royalblue4"
;
"darkorchid"
;
"yellow"
|
]
~
pos
:
0
~
len
:
n
in
let
labels
=
[
%
param
labels
]
in
...
...
@@ -73,7 +73,7 @@ let%pworkflow recall_precision_curve ~labels ~oracle ~results : pdf file =
OCamlR_grDevices
.
pdf
[
%
dest
]
;
OCamlR_graphics
.
plot
~
xlab
:
"Recall"
~
ylab
:
"Precision"
~
xlim
:
(
0
.,
1
.
)
~
ylim
:
(
0
.,
1
.
)
~
x
:
[
||
]
~
y
:
[
||
]
()
;
List
.
iteri
result_curves
~
f
:
(
fun
i
(
x
,
y
)
->
OCamlR_graphics
.
lines
~
col
:
colors
.
(
i
)
~
x
~
y
()
)
;
OCamlR_graphics
.
legend
~
pch
:
[
|
19
|
]
~
col
:
colors
`topright
labels
;
OCamlR_graphics
.
legend
~
pch
:
[
|
19
|
]
~
col
:
colors
`topright
(
Array
.
of_list
labels
)
;
OCamlR_grDevices
.
dev_off
()
let
%
pworkflow
[
@
version
2
]
newick_of_nhx
(
t
:
nhx
file
)
:
newick
file
=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment