Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
8d3102fe
Commit
8d3102fe
authored
Nov 17, 2020
by
Philippe Veber
Browse files
Simulation_pipeline.Mutsel: separate benchmark of its rds output
parent
54d6b1a3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
67 additions
and
52 deletions
+67
-52
lib/simulation_pipeline.ml
lib/simulation_pipeline.ml
+62
-44
lib/simulation_pipeline.mli
lib/simulation_pipeline.mli
+5
-8
No files found.
lib/simulation_pipeline.ml
View file @
8d3102fe
...
...
@@ -79,6 +79,8 @@ end
module
Mutsel
=
struct
open
Phylogenetics
type
query
=
Mutsel_query
.
t
let
query
=
Mutsel_query
.
make
let
simulation
=
Mutsel_query
.
simulation
...
...
@@ -87,36 +89,34 @@ module Mutsel = struct
type
benchmark
=
{
n_h0
:
int
;
n_ha
:
int
;
method_labels
:
string
list
;
method_outputs
:
float
option
array
list
;
average_precision
:
(
float
*
(
float
*
float
))
list
;
site_model
:
[
`Convergent
|
`Non_convergent
]
array
;
ancestral_counts
:
int
Phylogenetics
.
Amino_acid
.
table
array
;
convergent_counts
:
int
Phylogenetics
.
Amino_acid
.
table
array
;
profiles
:
(
float
array
*
float
array
)
array
;
ancestral_counts
:
int
Amino_acid
.
table
array
;
convergent_counts
:
int
Amino_acid
.
table
array
;
}
let
%
p
workflow
benchmark
_statistics
simulation
~
result
s
=
let
%
workflow
benchmark
q
method
s
=
let
open
Phylogenetics
in
let
open
Codepitk
in
let
open
OCamlR_base
in
let
open
Codepitk
.
Simulator
.
Site_independent_mutsel
in
let
module
Codon
=
Codon
.
Universal_genetic_code
.
NS
in
let
sim
:
simulation
=
[
%
eval
simulation
]
in
let
result_paths
=
[
%
eval
Bistro
.
Workflow
.
path_list
results
]
in
let
sim
:
simulation
=
[
%
eval
simulation
q
]
in
let
result_paths
=
[
%
eval
Bistro
.
Workflow
.
path_list
(
List
.
map
methods
~
f
:
(
fun
f
->
f
q
))
]
in
let
results
=
List
.
map
result_paths
~
f
:
Cpt
.
of_file
|>
Result
.
all
|>
Rresult
.
R
.
failwith_error_msg
|>
List
.
concat_map
~
f
:
Cpt
.
columns
in
let
label
s
=
List
.
ma
p
results
~
f
:
fst
in
let
method_labels
,
method_output
s
=
List
.
unzi
p
results
in
let
n_h0
=
Array
.
length
sim
.
h0_params
in
let
n_ha
=
Array
.
length
sim
.
ha_params
in
let
nsites
=
n_h0
+
n_ha
in
let
columns
=
List
.
map
results
~
f
:
(
fun
(
l
,
r
)
->
l
,
`Numeric
(
Numeric
.
of_array_opt
r
)
)
in
let
amino_acid_vector_of_codon_vector
xs
=
Amino_acid
.
Vector
.
init
(
fun
aa
->
List
.
fold
Codon
.
all
~
init
:
0
.
~
f
:
(
fun
acc
c
->
...
...
@@ -126,32 +126,28 @@ module Mutsel = struct
)
)
in
let
collect_profiles
sel
=
let
compute_profile
p
=
Mutsel
.
stationary_distribution
p
|>
amino_acid_vector_of_codon_vector
|>
Amino_acid
.
Vector
.
to_array
in
let
profiles
=
Array
.
append
sim
.
h0_params
sim
.
ha_params
|>
Array
.
map
~
f
:
(
fun
p
->
sel
p
|>
Mutsel
.
stationary_distribution
|>
amino_acid_vector_of_codon_vector
|>
Amino_acid
.
Vector
.
to_array
|>
Array
.
map
~
f
:
(
fun
(
p1
,
p2
)
->
compute_profile
p1
,
compute_profile
p2
)
|>
Numeric
.
Matrix
.
of_arrays
in
let
ancestral_profiles
=
collect_profiles
fst
in
let
convergent_profiles
=
collect_profiles
snd
in
let
counts
seqs
i
=
let
t
=
Amino_acid
.
Table
.
init
(
fun
aa
->
let
aa
=
Amino_acid
.
to_char
aa
in
List
.
count
seqs
~
f
:
(
fun
s
->
let
codon_str
=
String
.
sub
(
s
:
Dna
.
t
:>
string
)
~
pos
:
(
i
*
3
)
~
len
:
3
in
let
codon
=
match
Codon
.
of_string
codon_str
with
|
Some
c
->
c
|
None
->
assert
false
in
Char
.
equal
(
Amino_acid
.
to_char
(
Codon
.
aa_of_codon
codon
))
aa
)
)
in
(
t
:>
int
array
)
Amino_acid
.
Table
.
init
(
fun
aa
->
let
aa
=
Amino_acid
.
to_char
aa
in
List
.
count
seqs
~
f
:
(
fun
s
->
let
codon_str
=
String
.
sub
(
s
:
Dna
.
t
:>
string
)
~
pos
:
(
i
*
3
)
~
len
:
3
in
let
codon
=
match
Codon
.
of_string
codon_str
with
|
Some
c
->
c
|
None
->
assert
false
in
Char
.
equal
(
Amino_acid
.
to_char
(
Codon
.
aa_of_codon
codon
))
aa
)
)
in
let
collect_counts
cond
=
let
species
=
Convergence_tree
.
leaves
sim
.
tree
in
...
...
@@ -162,7 +158,6 @@ module Mutsel = struct
|>
List
.
filter_opt
in
Array
.
init
nsites
~
f
:
(
counts
seqs
)
|>
Integer
.
Matrix
.
of_arrays
in
let
ancestral_counts
=
collect_counts
`Ancestral
in
let
convergent_counts
=
collect_counts
`Convergent
in
...
...
@@ -176,33 +171,56 @@ module Mutsel = struct
|>
List
.
filter_opt
)
in
let
estimates
,
lower_bounds
,
upper_bounds
=
let
average_precision
=
let
oracle
=
Array
.
init
nsites
~
f
:
(
fun
i
->
if
i
<
n_h0
then
Some
false
else
Some
true
)
in
List
.
map
results
~
f
:
(
fun
(
_
,
scores
)
->
let
Prc
.
Classification_data
xs
as
data
=
make_classification_data
scores
oracle
in
let
n
=
List
.
count
xs
~
f
:
snd
in
let
theta_hat
=
Prc
.
auc_trapezoidal_lt
data
in
let
lb
,
ub
=
Prc
.
logit_confidence_interval
~
alpha
:
0
.
05
~
theta_hat
~
n
in
theta_hat
,
lb
,
ub
theta_hat
,
(
lb
,
ub
)
)
|>
List
.
unzip3
in
{
method_labels
;
method_outputs
;
ancestral_counts
;
convergent_counts
;
average_precision
;
profiles
;
n_h0
;
n_ha
}
let
%
pworkflow
rds_of_benchmark
b
=
let
{
average_precision
;
method_labels
;
n_h0
;
n_ha
;
method_outputs
;
profiles
;
ancestral_counts
;
convergent_counts
}
=
[
%
eval
b
]
in
let
open
OCamlR_base
in
let
auc_estimates
=
Dataframe
.
create
[
"method"
,
`Character
(
Character
.
of_list
labels
)
;
let
collect_profiles
sel
=
Array
.
map
profiles
~
f
:
sel
|>
Numeric
.
Matrix
.
of_arrays
in
let
ancestral_profiles
=
collect_profiles
fst
in
let
convergent_profiles
=
collect_profiles
snd
in
let
collect_counts
c
=
Array
.
map
c
~
f
:
(
fun
c
->
(
c
:
int
Amino_acid
.
table
:>
int
array
))
|>
Integer
.
Matrix
.
of_arrays
in
let
ancestral_counts
=
collect_counts
ancestral_counts
in
let
convergent_counts
=
collect_counts
convergent_counts
in
let
auc_estimates
=
let
estimates
,
bounds
=
List
.
unzip
average_precision
in
let
lower_bounds
,
upper_bounds
=
List
.
unzip
bounds
in
Dataframe
.
create
[
"method"
,
`Character
(
Character
.
of_list
method_labels
)
;
"estimate"
,
`Numeric
(
Numeric
.
of_list
estimates
)
;
"lower_bound"
,
`Numeric
(
Numeric
.
of_list
lower_bounds
)
;
"upper_bound"
,
`Numeric
(
Numeric
.
of_list
upper_bounds
)
;
]
in
let
oracle
=
Array
.(
append
(
map
sim
.
h0_profiles
~
f
:
(
Fn
.
const
false
))
(
map
sim
.
ha_profiles
~
f
:
(
Fn
.
const
true
))
)
Array
.
init
(
n_h0
+
n_ha
)
~
f
:
(
fun
i
->
if
i
<
n_h0
then
false
else
true
)
|>
Logical
.
of_array
in
let
columns
=
List
.
map2_exn
method_labels
method_outputs
~
f
:
(
fun
l
r
->
l
,
`Numeric
(
Numeric
.
of_array_opt
r
)
)
in
let
results
=
Dataframe
.
create
columns
in
let
data
=
List_
.
create
[
Some
"results"
,
Dataframe
.
to_sexp
results
;
...
...
lib/simulation_pipeline.mli
View file @
8d3102fe
...
...
@@ -38,23 +38,20 @@ module Mutsel : sig
type
benchmark
=
{
n_h0
:
int
;
n_ha
:
int
;
method_labels
:
string
list
;
method_outputs
:
float
option
array
list
;
average_precision
:
(
float
*
(
float
*
float
))
list
;
site_model
:
[
`Convergent
|
`Non_convergent
]
array
;
profiles
:
(
float
array
*
float
array
)
array
;
ancestral_counts
:
int
Phylogenetics
.
Amino_acid
.
table
array
;
convergent_counts
:
int
Phylogenetics
.
Amino_acid
.
table
array
;
}
val
benchmark_statistics
:
Codepitk
.
Simulator
.
Site_independent_mutsel
.
simulation
workflow
->
results
:
cpt
file
list
->
binary_file
file
val
benchmark
:
query
->
(
query
->
cpt
file
)
list
->
benchmark
workflow
(* val benchmark : t -> (t -> cpt file) list -> benchmark workflow
*
* val rds_of_benchmark : benchmark workflow -> rds file *)
val
rds_of_benchmark
:
benchmark
workflow
->
rds
file
(** stuff for gbgc SBME paper *)
type
gBGC_t
=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment