Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
702df100
Commit
702df100
authored
Nov 09, 2020
by
Philippe Veber
Browse files
refactored simulation to use tk
parent
194e8dde
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
60 additions
and
101 deletions
+60
-101
lib/simulation_dataset.ml
lib/simulation_dataset.ml
+10
-10
lib/simulation_dataset.mli
lib/simulation_dataset.mli
+1
-1
lib/simulator.ml
lib/simulator.ml
+37
-87
lib/simulator.mli
lib/simulator.mli
+12
-3
No files found.
lib/simulation_dataset.ml
View file @
702df100
...
...
@@ -32,7 +32,7 @@ type t =
}
|
Convdet_simulation
of
{
tree
:
tree
;
branch_
factor
:
float
;
branch_
scale
:
float
;
profiles
:
string
;
n_h0
:
int
;
n_ha
:
int
;
...
...
@@ -60,7 +60,7 @@ let bppseqgen_simulation ~hyp ~tree ~profiles ~nb_sites ~seed =
seed
;
}
let
convdet_simulation
?
(
branch_
factor
=
1
.
)
?
(
ne_s
=
1
.,
1
.
)
?
(
gBGC
=
0
.,
0
.
)
?
(
seed
=
0
)
~
tree
~
profiles
~
n_h0
~
n_ha
()
=
let
convdet_simulation
?
(
branch_
scale
=
1
.
)
?
(
ne_s
=
1
.,
1
.
)
?
(
gBGC
=
0
.,
0
.
)
?
(
seed
=
0
)
~
tree
~
profiles
~
n_h0
~
n_ha
()
=
Convdet_simulation
{
tree
;
profiles
;
...
...
@@ -68,7 +68,7 @@ let convdet_simulation ?(branch_factor = 1.) ?(ne_s = 1., 1.) ?(gBGC = 0., 0.) ?
n_ha
;
ne_s
;
gBGC
;
branch_
factor
;
branch_
scale
;
seed
:
int
;
}
...
...
@@ -127,11 +127,11 @@ let rec nucleotide_alignment = function
let
h0
=
nucleotide_alignment
(
Bppseqgen_simulation
{
hypothesis
=
H0
(
Fixed
ne_s
)
;
profiles
;
seed
;
nb_sites
=
n_h0
;
tree
})
in
let
ha
=
nucleotide_alignment
(
Bppseqgen_simulation
{
hypothesis
=
HaPC
(
Fixed
ne_s
)
;
profiles
;
seed
;
nb_sites
=
n_ha
;
tree
})
in
Utils
.
fasta_cappend
h0
ha
|
Convdet_simulation
{
n_h0
;
n_ha
;
profiles
;
ne_s
;
gBGC
;
branch_
factor
;
seed
;
_
}
as
sim
->
|
Convdet_simulation
{
n_h0
;
n_ha
;
profiles
;
ne_s
;
gBGC
;
branch_
scale
;
seed
;
_
}
as
sim
->
let
tree
=
tree
~
branch_length_unit
:
`Nucleotide
sim
in
let
fitness_profiles
=
Workflow
.
input
profiles
in
Simulator
.
simulat
or
~
branch_
factor
~
n_ha
~
n_h0
~
ne_s
~
gBGC
~
tree
~
seed
~
fitness_profiles
()
|>
fst
Simulator
.
simulat
ion
~
branch_
scale
~
n_ha
~
n_h0
~
ne_s
~
gBGC
~
tree
~
seed
~
fitness_profiles
()
|>
Simulator
.
alignment_of_simulation
include
Detection_pipeline
.
Make
(
struct
type
nonrec
t
=
t
...
...
@@ -207,8 +207,8 @@ let benchmark2 d =
]
(* param exploration for SMBE paper *)
(* type branch_
factor
_t = float *)
let
branch_
factor
_range
=
[
1
.;
3
.;
6
.;
9
.
]
(* type branch_
scale
_t = float *)
let
branch_
scale
_range
=
[
1
.;
3
.;
6
.;
9
.
]
type
gBGC_t
=
Global
of
float
|
Convergent
of
float
*
float
let
gBGC_range
=
...
...
@@ -221,7 +221,7 @@ let gBGC_range =
type
param_t
=
float
*
gBGC_t
let
explore_params
~
(
f
:
param_t
->
_
)
=
List
.
map
branch_
factor
_range
~
f
:
(
fun
(
bf
:
float
)
->
List
.
map
branch_
scale
_range
~
f
:
(
fun
(
bf
:
float
)
->
List
.
map
gBGC_range
~
f
:
(
fun
(
gbgc
:
gBGC_t
)
->
((
bf
,
gbgc
)
,
f
(
bf
,
gbgc
)))
)
|>
List
.
concat
...
...
@@ -230,7 +230,7 @@ let simu_of_param ?n_h0:(n_h0=50) (p: param_t) =
convdet_simulation
~
tree
:
(
NHX
"example/trees_analyses/C4AmaranthaceaePolyroot.nhx"
)
~
profiles
:
"example/aa_fitness/263SelectedProfiles.tsv"
~
branch_
factor
:
bf
~
branch_
scale
:
bf
~
gBGC
:
(
match
gbgc
with
|
Convergent
(
a
,
c
)
->
(
a
,
c
)
|
Global
g
->
(
g
,
g
))
...
...
lib/simulation_dataset.mli
View file @
702df100
...
...
@@ -29,7 +29,7 @@ val bppseqgen_simulation :
t
val
convdet_simulation
:
?
branch_
factor
:
float
->
?
branch_
scale
:
float
->
?
ne_s
:
float
*
float
->
?
gBGC
:
float
*
float
->
?
seed
:
int
->
...
...
lib/simulator.ml
View file @
702df100
open
Core_kernel
let
%
p
workflow
simulat
or
?
branch_
factor
?
seed
~
n_h0
~
n_ha
~
ne_s
:
(
ne_s0
,
ne_s1
)
~
gBGC
:
(
gBGC0
,
gBGC1
)
~
tree
~
fitness_profiles
()
=
let
%
workflow
simulat
ion
?
branch_
scale
?
seed
~
n_h0
~
n_ha
~
ne_s
:
(
ne_s0
,
ne_s1
)
~
gBGC
:
(
gBGC0
,
gBGC1
)
~
tree
~
fitness_profiles
()
=
let
open
Phylogenetics
in
let
module
Convsim
=
Phylogenetics_convergence
.
Simulator
in
let
()
=
Option
.
iter
~
f
:
Random
.
init
[
%
param
seed
]
in
let
n_h0
=
[
%
param
n_h0
]
in
let
n_ha
=
[
%
param
n_ha
]
in
...
...
@@ -10,97 +9,48 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
let
ne_s1
=
[
%
param
ne_s1
]
in
let
gBGC0
=
[
%
param
gBGC0
]
in
let
gBGC1
=
[
%
param
gBGC1
]
in
let
branch_factor
=
[
%
param
branch_factor
]
in
let
tree
=
Codepitk
.
Utils
.
tree_from_file
?
alpha
:
branch_factor
[
%
path
tree
]
in
let
fitness_profiles
=
Phylogenetics_convergence
.
Profile_tsv
.(
read
[
%
path
fitness_profiles
]
|>
to_fitness
)
in
let
rescale_fitness
beta
=
Amino_acid
.
Vector
.
map
~
f
:
((
*.
)
beta
)
in
let
base_param
=
let
p
=
Mutsel
.
random_param
~
alpha_nucleotide
:
10
.
~
alpha_fitness
:
0
.
1
in
{
p
with
omega
=
1
.
}
in
let
random_profile
beta
=
Random
.
int
(
Array
.
length
fitness_profiles
)
|>
Array
.
get
fitness_profiles
|>
Amino_acid
.
Vector
.
of_array_exn
|>
rescale_fitness
beta
in
let
h0_params
=
Array
.
init
n_h0
~
f
:
(
fun
_
->
let
profile
=
random_profile
1
.
in
let
p
=
{
base_param
with
gBGC
=
gBGC0
;
scaled_fitness
=
rescale_fitness
ne_s0
profile
}
in
let
q
=
{
base_param
with
gBGC
=
gBGC1
;
scaled_fitness
=
rescale_fitness
ne_s1
profile
}
in
(
p
,
q
)
)
in
let
most_probable_aa
(
pref
:
Amino_acid
.
vector
)
=
let
pref
=
Amino_acid
.
Vector
.
to_array
pref
in
let
arr
=
Array
.
mapi
pref
~
f
:
(
fun
i
x
->
x
,
i
)
in
match
Array
.
max_elt
~
compare
:
Poly
.
compare
arr
with
|
None
->
assert
false
|
Some
(
_
,
i
)
->
i
in
let
ha_params
=
Array
.
init
n_ha
~
f
:
(
fun
_
->
let
rec
loop
()
=
let
p
=
random_profile
ne_s0
in
let
q
=
random_profile
ne_s1
in
if
Poly
.
equal
p
q
||
most_probable_aa
p
=
most_probable_aa
q
then
loop
()
else
let
p
=
{
base_param
with
scaled_fitness
=
p
;
gBGC
=
gBGC0
}
in
let
q
=
{
base_param
with
scaled_fitness
=
q
;
gBGC
=
gBGC1
}
in
(
p
,
q
)
in
loop
()
)
in
let
params_of_cond
i
cond
=
let
p
,
q
=
if
i
<
n_h0
then
h0_params
.
(
i
)
else
ha_params
.
(
i
-
n_h0
)
in
match
cond
with
|
`Ancestral
->
p
|
`Convergent
->
q
let
branch_scale
=
[
%
param
branch_scale
]
in
let
tree
=
Codepitk
.
Convergence_tree
.
from_file
[
%
path
tree
]
|>
Rresult
.
R
.
get_ok
in
let
params
i
bi
=
params_of_cond
i
(
Convsim
.
Branch_info
.
condition
bi
)
let
fitness_profiles
=
Phylogenetics_convergence
.
Profile_tsv
.(
read
[
%
path
fitness_profiles
]
|>
to_fitness
|>
Array
.
map
~
f
:
Amino_acid
.
Vector
.
of_array_exn
)
in
Codepitk
.
Simulator
.
Site_independent_mutsel
.
simulation
?
branch_scale
?
seed
~
n_h0
~
n_ha
~
gBGC
:
(
gBGC0
,
gBGC1
)
~
ne_s
:
(
ne_s0
,
ne_s1
)
~
tree
~
fitness_profiles
()
let
%
pworkflow
alignment_of_simulation
sim
=
let
sim
:
Codepitk
.
Simulator
.
Site_independent_mutsel
.
simulation
=
[
%
eval
sim
]
in
let
root_condition
=
`Ancestral
in
let
root_dists
=
Array
.
init
(
n_h0
+
n_ha
)
~
f
:
(
fun
i
->
Mutsel
.
stationary_distribution
(
params_of_cond
i
root_condition
)
|>
Mutsel
.
NSCodon
.
Vector
.
to_array
|>
Mutsel
.
NSCodon
.
Table
.
of_array_exn
let
species_name
=
Phylogenetics
.
Tree
.
leaves
sim
.
tree
in
Out_channel
.
with_file
[
%
dest
]
~
f
:
(
fun
oc
->
List
.
iter2_exn
species_name
sim
.
sequences
~
f
:
(
fun
description
sequence
->
fprintf
oc
">%s
\n
%s
\n
"
description
(
sequence
:>
string
)
)
)
let
%
pworkflow
fitness_histogram
sim
=
let
open
Phylogenetics
in
let
sim
:
Codepitk
.
Simulator
.
Site_independent_mutsel
.
simulation
=
[
%
eval
sim
]
in
let
root
=
Convsim
.
Mutsel
.
hmm0
~
len
:
(
n_h0
+
n_ha
)
~
dist
:
(
Array
.
get
root_dists
)
in
let
ali
=
Convsim
.
Mutsel
.
alignment
tree
~
root
params
in
let
species_name
=
Phylogenetics
.
Tree
.
leaves
tree
|>
List
.
map
~
f
:
(
fun
{
name
}
->
Option
.
value_exn
name
)
in
let
save_alignment
fn
=
Out_channel
.
with_file
fn
~
f
:
(
fun
oc
->
List
.
iter2_exn
species_name
ali
~
f
:
(
fun
description
sequence
->
fprintf
oc
">%s
\n
%s
\n
"
description
(
sequence
:>
string
)
)
let
params
=
Array
.
append
sim
.
h0_params
sim
.
ha_params
in
let
data
=
Array
.
fold
params
~
init
:
[]
~
f
:
(
fun
acc
(
p
,
q
)
->
(
Amino_acid
.
Vector
.
to_array
p
.
scaled_fitness
)
::
(
Amino_acid
.
Vector
.
to_array
q
.
scaled_fitness
)
::
acc
)
|>
Array
.
concat
in
let
save_fitness_histogram
dest
=
let
data
=
Array
.
fold
(
Array
.
append
h0_params
ha_params
)
~
init
:
[]
~
f
:
(
fun
acc
(
p
,
q
)
->
(
Amino_acid
.
Vector
.
to_array
p
.
scaled_fitness
)
::
(
Amino_acid
.
Vector
.
to_array
q
.
scaled_fitness
)
::
acc
)
|>
Array
.
concat
in
OCamlR_grDevices
.
pdf
dest
;
ignore
(
OCamlR_graphics
.
hist
data
:
OCamlR_graphics
.
hist
)
;
OCamlR_grDevices
.
dev_off
()
in
Core
.
Unix
.
mkdir_p
[
%
dest
]
;
save_alignment
(
Filename
.
concat
[
%
dest
]
"simulation.fa"
)
;
save_fitness_histogram
(
Filename
.
concat
[
%
dest
]
"fitness_histogram.pdf"
)
let
simulator
?
branch_factor
?
seed
~
n_h0
~
n_ha
~
ne_s
~
gBGC
~
tree
~
fitness_profiles
()
=
let
dir
=
simulator
?
branch_factor
?
seed
~
n_h0
~
n_ha
~
ne_s
~
gBGC
~
tree
~
fitness_profiles
()
in
Bistro
.
Workflow
.
select
dir
[
"simulation.fa"
]
,
Bistro
.
Workflow
.
select
dir
[
"fitness_histogram.pdf"
]
OCamlR_grDevices
.
pdf
[
%
dest
]
;
ignore
(
OCamlR_graphics
.
hist
data
:
OCamlR_graphics
.
hist
)
;
OCamlR_grDevices
.
dev_off
()
let
%
pworkflow
pair_tree
~
branch_length1
~
branch_length2
~
npairs
=
let
open
Phylogenetics
in
...
...
lib/simulator.mli
View file @
702df100
open
Bistro
open
File_formats
open
Codepitk
val
simulat
or
:
?
branch_
factor
:
float
->
val
simulat
ion
:
?
branch_
scale
:
float
->
?
seed
:
int
->
n_h0
:
int
->
n_ha
:
int
->
...
...
@@ -11,7 +12,15 @@ val simulator :
tree
:
nhx
file
->
fitness_profiles
:#
text
file
->
unit
->
nucleotide_fasta
file
*
pdf
file
Simulator
.
Site_independent_mutsel
.
simulation
workflow
val
alignment_of_simulation
:
Simulator
.
Site_independent_mutsel
.
simulation
workflow
->
nucleotide_fasta
file
val
fitness_histogram
:
Simulator
.
Site_independent_mutsel
.
simulation
workflow
->
pdf
file
val
pair_tree
:
branch_length1
:
float
->
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment