Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
a3df223f
Commit
a3df223f
authored
Dec 13, 2019
by
Philippe Veber
Browse files
update wrt phylogenetics and bistro
parent
e5695953
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
56 additions
and
43 deletions
+56
-43
lib/alistats.ml
lib/alistats.ml
+3
-3
lib/dune
lib/dune
+1
-1
lib/file_formats.ml
lib/file_formats.ml
+0
-1
lib/multinomial.ml
lib/multinomial.ml
+12
-10
lib/simulator.ml
lib/simulator.ml
+24
-27
lib/toolbox/dune
lib/toolbox/dune
+1
-1
lib/toolbox/utils.ml
lib/toolbox/utils.ml
+15
-0
No files found.
lib/alistats.ml
View file @
a3df223f
open
Core
open
Core
open
Convdet
open
Phylogenetics
let
ok_exn
err
=
function
let
ok_exn
err
=
function
|
Ok
x
->
x
|
Ok
x
->
x
...
@@ -45,7 +45,7 @@ let nucleotide_fasta_gc ?pos fa =
...
@@ -45,7 +45,7 @@ let nucleotide_fasta_gc ?pos fa =
seq_gc
?
pos
seqs
seq_gc
?
pos
seqs
let
nucleotide_fasta_gc_ac
?
pos
tree
fa
=
let
nucleotide_fasta_gc_ac
?
pos
tree
fa
=
let
tree
=
Convdet
.
Simulator
.
tree_from_file
tree
in
let
tree
=
Reviewphiltrans_toolbox
.
Utils
.
tree_from_file
tree
in
let
seqs
=
strings_from_fasta
fa
in
let
seqs
=
strings_from_fasta
fa
in
let
leaf_state
=
let
leaf_state
=
Phylogenetics
.
Tree
.
map_leaves
tree
~
root
:
(
0
.,
0
)
~
f
:
(
fun
_
b
->
snd
b
=
0
)
Phylogenetics
.
Tree
.
map_leaves
tree
~
root
:
(
0
.,
0
)
~
f
:
(
fun
_
b
->
snd
b
=
0
)
...
@@ -76,7 +76,7 @@ let command =
...
@@ -76,7 +76,7 @@ let command =
main
~
alignment
main
~
alignment
]
]
let
%
pworkflow
histogram
(
fa
:
#
Bistro
_bioinfo
.
fasta
Bistro
.
pworkflow
)
=
let
%
pworkflow
histogram
(
fa
:
#
Bistro
.
fasta
Bistro
.
pworkflow
)
=
let
al
=
ok_exn
Alignment
.
show_parsing_error
@@
Alignment
.
from_fasta
[
%
path
fa
]
in
let
al
=
ok_exn
Alignment
.
show_parsing_error
@@
Alignment
.
from_fasta
[
%
path
fa
]
in
let
float_array_of_int_list
x
=
let
float_array_of_int_list
x
=
Array
.
of_list
x
Array
.
of_list
x
...
...
lib/dune
View file @
a3df223f
(library
(library
(name reviewphiltrans)
(name reviewphiltrans)
(libraries bistro.bioinfo bistro.utils
convdet
gzt ocaml-r.graphics ocaml-r.grDevices reviewphiltrans_toolbox )
(libraries bistro.bioinfo bistro.utils gzt ocaml-r.graphics ocaml-r.grDevices
phylogenetics.convergence
reviewphiltrans_toolbox )
(preprocess
(preprocess
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
...
...
lib/file_formats.ml
View file @
a3df223f
open
Bistro
open
Bistro
open
Bistro_bioinfo
class
type
nhx
=
object
class
type
nhx
=
object
inherit
text_file
inherit
text_file
...
...
lib/multinomial.ml
View file @
a3df223f
...
@@ -17,7 +17,7 @@ let multinomial ?(descr="") ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkfl
...
@@ -17,7 +17,7 @@ let multinomial ?(descr="") ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkfl
let
%
pworkflow
multinomial_ocaml_implementation
~
meth
~
(
tree_sc
:_
pworkflow
)
~
(
faa
:
aminoacid_fasta
pworkflow
)
(* : text_file pworkflow *)
=
let
%
pworkflow
multinomial_ocaml_implementation
~
meth
~
(
tree_sc
:_
pworkflow
)
~
(
faa
:
aminoacid_fasta
pworkflow
)
(* : text_file pworkflow *)
=
let
open
Phylogenetics
in
let
open
Phylogenetics
in
let
open
Convdet
in
let
open
Phylogenetics_convergence
in
let
module
MT
=
Multinomial_test
in
let
module
MT
=
Multinomial_test
in
let
meth
=
[
%
param
meth
]
in
let
meth
=
[
%
param
meth
]
in
let
test
=
match
meth
with
let
test
=
match
meth
with
...
@@ -28,20 +28,22 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa
...
@@ -28,20 +28,22 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa
in
in
let
fold_leaves
(
root
:
_
Tree
.
t
)
~
init
~
f
=
let
fold_leaves
(
root
:
_
Tree
.
t
)
~
init
~
f
=
let
open
Tree
in
let
open
Tree
in
let
rec
node
acc
branch_data
n
=
let
rec
node
acc
branch_data
=
function
match
n
.
branches
with
|
Leaf
d
->
|
[]
->
f
acc
branch_data
n
.
node_data
f
acc
branch_data
d
|
xs
->
|
Node
n
->
List
.
fold
x
s
~
init
:
acc
~
f
:
branch
Non_empty_list
.
fold
n
.
branche
s
~
init
:
acc
~
f
:
branch
and
branch
acc
b
=
node
acc
b
.
branch_data
b
.
tip
in
and
branch
acc
(
Branch
b
)
=
node
acc
b
.
data
b
.
tip
in
List
.
fold
root
.
branches
~
init
~
f
:
branch
match
root
with
|
Leaf
_
->
init
|
Node
n
->
Non_empty_list
.
fold
n
.
branches
~
init
~
f
:
branch
in
in
let
alignment
=
let
alignment
=
Alignment
.
from_fasta
[
%
path
faa
]
Alignment
.
from_fasta
[
%
path
faa
]
|>
Rresult
.
R
.
get_ok
|>
Rresult
.
R
.
get_ok
in
in
let
tree
=
Simulator
.
tree_from_file
[
%
path
tree_sc
]
in
let
tree
=
Reviewphiltrans_toolbox
.
Utils
.
tree_from_file
[
%
path
tree_sc
]
in
let
leaves
=
fold_leaves
tree
~
init
:
[]
~
f
:
(
fun
acc
(
_
,
cond
)
ni
->
let
leaves
=
fold_leaves
tree
~
init
:
[]
~
f
:
(
fun
acc
(
_
,
cond
)
ni
->
match
ni
.
name
with
match
ni
.
name
with
|
None
->
failwith
"Leaves of the tree should be named"
|
None
->
failwith
"Leaves of the tree should be named"
...
@@ -59,7 +61,7 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa
...
@@ -59,7 +61,7 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa
)
)
in
in
let
counts
seqs
i
=
let
counts
seqs
i
=
Amino_acid
.
vector
(
fun
aa
->
Amino_acid
.
Table
.
init
(
fun
aa
->
let
aa
=
Amino_acid
.
to_char
aa
in
let
aa
=
Amino_acid
.
to_char
aa
in
List
.
count
seqs
~
f
:
(
fun
s
->
Char
.
equal
s
.
[
i
]
aa
)
List
.
count
seqs
~
f
:
(
fun
s
->
Char
.
equal
s
.
[
i
]
aa
)
)
)
...
...
lib/simulator.ml
View file @
a3df223f
open
Core_kernel
open
Core_kernel
let
%
pworkflow
simulator
?
branch_factor
?
seed
~
n_h0
~
n_ha
~
ne_s
:
(
ne_s0
,
ne_s1
)
~
gBGC
:
(
gBGC0
,
gBGC1
)
~
tree
~
fitness_profiles
()
=
let
%
pworkflow
simulator
?
branch_factor
?
seed
~
n_h0
~
n_ha
~
ne_s
:
(
ne_s0
,
ne_s1
)
~
gBGC
:
(
gBGC0
,
gBGC1
)
~
tree
~
fitness_profiles
()
=
let
open
Phylogenetics
in
let
()
=
Option
.
iter
~
f
:
Random
.
init
[
%
param
seed
]
in
let
()
=
Option
.
iter
~
f
:
Random
.
init
[
%
param
seed
]
in
let
n_h0
=
[
%
param
n_h0
]
in
let
n_h0
=
[
%
param
n_h0
]
in
let
n_ha
=
[
%
param
n_ha
]
in
let
n_ha
=
[
%
param
n_ha
]
in
...
@@ -9,17 +10,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
...
@@ -9,17 +10,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
let
gBGC0
=
[
%
param
gBGC0
]
in
let
gBGC0
=
[
%
param
gBGC0
]
in
let
gBGC1
=
[
%
param
gBGC1
]
in
let
gBGC1
=
[
%
param
gBGC1
]
in
let
branch_factor
=
[
%
param
branch_factor
]
in
let
branch_factor
=
[
%
param
branch_factor
]
in
let
tree
=
Convdet
.
Simulator
.
tree_from_file
?
alpha
:
branch_factor
[
%
path
tree
]
in
let
tree
=
Reviewphiltrans_toolbox
.
Utils
.
tree_from_file
?
alpha
:
branch_factor
[
%
path
tree
]
in
let
fitness_profiles
=
Convdet
.
Profile_tsv
.(
read
[
%
path
fitness_profiles
]
|>
to_fitness
)
in
let
fitness_profiles
=
Phylogenetics_convergence
.
Profile_tsv
.(
read
[
%
path
fitness_profiles
]
|>
to_fitness
)
in
let
rescale_fitness
beta
=
Convdet
.
Amino_acid
.
v
ector
_
map
~
f
:
((
*.
)
beta
)
in
let
rescale_fitness
beta
=
Amino_acid
.
V
ector
.
map
~
f
:
((
*.
)
beta
)
in
let
base_param
=
let
base_param
=
let
p
=
Convdet
.
Mutsel
.
random_param
~
alpha_nucleotide
:
10
.
~
alpha_fitness
:
0
.
1
in
let
p
=
Mutsel
.
random_param
~
alpha_nucleotide
:
10
.
~
alpha_fitness
:
0
.
1
in
{
p
with
omega
=
1
.
}
{
p
with
omega
=
1
.
}
in
in
let
random_profile
beta
=
let
random_profile
beta
=
Random
.
int
(
Array
.
length
fitness_profiles
)
Random
.
int
(
Array
.
length
fitness_profiles
)
|>
Array
.
get
fitness_profiles
|>
Array
.
get
fitness_profiles
|>
Convdet
.
Amino_acid
.
v
ector
_
of_array_exn
|>
Amino_acid
.
V
ector
.
of_array_exn
|>
rescale_fitness
beta
|>
rescale_fitness
beta
in
in
let
h0_params
=
Array
.
init
n_h0
~
f
:
(
fun
_
->
let
h0_params
=
Array
.
init
n_h0
~
f
:
(
fun
_
->
...
@@ -29,8 +30,9 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
...
@@ -29,8 +30,9 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
(
p
,
q
)
(
p
,
q
)
)
)
in
in
let
most_probable_aa
(
pref
:
float
Convdet
.
Amino_acid
.
vector
)
=
let
most_probable_aa
(
pref
:
Amino_acid
.
vector
)
=
let
arr
=
Array
.
mapi
(
pref
:>
float
array
)
~
f
:
(
fun
i
x
->
x
,
i
)
in
let
pref
=
Amino_acid
.
Vector
.
to_array
pref
in
let
arr
=
Array
.
mapi
pref
~
f
:
(
fun
i
x
->
x
,
i
)
in
match
Array
.
max_elt
~
compare
:
Poly
.
compare
arr
with
match
Array
.
max_elt
~
compare
:
Poly
.
compare
arr
with
|
None
->
assert
false
|
None
->
assert
false
|
Some
(
_
,
i
)
->
i
|
Some
(
_
,
i
)
->
i
...
@@ -58,13 +60,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
...
@@ -58,13 +60,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
|
1
->
q
|
1
->
q
|
_
->
assert
false
|
_
->
assert
false
in
in
let
root_condition
=
Convdet
.
Simulator
.
Mutsel
.
root_condition
tree
in
let
root_condition
=
Option
.
value_exn
(
Phylogenetics_convergence
.
Simulator
.
root_condition
tree
)
in
let
root_dists
=
Array
.
init
(
n_h0
+
n_ha
)
~
f
:
(
fun
i
->
let
root_dists
=
Array
.
init
(
n_h0
+
n_ha
)
~
f
:
(
fun
i
->
Convdet
.
Mutsel
.
stationary_distribution
(
params
i
root_condition
)
Mutsel
.
stationary_distribution
(
params
i
root_condition
)
|>
Mutsel
.
NSCodon
.
Vector
.
to_array
|>
Mutsel
.
NSCodon
.
Table
.
of_array_exn
)
)
in
in
let
root
=
Convdet
.
Simulator
.
Mutsel
.
hmm0
~
len
:
(
n_h0
+
n_ha
)
~
dist
:
(
Array
.
get
root_dists
)
in
let
root
=
Phylogenetics_convergence
.
Simulator
.
Mutsel
.
hmm0
~
len
:
(
n_h0
+
n_ha
)
~
dist
:
(
Array
.
get
root_dists
)
in
let
ali
=
Convdet
.
Simulator
.
Mutsel
.
alignment
tree
~
root
params
in
let
ali
=
Phylogenetics_convergence
.
Simulator
.
Mutsel
.
alignment
tree
~
root
params
in
let
species_name
=
let
species_name
=
Phylogenetics
.
Tree
.
leaves
tree
Phylogenetics
.
Tree
.
leaves
tree
|>
List
.
map
~
f
:
(
fun
{
name
}
->
Option
.
value_exn
name
)
in
|>
List
.
map
~
f
:
(
fun
{
name
}
->
Option
.
value_exn
name
)
in
...
@@ -78,7 +84,7 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
...
@@ -78,7 +84,7 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
let
save_fitness_histogram
dest
=
let
save_fitness_histogram
dest
=
let
data
=
let
data
=
Array
.
fold
(
Array
.
append
h0_params
ha_params
)
~
init
:
[]
~
f
:
(
fun
acc
(
p
,
q
)
->
Array
.
fold
(
Array
.
append
h0_params
ha_params
)
~
init
:
[]
~
f
:
(
fun
acc
(
p
,
q
)
->
(
p
.
scaled_fitness
:
>
float
array
)
::
(
q
.
scaled_fitness
:>
float
array
)
::
acc
(
Amino_acid
.
Vector
.
to_array
p
.
scaled_fitness
)
:
:
(
Amino_acid
.
Vector
.
to_array
q
.
scaled_fitness
)
::
acc
)
)
|>
Array
.
concat
|>
Array
.
concat
in
in
...
@@ -98,29 +104,20 @@ let simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s ~gBGC ~tree ~fitness_profil
...
@@ -98,29 +104,20 @@ let simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s ~gBGC ~tree ~fitness_profil
let
%
pworkflow
pair_tree
~
branch_length1
~
branch_length2
~
npairs
=
let
%
pworkflow
pair_tree
~
branch_length1
~
branch_length2
~
npairs
=
let
open
Phylogenetics
in
let
open
Phylogenetics
in
let
branch_length1
,
branch_length2
,
npairs
=
[
%
param
branch_length1
,
branch_length2
,
npairs
]
in
let
branch_length1
,
branch_length2
,
npairs
=
[
%
param
branch_length1
,
branch_length2
,
npairs
]
in
let
tree
?
name
branches
=
{
let
leaf
name
=
Tree
.
leaf
{
Newick_ast
.
name
=
Some
name
}
in
Tree
.
node_data
=
{
Newick_ast
.
name
}
;
branches
;
}
in
let
branch
~
length
~
condition
tip
=
let
branch
~
length
~
condition
tip
=
let
tags
=
match
condition
with
let
tags
=
match
condition
with
|
`Ancestral
->
[
"Condition"
,
"0"
]
|
`Ancestral
->
[
"Condition"
,
"0"
]
|
`Convergent
->
[
"Condition"
,
"1"
;
"Transition"
,
"1"
]
|
`Convergent
->
[
"Condition"
,
"1"
;
"Transition"
,
"1"
]
in
in
{
Tree
.
branch
{
Newick_ast
.
length
=
Some
length
;
tags
}
tip
in
Tree
.
branch_data
=
{
Newick_ast
.
length
=
Some
length
;
tags
}
;
tip
;
}
in
let
make_pair
i
=
let
make_pair
i
=
tree
[
Tree
.
binary_node
{
Newick_ast
.
name
=
None
}
branch
~
length
:
branch_length2
~
condition
:
`Ancestral
(
tree
~
name
:
(
sprintf
"A%d"
i
)
[]
)
;
(
branch
~
length
:
branch_length2
~
condition
:
`Ancestral
(
leaf
(
sprintf
"A%d"
i
)))
branch
~
length
:
branch_length2
~
condition
:
`Convergent
(
tree
~
name
:
(
sprintf
"C%d"
i
)
[]
)
;
(
branch
~
length
:
branch_length2
~
condition
:
`Convergent
(
leaf
(
sprintf
"C%d"
i
)))
]
|>
branch
~
length
:
branch_length1
~
condition
:
`Ancestral
|>
branch
~
length
:
branch_length1
~
condition
:
`Ancestral
in
in
let
tree
=
let
tree
=
Newick
.
Tree
(
t
ree
(
L
ist
.
init
npairs
~
f
:
make_pair
))
Newick
.
Tree
(
T
ree
.
node
{
Newick_ast
.
name
=
None
}
(
Non_empty_l
ist
.
init
npairs
~
f
:
make_pair
))
in
in
Newick
.
to_file
tree
[
%
dest
]
Newick
.
to_file
tree
[
%
dest
]
lib/toolbox/dune
View file @
a3df223f
(library
(library
(name reviewphiltrans_toolbox)
(name reviewphiltrans_toolbox)
(libraries biocaml.ez
convdet
gzt ocaml-r.graphics ocaml-r.grDevices)
(libraries biocaml.ez gzt ocaml-r.graphics ocaml-r.grDevices
phylogenetics
)
(preprocess (pps ppx_jane)))
(preprocess (pps ppx_jane)))
lib/toolbox/utils.ml
0 → 100644
View file @
a3df223f
open
Core_kernel
let
tree_from_file
?
(
alpha
=
1
.
)
fn
=
match
Phylogenetics
.
Newick
.
from_file
fn
with
|
Branch
(
Branch
{
tip
=
t
;
_
})
|
Tree
t
->
Phylogenetics
.
Tree
.
map
t
~
node
:
Fn
.
id
~
leaf
:
Fn
.
id
~
branch
:
Phylogenetics
.
Newick
.(
fun
b
->
let
length
=
Option
.
value_exn
b
.
length
*.
alpha
in
let
condition
=
match
List
.
Assoc
.
find
~
equal
:
String
.
equal
b
.
tags
"Condition"
with
|
Some
s
->
Int
.
of_string
s
|
None
->
failwith
"Missing Condition tag"
in
length
,
condition
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment