Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
6acf2dc3
Commit
6acf2dc3
authored
Oct 27, 2020
by
Louis Duchemin
Browse files
Implements Gemma as a detection pipeline
parent
272e5d76
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
169 additions
and
82 deletions
+169
-82
lib/detection_pipeline.ml
lib/detection_pipeline.ml
+96
-53
lib/detection_pipeline.mli
lib/detection_pipeline.mli
+32
-11
lib/gemma.ml
lib/gemma.ml
+1
-1
lib/simulation_dataset.mli
lib/simulation_dataset.mli
+40
-17
No files found.
lib/detection_pipeline.ml
View file @
6acf2dc3
...
...
@@ -3,9 +3,12 @@ open File_formats
module
type
Query
=
sig
type
t
val
tree
:
branch_length_unit
:
[
`Nucleotide
|
`Amino_acid
|
`Codon
]
->
t
->
nhx
file
branch_length_unit
:
[
`Nucleotide
|
`Amino_acid
|
`Codon
]
->
t
->
nhx
file
val
nucleotide_alignment
:
t
->
nucleotide_fasta
file
end
...
...
@@ -13,40 +16,58 @@ module type S = sig
type
query
val
amino_acid_alignment
:
query
->
aminoacid_fasta
file
val
gene_tree
:
query
->
nw
file
val
dn_tree
:
query
->
text
file
val
ds_tree
:
query
->
text
file
val
dnds_tree
:
query
->
text
file
val
identical
:
query
->
text
file
val
topological
:
query
->
text
file
val
multinomial
:
query
->
text
file
val
multinomial_simulation_lrt
:
query
->
text
file
val
multinomial_simulation_sparse
:
query
->
text
file
val
multinomial_asymptotic_lrt
:
query
->
text
file
val
multinomial_asymptotic_sparse
:
query
->
text
file
val
tdg09
:
query
->
text
file
val
failsafe_tdg09
:
query
->
text
file
val
pcoc
:
?
gamma
:
bool
->
?
ncat
:
int
->
query
->
text
file
val
pcoc_v2
:
?
gamma
:
bool
->
?
aa_profiles
:
Pcoc
.
aa_profiles
->
query
->
text
file
?
gamma
:
bool
->
?
aa_profiles
:
Pcoc
.
aa_profiles
->
query
->
text
file
val
gemma
:
query
->
lmm_test
:
[
`All
|
`LRT
|
`Score
|
`Wald
]
->
relatedness_mode
:
[
`Centered
|
`Standardized
]
->
text
file
val
diffsel
:
query
->
text
file
val
diffseldsparse
:
?
pi
:
float
->
?
shiftprob
:
float
*
float
->
?
eps
:
float
->
query
->
text
file
val
view_site
:
query
->
convergent_species
:
string
list
->
site_pos
:
int
->
pdf
file
val
view_site
:
query
->
convergent_species
:
string
list
->
site_pos
:
int
->
pdf
file
end
module
Make
(
Q
:
Query
)
=
struct
module
Make
(
Q
:
Query
)
=
struct
open
Q
let
amino_acid_alignment
d
=
...
...
@@ -58,31 +79,48 @@ module Make(Q : Query) = struct
let
gene_tree
d
=
Tree_dataset
.
raxmlng_fna
~
fna
:
(
nucleotide_alignment
d
)
()
let
%
pworkflow
tree_with_no_single_child
~
branch_length_unit
d
:
newick
file
=
let
%
pworkflow
tree_with_no_single_child
~
branch_length_unit
d
:
newick
file
=
let
tree_file
=
[
%
path
tree
~
branch_length_unit
d
]
in
let
open
Phylogenetics
in
let
tree
=
Newick
.
from_file
tree_file
in
let
tree
=
Newick
.
map_inner_tree
tree
~
f
:
Reviewphiltrans_toolbox
.
Convergence_tree
.
remove_nodes_with_single_child
in
let
tree
=
Newick
.
map_inner_tree
tree
~
f
:
Reviewphiltrans_toolbox
.
Convergence_tree
.
remove_nodes_with_single_child
in
Newick
.
to_file
tree
[
%
dest
]
let
identical
d
=
let
tree_sc
=
Tree_dataset
.
prepare_sc_tree
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
let
tree_id
=
Tree_dataset
.
prepare_tree_with_node_id
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
Identical
.
identical
~
tree_id
~
tree_sc
~
prot_model
:
"LG08"
~
faa
:
(
amino_acid_alignment
d
)
()
let
tree_sc
=
Tree_dataset
.
prepare_sc_tree
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
let
tree_id
=
Tree_dataset
.
prepare_tree_with_node_id
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
Identical
.
identical
~
tree_id
~
tree_sc
~
prot_model
:
"LG08"
~
faa
:
(
amino_acid_alignment
d
)
()
|>
Identical
.
results
let
topological
d
=
let
faa
=
amino_acid_alignment
d
in
let
tree_conv
=
Tree_dataset
.
prepare_topological_tree
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
let
tree
=
Tree_dataset
.
prepare_tree_with_node_id
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
let
tree_conv
=
Tree_dataset
.
prepare_topological_tree
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
let
tree
=
Tree_dataset
.
prepare_tree_with_node_id
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
Topological
.
topological
~
faa
~
tree
~
tree_conv
~
prot_model
:
"LG08"
()
|>
Topological
.
results
let
multinomial
d
=
Multinomial
.
multinomial
~
tree_sc
:
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
~
faa
:
(
amino_acid_alignment
d
)
()
~
faa
:
(
amino_acid_alignment
d
)
()
let
multinomial_asymptotic_lrt
d
=
Multinomial
.
multinomial_asymptotic_lrt
...
...
@@ -107,44 +145,49 @@ module Make(Q : Query) = struct
let
tdg09
d
=
Tamuri
.
tdg09
~
tree
:
(
tree_with_no_single_child
~
branch_length_unit
:
`Amino_acid
d
)
~
faa
:
(
amino_acid_alignment
d
)
()
~
faa
:
(
amino_acid_alignment
d
)
()
|>
Tamuri
.
results
let
%
pworkflow
mock_tdg09
d
=
match
Biotk
.
Fasta
.
from_file
[
%
path
amino_acid_alignment
d
]
with
|
Ok
(
_
,
item
::
_
)
->
let
open
Core_kernel
in
let
n
=
String
.
length
item
.
sequence
in
"Sites
\t
Tdg09_1MinusFDR
\t
Tdg09_1MinusLRT
\t
Tdg09_prob_post"
::
List
.
init
n
~
f
:
(
fun
i
->
sprintf
"%d
\t
0.0
\t
0.0
\t
NA"
(
i
+
1
))
|>
Out_channel
.
write_lines
[
%
dest
]
let
open
Core_kernel
in
let
n
=
String
.
length
item
.
sequence
in
"Sites
\t
Tdg09_1MinusFDR
\t
Tdg09_1MinusLRT
\t
Tdg09_prob_post"
::
List
.
init
n
~
f
:
(
fun
i
->
sprintf
"%d
\t
0.0
\t
0.0
\t
NA"
(
i
+
1
))
|>
Out_channel
.
write_lines
[
%
dest
]
|
_
->
failwith
"couldn't read an item in fasta"
let
failsafe_tdg09
d
=
Workflow
.
trywith
(
tdg09
d
)
(
mock_tdg09
d
)
let
gemma
q
~
lmm_test
~
relatedness_mode
=
let
alignment
=
amino_acid_alignment
q
in
let
tree
=
tree
~
branch_length_unit
:
`Amino_acid
q
in
let
genotype
=
Gemma
.
genotype_of_fasta
alignment
in
let
phenotype
=
Gemma
.
phenotype_of_tree
tree
in
let
relatedness_matrix
=
Gemma
.
calculate_relatedness_matrix
~
mode
:
relatedness_mode
~
genotype
~
phenotype
in
Gemma
.
univariate_lmm
~
lmm
:
lmm_test
~
genotype
~
phenotype
~
relatedness_matrix
|>
Gemma
.
result_table_of_output
alignment
let
diffseltree
d
=
Tree_dataset
.
prepare_diffsel_tree
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
Tree_dataset
.
prepare_diffsel_tree
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
let
diffsel
d
=
Diffsel
.
diffsel
~
phy_n
:
(
phylip_nucleotide_alignment
d
)
~
tree
:
(
diffseltree
d
)
~
w_every
:
1
~
n_cycles
:
50
()
~
tree
:
(
diffseltree
d
)
~
w_every
:
1
~
n_cycles
:
50
()
|>
Diffsel
.
selector
let
diffseldsparse
?
pi
?
shiftprob
?
eps
d
=
Diffseldsparse
.
diffseldsparse
?
pi
?
shiftprob
?
eps
Diffseldsparse
.
diffseldsparse
?
pi
?
shiftprob
?
eps
~
alignment
:
(
phylip_nucleotide_alignment
d
)
~
tree
:
(
diffseltree
d
)
~
w_every
:
1
~
n_cycles
:
50
()
|>
Diffseldsparse
.
readdiffseldsparse
|>
Diffseldsparse
.
results
~
tree
:
(
diffseltree
d
)
~
w_every
:
1
~
n_cycles
:
50
()
|>
Diffseldsparse
.
readdiffseldsparse
|>
Diffseldsparse
.
results
let
pcoc
?
(
gamma
=
true
)
?
(
ncat
=
10
)
d
=
let
faa
=
amino_acid_alignment
d
in
...
...
@@ -155,14 +198,17 @@ module Make(Q : Query) = struct
let
pcoc_v2
?
(
gamma
=
true
)
?
(
aa_profiles
=
`C10
)
d
=
let
faa
=
amino_acid_alignment
d
in
let
tree
=
tree
~
branch_length_unit
:
`Amino_acid
d
in
Pcoc
.
pcoc_v2
~
aa_profiles
~
gamma
~
faa
~
tree
()
|>
Pcoc
.
results
Pcoc
.
pcoc_v2
~
aa_profiles
~
gamma
~
faa
~
tree
()
|>
Pcoc
.
results
let
dn_ds_dnds_trees
d
=
Testnh
.
dn_ds_trees_real_data
~
fna
:
(
nucleotide_alignment
d
)
~
tree
:
(
tree
~
branch_length_unit
:
`Nucleotide
d
)
()
Testnh
.
dn_ds_trees_real_data
~
fna
:
(
nucleotide_alignment
d
)
~
tree
:
(
tree
~
branch_length_unit
:
`Nucleotide
d
)
()
let
dn_tree
d
=
(
dn_ds_dnds_trees
d
)
.
dn_tsv
let
ds_tree
d
=
(
dn_ds_dnds_trees
d
)
.
ds_tsv
let
dn_tree
d
=
(
dn_ds_dnds_trees
d
)
.
dn_tsv
let
ds_tree
d
=
(
dn_ds_dnds_trees
d
)
.
ds_tsv
let
dnds_tree
d
=
(
dn_ds_dnds_trees
d
)
.
dnds_tsv
let
%
pworkflow
view_site
query
~
convergent_species
~
site_pos
=
...
...
@@ -172,17 +218,14 @@ module Make(Q : Query) = struct
let
convergent_species
=
[
%
param
convergent_species
]
in
let
site_pos
=
[
%
param
site_pos
]
in
let
condition
s
=
if
List
.
mem
s
convergent_species
then
`Convergent
else
`Ancestral
if
List
.
mem
s
convergent_species
then
`Convergent
else
`Ancestral
in
let
tree
=
Phylogenetics
.
Newick
.
from_file
tree_path
in
let
alignment
=
Biotk
.
Fasta
.
from_file
alignment_path
|>
Base
.
Result
.
ok_or_failwith
|>
snd
let
alignment
=
Biotk
.
Fasta
.
from_file
alignment_path
|>
Base
.
Result
.
ok_or_failwith
|>
snd
in
CS
.
make
~
condition
tree
alignment
site_pos
|>
CS
.
draw
|>
Biotk_croquis
.
Croquis
.
Layout
.
simple
|>
CS
.
draw
|>
Biotk_croquis
.
Croquis
.
Layout
.
simple
|>
Base
.
Fn
.
flip
Biotk_croquis
.
Croquis
.
Layout
.
render_pdf
[
%
dest
]
end
lib/detection_pipeline.mli
View file @
6acf2dc3
...
...
@@ -3,9 +3,12 @@ open File_formats
module
type
Query
=
sig
type
t
val
tree
:
branch_length_unit
:
[
`Nucleotide
|
`Amino_acid
|
`Codon
]
->
t
->
nhx
file
branch_length_unit
:
[
`Nucleotide
|
`Amino_acid
|
`Codon
]
->
t
->
nhx
file
val
nucleotide_alignment
:
t
->
nucleotide_fasta
file
end
...
...
@@ -13,37 +16,55 @@ module type S = sig
type
query
val
amino_acid_alignment
:
query
->
aminoacid_fasta
file
val
gene_tree
:
query
->
nw
file
val
dn_tree
:
query
->
text
file
val
ds_tree
:
query
->
text
file
val
dnds_tree
:
query
->
text
file
val
identical
:
query
->
text
file
val
topological
:
query
->
text
file
val
multinomial
:
query
->
text
file
val
multinomial_simulation_lrt
:
query
->
text
file
val
multinomial_simulation_sparse
:
query
->
text
file
val
multinomial_asymptotic_lrt
:
query
->
text
file
val
multinomial_asymptotic_sparse
:
query
->
text
file
val
tdg09
:
query
->
text
file
val
failsafe_tdg09
:
query
->
text
file
val
pcoc
:
?
gamma
:
bool
->
?
ncat
:
int
->
query
->
text
file
val
pcoc_v2
:
?
gamma
:
bool
->
?
aa_profiles
:
Pcoc
.
aa_profiles
->
query
->
text
file
?
gamma
:
bool
->
?
aa_profiles
:
Pcoc
.
aa_profiles
->
query
->
text
file
val
gemma
:
query
->
lmm_test
:
[
`All
|
`LRT
|
`Score
|
`Wald
]
->
relatedness_mode
:
[
`Centered
|
`Standardized
]
->
text
file
val
diffsel
:
query
->
text
file
val
diffseldsparse
:
?
pi
:
float
->
?
shiftprob
:
float
*
float
->
?
eps
:
float
->
query
->
text
file
val
view_site
:
query
->
convergent_species
:
string
list
->
site_pos
:
int
->
pdf
file
val
view_site
:
query
->
convergent_species
:
string
list
->
site_pos
:
int
->
pdf
file
end
module
Make
(
Q
:
Query
)
:
S
with
type
query
:=
Q
.
t
module
Make
(
Q
:
Query
)
:
S
with
type
query
:=
Q
.
t
lib/gemma.ml
View file @
6acf2dc3
...
...
@@ -80,7 +80,7 @@ let calculate_relatedness_matrix ~mode ~genotype ~phenotype =
cmd
"cp"
[
tmp
//
relatedness_filename
mode
;
dest
];
]
let
%
pworkflow
[
@
version
3
]
result_table_of_output
alignment
gemma_output
=
let
%
pworkflow
[
@
version
4
]
result_table_of_output
alignment
gemma_output
=
let
module
R
=
Reviewphiltrans_toolbox
.
Gemma
.
Result_file
in
match
R
.
of_file
[
%
path
gemma_output
]
with
|
Error
msg
->
failwith
msg
...
...
lib/simulation_dataset.mli
View file @
6acf2dc3
...
...
@@ -3,9 +3,9 @@ open Bistro
type
tree
=
|
NHX
of
string
|
Pair_tree
of
{
npairs
:
int
;
branch_length1
:
float
;
branch_length2
:
float
;
npairs
:
int
;
branch_length1
:
float
;
branch_length2
:
float
;
}
type
t
...
...
@@ -13,40 +13,63 @@ type t
val
bppseqgen_mixed_simulation
:
?
ne_s
:
float
->
?
seed
:
int
->
tree
:
tree
->
profiles
:
string
->
n_h0
:
int
->
n_ha
:
int
->
unit
->
t
tree
:
tree
->
profiles
:
string
->
n_h0
:
int
->
n_ha
:
int
->
unit
->
t
val
bppseqgen_simulation
:
hyp
:
Convergence_hypothesis
.
t
->
tree
:
tree
->
profiles
:
string
->
nb_sites
:
int
->
seed
:
int
->
t
tree
:
tree
->
profiles
:
string
->
nb_sites
:
int
->
seed
:
int
->
t
val
convdet_simulation
:
?
branch_factor
:
float
->
?
ne_s
:
float
*
float
->
?
gBGC
:
float
*
float
->
?
seed
:
int
->
tree
:
tree
->
profiles
:
string
->
n_h0
:
int
->
n_ha
:
int
->
unit
->
t
?
branch_factor
:
float
->
?
ne_s
:
float
*
float
->
?
gBGC
:
float
*
float
->
?
seed
:
int
->
tree
:
tree
->
profiles
:
string
->
n_h0
:
int
->
n_ha
:
int
->
unit
->
t
include
Detection_pipeline
.
Query
with
type
t
:=
t
include
Detection_pipeline
.
S
with
type
query
:=
t
val
oracle
:
t
->
text
file
val
alignment_plot
:
t
->
svg
file
val
multinomial_benchmark
:
t
->
pdf
file
val
benchmark
:
?
mode
:
[
`fast
|
`full
]
->
t
->
svg
file
val
benchmark
:
?
mode
:
[
`fast
|
`full
]
->
t
->
svg
file
val
benchmark2
:
t
->
pdf
file
(** stuff for gbgc SBME paper *)
type
gBGC_t
=
Global
of
float
|
Convergent
of
float
*
float
type
gBGC_t
=
|
Global
of
float
|
Convergent
of
float
*
float
type
param_t
val
explore_params
:
f
:
(
param_t
->
'
a
)
->
((
float
*
gBGC_t
)
*
'
a
)
list
val
simu_of_param
:
?
n_h0
:
int
->
param_t
->
t
val
filter_results
:
f
:
(
'
a
->
bool
)
->
(
param_t
*
'
a
)
list
->
(
param_t
*
'
a
)
list
val
filter_results
:
f
:
(
'
a
->
bool
)
->
(
param_t
*
'
a
)
list
->
(
param_t
*
'
a
)
list
type
record_t
val
record_of_simu
:
t
->
record_t
workflow
val
realistic_result
:
record_t
->
bool
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment