Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
b8717f8a
Commit
b8717f8a
authored
Jun 21, 2018
by
Carine Rey
Browse files
add pcoc/pcoc_gamma/diffsel
parent
d316236f
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
277 additions
and
8 deletions
+277
-8
etc/docker/pcoc/Dockerfile
etc/docker/pcoc/Dockerfile
+92
-0
etc/docker/pcoc/build_docker.sh
etc/docker/pcoc/build_docker.sh
+14
-0
lib/bppsuite.ml
lib/bppsuite.ml
+19
-2
lib/bppsuite.mli
lib/bppsuite.mli
+4
-0
lib/convergence_detection.ml
lib/convergence_detection.ml
+39
-0
lib/convergence_detection.mli
lib/convergence_detection.mli
+25
-0
lib/file_formats.ml
lib/file_formats.ml
+29
-2
lib/pipeline.ml
lib/pipeline.ml
+55
-4
No files found.
etc/docker/pcoc/Dockerfile
0 → 100644
View file @
b8717f8a
# Base image for the pcoc container.
FROM debian:stretch

# MAINTAINER is deprecated; the same metadata is carried by a LABEL instead.
LABEL maintainer="Carine Rey carine.rey@ens-lyon.org"

# Build tools, Python 2.7 packages and development libraries used by the
# later build steps. The apt package lists are removed in the same layer so
# they are not shipped in the image.
RUN apt-get update && \
    apt-get install --no-install-recommends -qy \
        git \
        make \
        cmake \
        python2.7-minimal \
        ca-certificates \
        curl \
        g++ \
        gcc \
        wget \
        gnupg2 \
        dirmngr \
        python-pip \
        python-numpy \
        python-pandas \
        python-setuptools \
        python-dev \
        sudo \
        less \
        xvfb \
        gosu \
        pyqt4-dev-tools \
        xauth \
        libcurl4-openssl-dev \
        libxml2-dev \
        libssl-dev \
        libcairo2-dev && \
    rm -rf /var/lib/apt/lists/*

# Python dependencies installed with pip (ete3 is pinned, biopython is not).
RUN pip install ete3==3.0.0b35
RUN pip install biopython

# The libraries built below are installed with `make install`; /usr/local/lib/
# is added to the dynamic loader search path.
ENV LD_LIBRARY_PATH=/usr/local/lib/
# # Install R
# RUN echo """deb http://cloud.r-project.org/bin/linux/debian stretch-cran34/""" >> /etc/apt/sources.list \
# && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys FCAE2A0E115C3D8A
# RUN apt-get update && apt-get install --no-install-recommends -qy r-base r-base-dev \
# && rm -rf /var/lib/apt/lists/* \
# && echo 'install.packages(c("ggplot2", "plyr", "reshape2", "readr", "cowplot"), repos="http://cran.us.r-project.org", dependencies=TRUE)' > /tmp/packages.R \
# && Rscript /tmp/packages.R
# install bpp:
# Each stanza clones one Bio++ repository into its own working directory,
# checks out a pinned commit, configures with cmake, builds (parallel build
# first, falling back to a plain `make` if that fails) and installs.
# NOTE(review): $bpp_dir is not set by any ENV/ARG instruction visible in this
# Dockerfile; presumably it expands to an empty string, so the sources would
# end up under /sources/... -- confirm this is intended.

WORKDIR $bpp_dir/sources/bpp-core
RUN git clone https://github.com/BioPP/bpp-core . && \
    git checkout 405cab5 && \
    cmake . && \
    (make -j 4 || make) && \
    make install

WORKDIR $bpp_dir/sources/bpp-seq
RUN git clone https://github.com/BioPP/bpp-seq . && \
    git checkout 32d9c67 && \
    cmake . && \
    (make -j 4 || make) && \
    make install

WORKDIR $bpp_dir/sources/bpp-popgen
RUN git clone https://github.com/BioPP/bpp-popgen . && \
    git checkout 77d712e && \
    cmake . && \
    (make -j 4 || make) && \
    make install

# bpp-phyl is cloned from its `devel` branch before pinning the commit.
WORKDIR $bpp_dir/sources/bpp-phyl
RUN git clone --branch devel https://github.com/BioPP/bpp-phyl . && \
    git checkout 561ac70 && \
    cmake . && \
    (make -j 4 || make) && \
    make install

WORKDIR $bpp_dir/sources/bppsuite
RUN git clone https://github.com/BioPP/bppsuite . && \
    git checkout 77ccc0a && \
    cmake . && \
    (make -j 4 || make) && \
    make install
# Fetch pcoc at a pinned commit.
# NOTE(review): $HOME is not set by an ENV instruction visible in this
# Dockerfile; confirm which directory WORKDIR actually resolves to.
WORKDIR $HOME/pcoc
RUN git clone https://github.com/CarineRey/pcoc.git . && \
    git checkout 957a4b1

# Copy the data, README, entrypoint and scripts into their final locations.
# Every step is chained with `&&`: a single `&` would run the preceding `cp`
# in the background, so its failure would be ignored and the layer could be
# committed before the copy has finished.
RUN cp -r data /data/ && \
    cp README.md /usr/local/etc/ && \
    cp etc/entrypoint.sh /usr/local/bin/entrypoint.sh && \
    cp src/* /usr/local/bin/

# Default arguments handed to the entrypoint script when none are given.
CMD ["cat", "/usr/local/etc/README.md"]
ENTRYPOINT ["bash", "/usr/local/bin/entrypoint.sh"]
etc/docker/pcoc/build_docker.sh
0 → 100644
View file @
b8717f8a
#! /bin/bash
# Build the pcoc Docker image and optionally push it.
#
# Usage: build_docker.sh [push_yes]
#   push_yes   push the freshly built image after a successful build
set -e

IMAGE_NAME=pcoc
DOCKERFILE_DIR=.
TAG=06212018
REPO="carinerey/$IMAGE_NAME:$TAG"

# Expansions are quoted so that values are never word-split or globbed.
docker build -t "$REPO" -f ./Dockerfile "$DOCKERFILE_DIR"

# ${1:-} keeps the comparison well-defined when no argument is given.
if [[ "${1:-}" == "push_yes" ]]
then
    docker push "$REPO"
fi
lib/bppsuite.ml
View file @
b8717f8a
...
...
@@ -28,7 +28,7 @@ let bppseqgen ~nb_sites ~tree ~config : nucleotide_fasta workflow =
]
let
conf_file_bppseqman
~
fna
=
let
conf_file_bppseqman
_fna2faa
~
fna
=
seq
~
sep
:
"
\n
"
[
assign
"input.sequence.file"
(
dep
fna
)
;
assign
"output.sequence.file"
dest
;
...
...
@@ -42,6 +42,23 @@ let conf_file_bppseqman ~fna =
let
fna2faa
~
(
fna
:
nucleotide_fasta
workflow
)
:
aminoacid_fasta
workflow
=
workflow
~
descr
:
"bppsuite.fna2faa"
[
cmd
"bppseqman"
~
env
[
assign
"param"
(
file_dump
(
conf_file_bppseqman
~
fna
))
;
assign
"param"
(
file_dump
(
conf_file_bppseqman_fna2faa
~
fna
))
;
]
]
(** [conf_file_bppseqman_fa2phy ~fna] is the newline-separated option
    template given to [bppseqman] by [fa2phy]: the input sequence file is
    [fna], the output sequence file is the workflow destination, and the
    output sequence format is ["Phylip"]. *)
let conf_file_bppseqman_fa2phy ~fna =
  (* Options that take no workflow-dependent value. *)
  let fixed_options = string {|
input.alignment = true
sequence.manip =
|}
  in
  seq ~sep:"\n" [
    assign "input.sequence.file" (dep fna) ;
    assign "output.sequence.file" dest ;
    assign "output.sequence.format" (string "Phylip") ;
    fixed_options ;
  ]
(** [fa2phy ~fna] is a workflow that runs [bppseqman] with the option file
    built by [conf_file_bppseqman_fa2phy ~fna]; its result is typed as a
    nucleotide Phylip file. *)
let fa2phy ~(fna : nucleotide_fasta workflow) : nucleotide_phylip workflow =
  (* The option template is dumped to a file and passed as [param=<file>]. *)
  let param_file = file_dump (conf_file_bppseqman_fa2phy ~fna) in
  workflow ~descr:"bppsuite.fa2phy" [
    cmd "bppseqman" ~env [
      assign "param" param_file ;
    ]
  ]
lib/bppsuite.mli
View file @
b8717f8a
...
...
@@ -11,3 +11,7 @@ val bppseqgen :
(** [fna2faa ~fna] is a [bppseqman] workflow taking the nucleotide FASTA
    [fna] as input; its result is typed as an amino-acid FASTA file. *)
val fna2faa :
  fna:nucleotide_fasta workflow ->
  aminoacid_fasta workflow
(** [fa2phy ~fna] is a [bppseqman] workflow taking the nucleotide FASTA
    [fna] as input; its result is typed as a nucleotide Phylip file. *)
val fa2phy :
  fna:nucleotide_fasta workflow ->
  nucleotide_phylip workflow
lib/convergence_detection.ml
0 → 100644
View file @
b8717f8a
open
Bistro
.
EDSL
open
Bistro
.
Std
open
Core
open
File_formats
open
Bistro_bioinfo
.
Std
(* Abstract tag types; the workflows defined in this file do not use them
   (their return annotations mention them only inside comments). *)
type pcoc_out
type diffsel_out

(** Tag used as the content type of the output directory of the detection
    workflows defined in this file. *)
type det_out =
  | Pcoc_out
  | Diffsel_out
(** [pcoc ?gamma ~faa ~tree] is a workflow running [pcoc_det.py] inside the
    [carinerey/pcoc:06212018] Docker image.

    The command receives [tree] through [-t], the amino-acid alignment [faa]
    through [-aa], the workflow destination through [-o], and the fixed
    value ["-"] through [-m]. The [--gamma] argument is generated from the
    optional [gamma] boolean via Bistro's [option] and [flag] combinators.

    NOTE(review): [?gamma] is followed only by labelled arguments, so it can
    presumably never be erased by the type checker (warning 16); the callers
    visible on this page always pass it explicitly. *)
let pcoc ?gamma ~(faa : aminoacid_fasta workflow) ~(tree : _ workflow) : (*`pcoc*) det_out directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  let arguments = [
    opt "-t" dep tree ;
    opt "-m" string "-" ;
    opt "-aa" dep faa ;
    opt "-o" ident dest ;
    option (flag string "--gamma") gamma ;
  ]
  in
  workflow ~descr:"convergence_detection.pcoc" [
    cmd "pcoc_det.py" ~env arguments ;
  ]
(** [diffsel ~phy_n ~tree ~w_every ~n_cycles] is a workflow running
    [_build/diffsel] inside the [vlanore/diffsel:v1.0] Docker image.

    The destination directory is created first. The command then receives
    [tree] through [-t], the Phylip alignment [phy_n] through [-d], a fixed
    [-ncond 2], and [-x w_every n_cycles]; its last argument is the path
    [<dest>/myrun]. *)
let diffsel ~(phy_n : nucleotide_phylip workflow) ~(tree : _ workflow) ~(w_every : int) ~(n_cycles : int) : (*`diffsel*) det_out directory workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"v1.0" () in
  (* Final positional argument of the command, located inside [dest]. *)
  let run_name = seq [ ident dest ; string "/myrun" ] in
  (*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
  workflow ~descr:"convergence_detection.diffsel" [
    mkdir_p dest ;
    cmd "_build/diffsel" ~env [
      opt "-t" dep tree ;
      opt "-d" dep phy_n ;
      opt "-ncond" int 2 ;
      opt "-x" seq [ int w_every ; string " " ; int n_cycles ] ;
      run_name ;
    ]
  ]
lib/convergence_detection.mli
0 → 100644
View file @
b8717f8a
open
Bistro
.
EDSL
open
Bistro
.
Std
open
Core
open
File_formats
open
Bistro_bioinfo
.
Std
(** Abstract tag types. The signatures of [pcoc] and [diffsel] in this
    interface mention them only inside comments. *)
type pcoc_out
type diffsel_out

(** Tag used as the content type of the output directory of the detection
    workflows. *)
type det_out =
  | Pcoc_out
  | Diffsel_out
(** [pcoc ?gamma ~faa ~tree] is a workflow running [pcoc_det.py] on the
    amino-acid alignment [faa] and on [tree]; [gamma] controls the
    [--gamma] command-line argument. The result is an output directory. *)
val pcoc :
  ?gamma:bool ->
  faa:aminoacid_fasta workflow ->
  tree:_ workflow ->
  (*[`pcoc]*) det_out directory workflow
(** [diffsel ~phy_n ~tree ~w_every ~n_cycles] is a workflow running diffsel
    on the nucleotide Phylip alignment [phy_n] and on [tree]; [w_every] and
    [n_cycles] are the two values passed to its [-x] option. The result is
    an output directory. *)
val diffsel :
  phy_n:nucleotide_phylip workflow ->
  tree:_ workflow ->
  w_every:int ->
  n_cycles:int ->
  (*[`diffsel]*) det_out directory workflow
lib/file_formats.ml
View file @
b8717f8a
...
...
@@ -18,7 +18,12 @@ end
class
type
aminoacid_fasta
=
object
inherit
fasta
method
alphabet
:
[
`Nucleotide
]
method
alphabet
:
[
`Aminoacid
]
end
(** Phantom class type for a text file tagged, through its [format] method,
    with [`Nucleotide].

    NOTE(review): the FASTA class types shown next to this one carry their
    tag in a method named [alphabet]; confirm that [format] is the intended
    method name here. *)
class type nucleotide_phylip = object
  inherit text_file
  method format : [`Nucleotide]
end
...
...
@@ -43,7 +48,6 @@ type ready_dataset = {
faa
:
aminoacid_fasta
workflow
;
}
(** A dataset that is either still raw (payload of type ['a]) or ready
    (payload of type ['b]). *)
type ('a, 'b) w_dataset =
  | Raw_dataset of 'a
  | Ready_dataset of 'b
...
...
@@ -54,3 +58,26 @@ type dataset = {
tree_prefix
:
string
;
ready_dataset
:
ready_dataset
}
(** Detection methods known to the pipeline. *)
type det_meth =
  | Pcoc
  | Pcoc_gamma
  | Diffsel

(** [string_of_det_meth m] is the lower-case name of [m]: ["pcoc"],
    ["pcoc_gamma"] or ["diffsel"]. *)
let string_of_det_meth meth =
  match meth with
  | Pcoc -> "pcoc"
  | Pcoc_gamma -> "pcoc_gamma"
  | Diffsel -> "diffsel"
(** A payload of type ['a] tagged with the detection method it belongs to.

    NOTE(review): the last constructor is named [Diffsel], not [Diffsel_w]
    like its siblings, so it shares its name with the [Diffsel] constructor
    of [det_meth] defined earlier in this file; later unqualified uses of
    [Diffsel] then rely on type-directed disambiguation. Confirm whether
    [Diffsel_w] was intended before renaming. *)
type 'a w_det_meth =
  | Pcoc_w of 'a
  | Pcoc_gamma_w of 'a
  | Diffsel of 'a
(** Outcome of one detection method on one dataset: the [dataset] it was
    derived from, the [det_meth] that was applied, and the resulting
    workflow [det_result]. *)
type 'a det_result = {
  dataset : dataset ;
  det_meth : det_meth ;
  det_result : 'a workflow ;
}
lib/pipeline.ml
View file @
b8717f8a
...
...
@@ -50,7 +50,6 @@ let repo_of_dataset_l ~preview dataset_l =
|
Raw_dataset
d
->
repo_of_raw_dataset
d
in
Repo
.
shift
tree_prefix
(
Repo
.
shift
model_prefix
repo_d
)
(*repo_d*)
)
|>
List
.
concat
...
...
@@ -74,11 +73,57 @@ let derive_from_tree ~tree_dir ~tree ~preview =
derive_from_model
~
model
~
tree
~
tree_dataset
~
tree_prefix
~
preview
)
let
derive
~
tree_dir
~
trees
~
preview
=
let
derive
_sim
~
tree_dir
~
trees
~
preview
=
List
.
map
trees
~
f
:
(
fun
tree
->
derive_from_tree
~
tree_dir
~
tree
~
preview
)
|>
List
.
concat
(** [repo_of_det_results_l ~det_results_l] builds the repository items for a
    list of detection results.

    Each result contributes a single item named after its detection method
    ([string_of_det_meth]), shifted first by the dataset's [model_prefix]
    and then by its [tree_prefix]. The per-result lists are concatenated in
    input order. *)
let repo_of_det_results_l ~det_results_l =
  (* The former [let open Convergence_detection in] was dropped: nothing in
     this function refers to a name from that module. *)
  let repo_of_det_results det_results =
    let model_prefix = det_results.dataset.model_prefix in
    let tree_prefix = det_results.dataset.tree_prefix in
    let det_meth_prefix = string_of_det_meth det_results.det_meth in
    let w = det_results.det_result in
    let repo_d = Repo.[
        item [det_meth_prefix] (w (*/ selector ["RUN*"]*))
      ]
    in
    Repo.shift tree_prefix (Repo.shift model_prefix repo_d)
  in
  List.map det_results_l ~f:repo_of_det_results
  |> List.concat
(** [derive_from_det_meth ~det_meth ~dataset ~preview] builds the detection
    workflow for [det_meth] on [dataset] and returns it in a record together
    with the method and the dataset.

    - [Pcoc] and [Pcoc_gamma] call [Convergence_detection.pcoc] on the
      amino-acid alignment and the [`Detection] tree, with [gamma] set to
      [false] and [true] respectively.
    - [Diffsel] calls [Convergence_detection.diffsel] on the Phylip
      conversion of the nucleotide alignment and on the diffsel tree.

    When [preview] is true the diffsel parameters are [w_every = 1] and
    [n_cycles = 100]; otherwise they are [10] and [1000]. *)
let derive_from_det_meth ~det_meth ~dataset ~preview =
  (* The former [let open Convergence_detection in] was dropped: every use
     of that module below is already fully qualified. *)
  let faa = dataset.ready_dataset.faa in
  let fna = dataset.ready_dataset.fna in
  let phy_n = Bppsuite.fa2phy ~fna in
  let pcoc_tree = Tree_dataset.tree dataset.ready_dataset.tree_dataset `Detection in
  let diffsel_tree = Tree_dataset.diffsel_tree dataset.ready_dataset.tree_dataset in
  let w_every = if preview then 1 else 10 in
  let n_cycles = if preview then 100 else 1000 in
  let det_result =
    match det_meth with
    | Pcoc ->
      Convergence_detection.pcoc ~gamma:false ~faa ~tree:pcoc_tree
    | Pcoc_gamma ->
      Convergence_detection.pcoc ~gamma:true ~faa ~tree:pcoc_tree
    | Diffsel ->
      Convergence_detection.diffsel ~phy_n ~tree:diffsel_tree ~w_every ~n_cycles
  in
  { det_meth ; det_result ; dataset }
(** [derive_from_dataset ~dataset ~preview] applies every detection method
    ([Pcoc], [Pcoc_gamma], [Diffsel], in that order) to [dataset] and returns
    the list of results. *)
let derive_from_dataset ~dataset ~preview =
  let det_meths = [ Pcoc ; Pcoc_gamma ; Diffsel ] in
  let apply_method det_meth =
    derive_from_det_meth ~det_meth ~dataset ~preview
  in
  List.map det_meths ~f:apply_method
(** [derive_det ~dataset_l ~preview] runs [derive_from_dataset] on every
    dataset of [dataset_l] and flattens the per-dataset result lists into a
    single list, in input order. *)
let derive_det ~dataset_l ~preview =
  let results_of_dataset dataset =
    derive_from_dataset ~preview ~dataset
  in
  List.concat (List.map dataset_l ~f:results_of_dataset)
let
main
~
outdir
?
(
np
=
2
)
?
(
mem
=
2
)
~
tree_dir
~
preview
()
=
let
logger
=
Logger
.
tee
[
...
...
@@ -86,8 +131,14 @@ let main ~outdir ?(np = 2) ?(mem = 2) ~tree_dir ~preview () =
Dot_output
.
create
"dag.dot"
(*dot -Tpdf example/dag.dot -o dag.pdf*)
]
in
let
trees
=
Array
.
to_list
@@
Sys
.
readdir
tree_dir
in
let
dataset_l
=
derive
~
tree_dir
~
trees
~
preview
in
let
repo
=
repo_of_dataset_l
dataset_l
~
preview
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
preview
in
let
det_results_l
=
derive_det
~
dataset_l
~
preview
in
let
repo
=
[
repo_of_dataset_l
dataset_l
~
preview
;
repo_of_det_results_l
det_results_l
;
]
|>
List
.
concat
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
let
command
=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment