Commit b8717f8a authored by Carine Rey
Browse files

add pcoc/pcoc_gamma/diffsel

parent d316236f
# Base image.
FROM debian:stretch
# NOTE(review): MAINTAINER is deprecated in favour of `LABEL maintainer=...`;
# kept unchanged so the image metadata stays the same.
MAINTAINER Carine Rey carine.rey@ens-lyon.org
# System packages: build toolchain, Python 2.7 stack, and display/graphics
# libraries. Every continued line ends with " \" so that package names can
# never be fused together when lines are joined (previously `g++\`).
RUN apt-get update && \
    apt-get install --no-install-recommends -qy \
    git \
    make \
    cmake \
    python2.7-minimal \
    ca-certificates \
    curl \
    g++ \
    gcc \
    wget \
    gnupg2 \
    dirmngr \
    python-pip \
    python-numpy \
    python-pandas \
    python-setuptools \
    python-dev \
    sudo \
    less \
    xvfb \
    gosu \
    pyqt4-dev-tools \
    xauth \
    libcurl4-openssl-dev \
    libxml2-dev \
    libssl-dev \
    libcairo2-dev
# Python packages installed with pip (ete3 is pinned to an exact version).
RUN pip install ete3==3.0.0b35
RUN pip install biopython
# Add /usr/local/lib/ to the dynamic linker search path
# (presumably where the `make install` steps below put the Bio++ libraries).
ENV LD_LIBRARY_PATH=/usr/local/lib/
# # Install R
# RUN echo """deb http://cloud.r-project.org/bin/linux/debian stretch-cran34/""" >> /etc/apt/sources.list \
# && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys FCAE2A0E115C3D8A
# RUN apt-get update && apt-get install --no-install-recommends -qy r-base r-base-dev \
# && rm -rf /var/lib/apt/lists/* \
# && echo 'install.packages(c("ggplot2", "plyr", "reshape2", "readr", "cowplot"), repos="http://cran.us.r-project.org", dependencies=TRUE)' > /tmp/packages.R \
# && Rscript /tmp/packages.R
# Build and install the Bio++ libraries and bppsuite from source, each pinned
# to a fixed commit. Every build tries a parallel `make -j 4` first and falls
# back to a serial `make` if that fails.
# NOTE(review): `bpp_dir` is not defined by any ENV/ARG in this Dockerfile, so
# it presumably expands to an empty string (sources under /sources/...) - confirm.

# bpp-core
WORKDIR $bpp_dir/sources/bpp-core
RUN git clone https://github.com/BioPP/bpp-core . && \
    git checkout 405cab5 && \
    cmake . && \
    (make -j 4 || make) && \
    make install

# bpp-seq
WORKDIR $bpp_dir/sources/bpp-seq
RUN git clone https://github.com/BioPP/bpp-seq . && \
    git checkout 32d9c67 && \
    cmake . && \
    (make -j 4 || make) && \
    make install

# bpp-popgen
WORKDIR $bpp_dir/sources/bpp-popgen
RUN git clone https://github.com/BioPP/bpp-popgen . && \
    git checkout 77d712e && \
    cmake . && \
    (make -j 4 || make) && \
    make install

# bpp-phyl (cloned from the devel branch)
WORKDIR $bpp_dir/sources/bpp-phyl
RUN git clone --branch devel https://github.com/BioPP/bpp-phyl . && \
    git checkout 561ac70 && \
    cmake . && \
    (make -j 4 || make) && \
    make install

# bppsuite
WORKDIR $bpp_dir/sources/bppsuite
RUN git clone https://github.com/BioPP/bppsuite . && \
    git checkout 77ccc0a && \
    cmake . && \
    (make -j 4 || make) && \
    make install
# Fetch PCOC at a pinned commit.
# NOTE(review): HOME is not declared with ENV in this Dockerfile; WORKDIR
# presumably expands it to an empty string (i.e. /pcoc) - confirm.
WORKDIR $HOME/pcoc
RUN git clone https://github.com/CarineRey/pcoc.git . && \
    git checkout 957a4b1
# Install the data, README, entrypoint and scripts. The steps are chained with
# `&&`: a single `&` would run a step in the background and let the build
# succeed even when a copy fails.
RUN cp -r data /data/ && \
    cp README.md /usr/local/etc/ && \
    cp etc/entrypoint.sh /usr/local/bin/entrypoint.sh && \
    cp src/* /usr/local/bin/
# The entrypoint script is always run; by default it receives
# `cat /usr/local/etc/README.md` as its arguments.
CMD ["cat", "/usr/local/etc/README.md"]
ENTRYPOINT ["bash", "/usr/local/bin/entrypoint.sh"]
#! /bin/bash
# Build the pcoc Docker image from ./Dockerfile and tag it carinerey/pcoc:<TAG>.
# Pass "push_yes" as the first argument to also push the image.
set -e

IMAGE_NAME=pcoc
DOCKERFILE_DIR=.
TAG=06212018
REPO="carinerey/${IMAGE_NAME}:${TAG}"

docker build -t "$REPO" -f ./Dockerfile "$DOCKERFILE_DIR"

# Push only when explicitly requested.
if [[ $1 == "push_yes" ]]; then
    docker push "$REPO"
fi
......@@ -28,7 +28,7 @@ let bppseqgen ~nb_sites ~tree ~config : nucleotide_fasta workflow =
]
let conf_file_bppseqman ~fna =
let conf_file_bppseqman_fna2faa ~fna =
seq ~sep:"\n" [
assign "input.sequence.file" (dep fna) ;
assign "output.sequence.file" dest ;
......@@ -42,6 +42,23 @@ let conf_file_bppseqman ~fna =
(** [fna2faa ~fna] runs [bppseqman] on the nucleotide FASTA [fna] with the
    option file generated by [conf_file_bppseqman_fna2faa] and yields an
    amino-acid FASTA workflow. *)
let fna2faa ~(fna:nucleotide_fasta workflow) : aminoacid_fasta workflow =
  workflow ~descr:"bppsuite.fna2faa" [
    cmd "bppseqman" ~env [
      (* Single [param] assignment: the stale line that still referred to the
         pre-rename helper [conf_file_bppseqman] is dropped. *)
      assign "param" (file_dump (conf_file_bppseqman_fna2faa ~fna)) ;
    ]
  ]
(** Template of the [bppseqman] option file used by [fa2phy]: reads [fna],
    writes to the workflow destination and requests the Phylip output format.
    Entries are joined with newlines. *)
let conf_file_bppseqman_fa2phy ~fna =
  let input_file = assign "input.sequence.file" (dep fna) in
  let output_file = assign "output.sequence.file" dest in
  let output_format = assign "output.sequence.format" (string "Phylip") in
  (* Raw trailer, reproduced verbatim including its line breaks. *)
  let trailer = string {| input.alignment = true
sequence.manip =
|} in
  seq ~sep:"\n" [ input_file ; output_file ; output_format ; trailer ]
(** [fa2phy ~fna] runs [bppseqman] with the option file built by
    [conf_file_bppseqman_fa2phy] and yields a nucleotide Phylip workflow. *)
let fa2phy ~(fna: nucleotide_fasta workflow) : nucleotide_phylip workflow =
  let param_file = file_dump (conf_file_bppseqman_fa2phy ~fna) in
  let run_bppseqman = cmd "bppseqman" ~env [ assign "param" param_file ] in
  workflow ~descr:"bppsuite.fa2phy" [ run_bppseqman ]
......@@ -11,3 +11,7 @@ val bppseqgen :
(** [fna2faa ~fna] is a workflow that runs [bppseqman] on the nucleotide
    FASTA [fna] and yields an amino-acid FASTA. *)
val fna2faa :
fna:nucleotide_fasta workflow ->
aminoacid_fasta workflow

(** [fa2phy ~fna] is a workflow that runs [bppseqman] on the nucleotide
    FASTA [fna] with the Phylip output format and yields a nucleotide
    Phylip file. *)
val fa2phy :
fna: nucleotide_fasta workflow ->
nucleotide_phylip workflow
open Bistro.EDSL
open Bistro.Std
open Core
open File_formats
open Bistro_bioinfo.Std
(* Abstract marker types; not referenced by the definitions visible in this
   module (the functions below are typed with [det_out] instead). *)
type pcoc_out
type diffsel_out
(** Tag used in the type of the output directory of a detection workflow;
    both [pcoc] and [diffsel] return a [det_out directory workflow]. *)
type det_out =
| Pcoc_out
| Diffsel_out
(** [pcoc ?gamma ~faa ~tree] describes a run of [pcoc_det.py] inside the
    [carinerey/pcoc:06212018] Docker image.

    @param gamma controls the [--gamma] command-line flag.
    @param faa amino-acid alignment, passed with [-aa].
    @param tree tree file, passed with [-t].
    @return a workflow whose destination directory is passed with [-o]. *)
let pcoc ?gamma ~(faa:aminoacid_fasta workflow) ~(tree:_ workflow) : (*`pcoc*) det_out directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  let gamma_flag = option ( flag string "--gamma" ) gamma in
  let args = [
    opt "-t" dep tree ;
    opt "-m" string "-" ;
    opt "-aa" dep faa ;
    opt "-o" ident dest ;
    gamma_flag ;
  ] in
  workflow ~descr:"convergence_detection.pcoc" [ cmd "pcoc_det.py" ~env args ]
(** [diffsel ~phy_n ~tree ~w_every ~n_cycles] describes a run of
    [_build/diffsel] inside the [vlanore/diffsel:v1.0] Docker image.

    @param phy_n nucleotide Phylip alignment, passed with [-d].
    @param tree tree file, passed with [-t].
    @param w_every first value given to [-x].
    @param n_cycles second value given to [-x].
    @return a workflow whose destination is a directory; the run is named
    [myrun] inside it. *)
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) : (*`diffsel*) det_out directory workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"v1.0" () in
  (* Reference invocation:
     _build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun
     NOTE(review): the reference uses -ncond 3 while this command passes 2. *)
  let sampling = [ int w_every; string " "; int n_cycles] in
  let run_name = seq [ ident dest ; string "/myrun"] in
  let run_diffsel =
    cmd "_build/diffsel" ~env [
      opt "-t" dep tree ;
      opt "-d" dep phy_n ;
      opt "-ncond" int 2 ;
      opt "-x" seq sampling ;
      run_name ;
    ]
  in
  workflow ~descr:"convergence_detection.diffsel" [
    (* The destination directory must exist before diffsel writes into it. *)
    mkdir_p dest ;
    run_diffsel ;
  ]
open Bistro.EDSL
open Bistro.Std
open Core
open File_formats
open Bistro_bioinfo.Std
(* Abstract marker types; the values declared in this interface are typed
   with [det_out] rather than with these. *)
type pcoc_out
type diffsel_out
(** Tag used in the type of the output directory of a detection workflow. *)
type det_out =
| Pcoc_out
| Diffsel_out
(** [pcoc ?gamma ~faa ~tree] is a workflow producing a detection output
    directory from the amino-acid alignment [faa] and the tree [tree].
    [gamma] controls the gamma option of the underlying tool. *)
val pcoc :
?gamma : bool ->
faa : aminoacid_fasta workflow ->
tree : _ workflow ->
(*[`pcoc]*) det_out directory workflow

(** [diffsel ~phy_n ~tree ~w_every ~n_cycles] is a workflow producing a
    detection output directory from the nucleotide Phylip alignment [phy_n]
    and the tree [tree]. [w_every] and [n_cycles] are the two integers
    forwarded to the sampling option of the underlying tool. *)
val diffsel :
phy_n : nucleotide_phylip workflow ->
tree : _ workflow ->
w_every : int ->
n_cycles: int ->
(*[`diffsel]*) det_out directory workflow
......@@ -18,7 +18,12 @@ end
(** FASTA file tagged with the amino-acid alphabet. *)
class type aminoacid_fasta = object
  inherit fasta
  (* Phantom tag. Declared once, as [`Aminoacid]: the stale [`Nucleotide]
     declaration of the same method is removed. *)
  method alphabet : [`Aminoacid]
end
(** Text file tagged as a nucleotide Phylip file.
    NOTE(review): the FASTA class types tag their content with a method
    named [alphabet], whereas this one uses [format] - confirm this is
    intended. *)
class type nucleotide_phylip = object
inherit text_file
method format : [`Nucleotide]
end
......@@ -43,7 +48,6 @@ type ready_dataset = {
faa: aminoacid_fasta workflow ;
}
(** Wrapper distinguishing a raw dataset (payload of type ['a]) from a ready
    dataset (payload of type ['b]). *)
type ('a,'b) w_dataset =
| Raw_dataset of 'a
| Ready_dataset of 'b
......@@ -54,3 +58,26 @@ type dataset = {
tree_prefix : string ;
ready_dataset : ready_dataset
}
(** Convergence-detection methods. *)
type det_meth =
  | Pcoc
  | Pcoc_gamma
  | Diffsel

(** [string_of_det_meth m] is the lower-case identifier of method [m]. *)
let string_of_det_meth meth =
  match meth with
  | Pcoc -> "pcoc"
  | Pcoc_gamma -> "pcoc_gamma"
  | Diffsel -> "diffsel"
(** A payload of type ['a] tagged with the detection method it belongs to.
    NOTE(review): the last constructor is named [Diffsel], not [Diffsel_w]
    like its siblings, so it shares its name with a constructor of
    [det_meth] - confirm whether this is intended. *)
type 'a w_det_meth =
| Pcoc_w of 'a
| Pcoc_gamma_w of 'a
| Diffsel of 'a
(** Result of one detection method applied to one dataset. *)
type 'a det_result = {
dataset : dataset ; (* dataset the method was applied to *)
det_meth : det_meth ; (* method that produced the result *)
det_result : 'a workflow (* workflow producing the output *)
}
......@@ -50,7 +50,6 @@ let repo_of_dataset_l ~preview dataset_l =
| Raw_dataset d -> repo_of_raw_dataset d
in
Repo.shift tree_prefix (Repo.shift model_prefix repo_d)
(*repo_d*)
)
|> List.concat
......@@ -74,11 +73,57 @@ let derive_from_tree ~tree_dir ~tree ~preview =
derive_from_model ~model ~tree ~tree_dataset ~tree_prefix ~preview
)
(** [derive_sim ~tree_dir ~trees ~preview] applies [derive_from_tree] to each
    entry of [trees] and flattens the per-tree results into a single list.
    The stale pre-rename header ([let derive ...]) is removed so that only
    this definition remains. *)
let derive_sim ~tree_dir ~trees ~preview =
  List.map trees ~f:(fun tree ->
      derive_from_tree ~tree_dir ~tree ~preview)
  |> List.concat
(** [repo_of_det_results_l ~det_results_l] builds the repository items for a
    list of detection results. Each result workflow is published under a
    directory named after its detection method, then shifted under the
    model prefix and the tree prefix of its dataset. The per-result items
    are flattened into one list. *)
let repo_of_det_results_l ~det_results_l =
  List.map det_results_l ~f:(fun det_results ->
      let dataset = det_results.dataset in
      let det_meth_prefix = string_of_det_meth det_results.det_meth in
      (* The whole output is published; narrowing it with a selector
         (w / selector ["RUN*"]) is currently disabled. *)
      let w = det_results.det_result in
      let repo_d = Repo.[ item [det_meth_prefix] w ] in
      repo_d
      |> Repo.shift dataset.model_prefix
      |> Repo.shift dataset.tree_prefix
    )
  |> List.concat
(** [derive_from_det_meth ~det_meth ~dataset ~preview] builds the detection
    workflow of method [det_meth] for [dataset] and pairs it with its inputs.
    In preview mode diffsel gets a shorter sampling schedule
    ([w_every = 1], [n_cycles = 100] instead of [10] and [1000]). *)
let derive_from_det_meth ~det_meth ~dataset ~preview =
  let open Convergence_detection in
  let ready = dataset.ready_dataset in
  let w_every, n_cycles = if preview then (1, 100) else (10, 1000) in
  let pcoc_tree = Tree_dataset.tree ready.tree_dataset `Detection in
  (* Both pcoc variants share every input except the gamma switch. *)
  let run_pcoc ~gamma = pcoc ~gamma ~faa:ready.faa ~tree:pcoc_tree in
  let det_result =
    match det_meth with
    | Pcoc -> run_pcoc ~gamma:false
    | Pcoc_gamma -> run_pcoc ~gamma:true
    | Diffsel ->
      let phy_n = Bppsuite.fa2phy ~fna:ready.fna in
      let diffsel_tree = Tree_dataset.diffsel_tree ready.tree_dataset in
      diffsel ~phy_n ~tree:diffsel_tree ~w_every ~n_cycles
  in
  {det_meth; det_result; dataset}
(** [derive_from_dataset ~dataset ~preview] runs every detection method
    (pcoc, pcoc with gamma, diffsel) on [dataset], in that order. *)
let derive_from_dataset ~dataset ~preview=
  let det_meths = [Pcoc;Pcoc_gamma;Diffsel] in
  let run det_meth = derive_from_det_meth ~det_meth ~dataset ~preview in
  List.map det_meths ~f:run
(** [derive_det ~dataset_l ~preview] computes the detection results of every
    dataset in [dataset_l] and returns them as one flat list. *)
let derive_det ~dataset_l ~preview=
  List.concat_map dataset_l ~f:(fun dataset ->
      derive_from_dataset ~dataset ~preview)
let main ~outdir ?(np = 2) ?(mem = 2) ~tree_dir ~preview () =
let logger =
Logger.tee [
......@@ -86,8 +131,14 @@ let main ~outdir ?(np = 2) ?(mem = 2) ~tree_dir ~preview () =
Dot_output.create "dag.dot" (*dot -Tpdf example/dag.dot -o dag.pdf*)
] in
let trees = Array.to_list @@ Sys.readdir tree_dir in
let dataset_l = derive ~tree_dir ~trees ~preview in
let repo = repo_of_dataset_l dataset_l ~preview in
let dataset_l = derive_sim ~tree_dir ~trees ~preview in
let det_results_l = derive_det ~dataset_l ~preview in
let repo = [
repo_of_dataset_l dataset_l ~preview ;
repo_of_det_results_l det_results_l;
]
|> List.concat
in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let command =
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment