Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
b1db8894
Commit
b1db8894
authored
Jun 18, 2020
by
Philippe Veber
Browse files
update wrt bistro
parent
fecafac7
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
366 additions
and
389 deletions
+366
-389
lib/bppsuite.ml
lib/bppsuite.ml
+19
-21
lib/convergence_detection.ml
lib/convergence_detection.ml
+31
-35
lib/diffsel.ml
lib/diffsel.ml
+50
-56
lib/diffseldsparse.ml
lib/diffseldsparse.ml
+65
-73
lib/identical.ml
lib/identical.ml
+16
-20
lib/msd.ml
lib/msd.ml
+8
-8
lib/multinomial.ml
lib/multinomial.ml
+2
-2
lib/pcoc.ml
lib/pcoc.ml
+4
-4
lib/phyml.ml
lib/phyml.ml
+15
-16
lib/post_analyses.ml
lib/post_analyses.ml
+66
-56
lib/profile.ml
lib/profile.ml
+18
-20
lib/raw_dataset.ml
lib/raw_dataset.ml
+3
-3
lib/ready_dataset.ml
lib/ready_dataset.ml
+2
-2
lib/tamuri.ml
lib/tamuri.ml
+23
-25
lib/testnh.ml
lib/testnh.ml
+21
-23
lib/topological.ml
lib/topological.ml
+9
-11
lib/tree_dataset.ml
lib/tree_dataset.ml
+14
-14
No files found.
lib/bppsuite.ml
View file @
b1db8894
...
...
@@ -127,19 +127,17 @@ rate_distribution=Constant()
let
ne_g
=
Convergence_hypothesis
.
neg_of_model
hypothesis
in
let
ne_c
=
Convergence_hypothesis
.
nec_of_model
hypothesis
in
let
ne_a
=
ne_g
in
Workflow
.
shell
~
descr
:
(
"bppsuite.bppseqgen"
^
descr
)
[
within_container
img
(
and_list
[
mkdir_p
dest
;
mkdir_p
tmp
;
cd
tmp
;
cmd
"cat"
~
stdout
:
config_f
[
file_dump
(
conf_file_bppseqgen_multi_profiles
~
tree
~
profile_f
~
hypothesis
~
ne_c
~
ne_a
~
nb_sites_per_profile
)
;
dep
nodes
;
];
cmd
"bash"
[
file_dump
(
bppseqgen_multi_profiles_script
~
config
:
config_f
~
out
~
profile_c
:
profile_c_ok
~
seed
)];
]
)
Workflow
.
shell
~
descr
:
(
"bppsuite.bppseqgen"
^
descr
)
~
img
[
and_list
[
mkdir_p
dest
;
mkdir_p
tmp
;
cd
tmp
;
cmd
"cat"
~
stdout
:
config_f
[
file_dump
(
conf_file_bppseqgen_multi_profiles
~
tree
~
profile_f
~
hypothesis
~
ne_c
~
ne_a
~
nb_sites_per_profile
)
;
dep
nodes
;
];
cmd
"bash"
[
file_dump
(
bppseqgen_multi_profiles_script
~
config
:
config_f
~
out
~
profile_c
:
profile_c_ok
~
seed
)];
]
]
let
alignment
run_bppseqgen_multi_profiles
:
nucleotide_fasta
file
=
...
...
@@ -167,8 +165,8 @@ let conf_file_bppseqman_fna2faa ~fna =
]
let
fna2faa
(
fna
:
nucleotide_fasta
file
)
:
aminoacid_fasta
file
=
Workflow
.
shell
~
descr
:
"bppsuite.fna2faa"
[
cmd
"bppseqman"
~
img
[
Workflow
.
shell
~
descr
:
"bppsuite.fna2faa"
~
img
[
cmd
"bppseqman"
[
assign
"param"
(
file_dump
(
conf_file_bppseqman_fna2faa
~
fna
))
;
]
]
...
...
@@ -198,23 +196,23 @@ let conf_file_bppseqman_faa2phy ~faa =
]
let
fna2phy
~
(
fna
:
nucleotide_fasta
file
)
:
nucleotide_phylip
file
=
Workflow
.
shell
~
descr
:
"bppsuite.fna2phy_interleaved"
[
cmd
"bppseqman"
~
img
[
Workflow
.
shell
~
descr
:
"bppsuite.fna2phy_interleaved"
~
img
[
cmd
"bppseqman"
[
assign
"param"
(
file_dump
(
conf_file_bppseqman_fna2phy
~
fna
))
;
]
]
let
faa2phy
~
(
faa
:
aminoacid_fasta
file
)
:
aminoacid_phylip
file
=
Workflow
.
shell
~
descr
:
"bppsuite.faa2phy_interleaved"
[
cmd
"bppseqman"
~
img
[
Workflow
.
shell
~
descr
:
"bppsuite.faa2phy_interleaved"
~
img
[
cmd
"bppseqman"
[
assign
"param"
(
file_dump
(
conf_file_bppseqman_faa2phy
~
faa
))
;
]
]
let
paste_fna
~
(
fna_l
:
nucleotide_fasta
file
list
)
:
nucleotide_fasta
file
=
Workflow
.
shell
~
descr
:
"bppsuite.catfasta"
[
cmd
"catfasta2phyml.pl"
~
stdout
:
dest
~
img
(
List
.
concat
[
Workflow
.
shell
~
descr
:
"bppsuite.catfasta"
~
img
[
cmd
"catfasta2phyml.pl"
~
stdout
:
dest
(
List
.
concat
[
[
string
"-f"
]
;
List
.
map
fna_l
~
f
:
(
fun
fna
->
dep
fna
)
;
])
...
...
lib/convergence_detection.ml
View file @
b1db8894
...
...
@@ -69,8 +69,8 @@ let merge_results ?fna_infos ~(res_by_tools : result list) () : text file =
seq
~
sep
:
" "
[
opt
;
dep
w
]
)
in
Workflow
.
shell
~
descr
:
"convergence_detection.merge_results"
[
cmd
"python"
~
img
:
Env
.
env_py
[
Workflow
.
shell
~
descr
:
"convergence_detection.merge_results"
~
img
:
Env
.
env_py
[
cmd
"python"
[
file_dump
(
string
Scripts
.
merge_det_results
)
;
opt
"-o"
ident
dest
;
seq
~
sep
:
" "
command
;
...
...
@@ -79,8 +79,8 @@ let merge_results ?fna_infos ~(res_by_tools : result list) () : text file =
]
let
merge_result_tables
?
fna_infos
?
oracle
?
multinomial
?
tdg09
?
identical
?
topological
?
pcoc
?
pcoc_v2
?
pcoc_pcp
?
diffsel
?
diffseldsparse
()
:
text
file
=
Workflow
.
shell
~
descr
:
"convergence_detection.merge_results"
[
cmd
"python"
~
img
:
Env
.
env_py
[
Workflow
.
shell
~
descr
:
"convergence_detection.merge_results"
~
img
:
Env
.
env_py
[
cmd
"python"
[
file_dump
(
string
Scripts
.
merge_det_results
)
;
opt
"-o"
ident
dest
;
option
(
opt
"--multinomial"
dep
)
multinomial
;
...
...
@@ -141,50 +141,46 @@ let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~t
in
let
out
=
dest
//
"results.svg"
in
let
inner
=
Workflow
.
shell
~
descr
:
"convergence_detection.plot_results"
[
within_container
img
(
and_list
[
mkdir_p
dest
;
cmd
"python"
[
Utils
.
script_dump
Scripts
.[
diffsel_script_utils
;
plot_data
;
plot_convergent_sites
]
;
opt
"-msa"
dep
faa
;
opt
"-tsv"
dep
tsv
;
opt
"-tree"
dep
tree
;
opt
"-out"
ident
out
;
opt
"-meth"
ident
meths
;
option
(
opt
"-t"
ident
)
meths_t
;
option
(
opt
"--t_tsv"
dep
)
t_choices
;
flag
string
"--all_sites"
plot_all_sites
;
]
]
)
]
in
Workflow
.
select
inner
[
"results.svg"
]
let
plot_convergent_sites
?
(
plot_all_sites
=
true
)
~
alignment
~
detection_results
~
tree
()
=
Workflow
.
shell
~
descr
:
"plot_convergent_sites.py"
[
within_container
Env
.
env_pcoc
(
Workflow
.
shell
~
descr
:
"convergence_detection.plot_results"
~
img
[
and_list
[
mkdir_p
dest
;
cmd
"python"
[
Utils
.
script_dump
Scripts
.[
diffsel_script_utils
;
plot_data
;
plot_convergent_sites
]
;
opt
"-
tsv
"
dep
detection_results
;
opt
"-
msa
"
dep
alignment
;
opt
"-
msa
"
dep
faa
;
opt
"-
tsv
"
dep
tsv
;
opt
"-tree"
dep
tree
;
opt
"-out"
ident
(
dest
//
"plot.svg"
)
;
opt
"-out"
ident
out
;
opt
"-meth"
ident
meths
;
option
(
opt
"-t"
ident
)
meths_t
;
option
(
opt
"--t_tsv"
dep
)
t_choices
;
flag
string
"--all_sites"
plot_all_sites
;
]
]
)
]
in
Workflow
.
select
inner
[
"results.svg"
]
let
plot_convergent_sites
?
(
plot_all_sites
=
true
)
~
alignment
~
detection_results
~
tree
()
=
Workflow
.
shell
~
descr
:
"plot_convergent_sites.py"
~
img
:
Env
.
env_pcoc
[
and_list
[
mkdir_p
dest
;
cmd
"python"
[
Utils
.
script_dump
Scripts
.[
diffsel_script_utils
;
plot_data
;
plot_convergent_sites
]
;
opt
"-tsv"
dep
detection_results
;
opt
"-msa"
dep
alignment
;
opt
"-tree"
dep
tree
;
opt
"-out"
ident
(
dest
//
"plot.svg"
)
;
flag
string
"--all_sites"
plot_all_sites
;
]
]
]
|>
Fn
.
flip
Workflow
.
select
[
"plot.svg"
]
let
recall_precision_curve
table
=
let
img
=
[
docker_image
~
account
:
"pveber"
~
name
:
"r_basics"
~
tag
:
"20190710"
()
]
in
Workflow
.
shell
~
descr
:
"recall_precision_curve"
[
cmd
"Rscript"
~
img
[
Workflow
.
shell
~
descr
:
"recall_precision_curve"
~
img
[
cmd
"Rscript"
[
file_dump
(
string
Scripts
.
recall_precision_curve
)
;
dep
table
;
dest
;
...
...
lib/diffsel.ml
View file @
b1db8894
...
...
@@ -61,27 +61,25 @@ let diffsel ~(phy_n:nucleotide_phylip file) ~(tree: _ file) ~(w_every:int) ~(n_c
let
n_cycles
=
if
(
n_cycles
>
200
)
then
20
else
n_cycles
in
let
script_r
=
tmp
//
"DiffselMCMCConvergenceAnalysis.Rmd"
in
(*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
Workflow
.
shell
~
descr
:
(
"convergence_detection.run_diffsel."
^
descr
)
[
within_container
env
(
and_list
[
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script_r
]
;
cmd
"cp"
[
dep
phy_n
;
dest_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
dest_tree
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
phy_n
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"/diffsel/_build/diffsel"
[
opt
"-t"
ident
tmp_tree
;
opt
"-d"
ident
tmp_ali
;
opt
"-ncond"
int
2
;
opt
"-x"
seq
[
int
w_every
;
string
" "
;
int
n_cycles
];
option
(
opt
"-seed"
int
)
seed
;
ident
chainname_tmp
;
];
cmd
"bash"
[(
file_dump
(
diffsel_add_iterations_script
~
chainname
~
ali
:
tmp_ali
~
tree
:
tmp_tree
~
seed
))];
]
)
Workflow
.
shell
~
descr
:
(
"convergence_detection.run_diffsel."
^
descr
)
~
img
:
env
[
and_list
[
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script_r
]
;
cmd
"cp"
[
dep
phy_n
;
dest_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
dest_tree
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
phy_n
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"/diffsel/_build/diffsel"
[
opt
"-t"
ident
tmp_tree
;
opt
"-d"
ident
tmp_ali
;
opt
"-ncond"
int
2
;
opt
"-x"
seq
[
int
w_every
;
string
" "
;
int
n_cycles
];
option
(
opt
"-seed"
int
)
seed
;
ident
chainname_tmp
;
];
cmd
"bash"
[(
file_dump
(
diffsel_add_iterations_script
~
chainname
~
ali
:
tmp_ali
~
tree
:
tmp_tree
~
seed
))];
]
]
let
check_conv
run_diffsel
:
[
`diffsel_check_conv
]
directory
=
...
...
@@ -90,24 +88,22 @@ let check_conv run_diffsel : [`diffsel_check_conv] directory =
let
trace
=
Workflow
.
select
run_diffsel
[
"myrun.trace"
]
in
let
out
=
dest
//
"out.html"
in
let
nb_new_iterations
=
dest
//
"new_iterations.txt"
in
Workflow
.
shell
~
descr
:
"convergence_detection.DiffselMCMCConvergenceAnalysis"
[
within_container
env
(
and_list
[
mkdir_p
tmp
;
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script
]
;
cmd
"Rscript"
[
string
"-e"
;
string
{
|
"rmarkdown::render(
\"
DiffselMCMCConvergenceAnalysis.Rmd
\"
,|} ;
string {|params=list(set_trace1=
\"
|} ;
dep trace ;
string {|
\"
))"
|
};
]
;
cmd
"cp"
[
string
"DiffselMCMCConvergenceAnalysis.html"
;
ident
out
]
;
cmd
"cp"
[
string
"new_iterations.txt"
;
ident
nb_new_iterations
]
]
)
Workflow
.
shell
~
descr
:
"convergence_detection.DiffselMCMCConvergenceAnalysis"
~
img
:
env
[
and_list
[
mkdir_p
tmp
;
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script
]
;
cmd
"Rscript"
[
string
"-e"
;
string
{
|
"rmarkdown::render(
\"
DiffselMCMCConvergenceAnalysis.Rmd
\"
,|} ;
string {|params=list(set_trace1=
\"
|} ;
dep trace ;
string {|
\"
))"
|
};
]
;
cmd
"cp"
[
string
"DiffselMCMCConvergenceAnalysis.html"
;
ident
out
]
;
cmd
"cp"
[
string
"new_iterations.txt"
;
ident
nb_new_iterations
]
]
]
let
selector
run_diffsel
:
text
file
=
...
...
@@ -120,25 +116,23 @@ let selector run_diffsel : text file =
let
dep_ali
=
(
dep
run_diffsel
)
//
"myrun.ali"
in
let
chainname
=
(
dep
run_diffsel
)
//
"myrun"
in
let
out
=
dest
in
Workflow
.
shell
~
descr
:
"convergence_detection.parse_diffsel"
[
within_container
env
(
and_list
[
mkdir_p
tmp
;
cd
tmp
;
Workflow
.
shell
~
descr
:
"convergence_detection.parse_diffsel"
~
img
:
env
[
and_list
[
mkdir_p
tmp
;
cd
tmp
;
cmd
"cp"
[
dep_ali
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep_tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep_ali
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep_tree
;
tmp_tree
];
(* required dep to link the file in the env *)
(*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffsel_script_utils
)
;
package
]
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffsel_analyze_result
)
;
script
]
;
(*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffsel_script_utils
)
;
package
]
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffsel_analyze_result
)
;
script
]
;
cmd
"python"
[
string
"diffsel_analyze_result.py"
;
opt
"-r"
string
"/diffsel/_build/readdiffsel"
;
opt
"-o"
ident
out
;
ident
chainname
;
]
cmd
"python"
[
string
"diffsel_analyze_result.py"
;
opt
"-r"
string
"/diffsel/_build/readdiffsel"
;
opt
"-o"
ident
out
;
ident
chainname
;
]
)
]
]
lib/diffseldsparse.ml
View file @
b1db8894
...
...
@@ -53,30 +53,28 @@ let diffseldsparse
let
chainname
=
dest
//
"myrun"
in
let
n_cycles
=
if
(
n_cycles
>
200
)
then
200
else
n_cycles
in
let
script_r
=
tmp
//
"DiffselMCMCConvergenceAnalysis.Rmd"
in
Workflow
.
shell
~
descr
:
"convergence_detection.run_diffseldsparse"
[
within_container
img
(
and_list
[
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script_r
]
;
cmd
"cp"
[
dep
alignment
;
dest_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
dest_tree
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
alignment
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"/bayescode/data/diffseldsparse"
[
opt
"-t"
ident
tmp_tree
;
opt
"-d"
ident
tmp_ali
;
option
(
opt
"-pi"
float
)
pi
;
option
(
opt
"-shiftprob"
(
fun
(
mean
,
invconv
)
->
seq
~
sep
:
" "
[
float
mean
;
float
invconv
]))
shiftprob
;
option
(
opt
"-eps"
float
)
eps
;
opt
"-ncond"
int
2
;
opt
"-x"
seq
[
int
w_every
;
string
" "
;
int
n_cycles
];
(* opt "-seed" int seed ; *)
(* not yet implemented *)
ident
chainname_tmp
;
];
cmd
"bash"
[(
file_dump
(
diffseldsparse_add_iterations_script
~
chainname
~
ali
:
tmp_ali
~
tree
:
tmp_tree
))];
]
)
Workflow
.
shell
~
descr
:
"convergence_detection.run_diffseldsparse"
~
img
[
and_list
[
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script_r
]
;
cmd
"cp"
[
dep
alignment
;
dest_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
dest_tree
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
alignment
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"/bayescode/data/diffseldsparse"
[
opt
"-t"
ident
tmp_tree
;
opt
"-d"
ident
tmp_ali
;
option
(
opt
"-pi"
float
)
pi
;
option
(
opt
"-shiftprob"
(
fun
(
mean
,
invconv
)
->
seq
~
sep
:
" "
[
float
mean
;
float
invconv
]))
shiftprob
;
option
(
opt
"-eps"
float
)
eps
;
opt
"-ncond"
int
2
;
opt
"-x"
seq
[
int
w_every
;
string
" "
;
int
n_cycles
];
(* opt "-seed" int seed ; *)
(* not yet implemented *)
ident
chainname_tmp
;
];
cmd
"bash"
[(
file_dump
(
diffseldsparse_add_iterations_script
~
chainname
~
ali
:
tmp_ali
~
tree
:
tmp_tree
))];
]
]
let
check_conv
run_diffseldsparse
:
[
`diffseldsparse_check_conv
]
directory
=
...
...
@@ -84,43 +82,39 @@ let check_conv run_diffseldsparse : [`diffseldsparse_check_conv] directory =
let
trace
=
Workflow
.
select
run_diffseldsparse
[
"myrun.trace"
]
in
let
out
=
dest
//
"out.html"
in
let
nb_new_iterations
=
dest
//
"new_iterations.txt"
in
Workflow
.
shell
~
descr
:
"convergence_detection.DiffselMCMCConvergenceAnalysis"
[
within_container
Env
.
env_r
(
and_list
[
mkdir_p
tmp
;
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script
]
;
cmd
"Rscript"
[
string
"-e"
;
string
{
|
"rmarkdown::render(
\"
DiffselMCMCConvergenceAnalysis.Rmd
\"
,|} ;
string {|params=list(set_trace1=
\"
|} ;
dep trace ;
string {|
\"
))"
|
};
]
;
cmd
"cp"
[
string
"DiffselMCMCConvergenceAnalysis.html"
;
ident
out
]
;
cmd
"cp"
[
string
"new_iterations.txt"
;
ident
nb_new_iterations
]
]
)
Workflow
.
shell
~
descr
:
"convergence_detection.DiffselMCMCConvergenceAnalysis"
~
img
:
Env
.
env_r
[
and_list
[
mkdir_p
tmp
;
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script
]
;
cmd
"Rscript"
[
string
"-e"
;
string
{
|
"rmarkdown::render(
\"
DiffselMCMCConvergenceAnalysis.Rmd
\"
,|} ;
string {|params=list(set_trace1=
\"
|} ;
dep trace ;
string {|
\"
))"
|
};
]
;
cmd
"cp"
[
string
"DiffselMCMCConvergenceAnalysis.html"
;
ident
out
]
;
cmd
"cp"
[
string
"new_iterations.txt"
;
ident
nb_new_iterations
]
]
]
let
readdiffseldsparse
run
=
let
tmp_chain
=
tmp
//
"myrun"
in
Workflow
.
shell
~
descr
:
"readdiffseldsparse"
[
within_container
img
(
and_list
[
mkdir_p
tmp
;
cmd
"ln"
[
string
"-s"
;
(
dep
run
)
//
"*"
;
tmp
];
(* required dep to link the file in the env *)
cmd
"/bayescode/data/readdiffseldsparse"
[
opt
"-x"
string
"300 1 1000"
;
ident
tmp_chain
;
]
;
mkdir_p
dest
;
cmd
"mv"
[
tmp
//
"*pp"
;
dest
]
;
]
)
Workflow
.
shell
~
descr
:
"readdiffseldsparse"
~
img
[
and_list
[
mkdir_p
tmp
;
cmd
"ln"
[
string
"-s"
;
(
dep
run
)
//
"*"
;
tmp
];
(* required dep to link the file in the env *)
cmd
"/bayescode/data/readdiffseldsparse"
[
opt
"-x"
string
"300 1 1000"
;
ident
tmp_chain
;
]
;
mkdir_p
dest
;
cmd
"mv"
[
tmp
//
"*pp"
;
dest
]
;
]
]
let
posterior_probabilities
run_diffseldsparse
:
text
file
=
...
...
@@ -130,23 +124,21 @@ let posterior_probabilities run_diffseldsparse : text file =
let
dep_ali
=
(
dep
run_diffseldsparse
)
//
"myrun.ali"
in
let
chainname
=
(
dep
run_diffseldsparse
)
//
"myrun"
in
let
out
=
dest
in
Workflow
.
shell
~
descr
:
"convergence_detection.parse_diffseldsparse"
[
within_container
img
(
and_list
[
mkdir_p
tmp
;
cd
tmp
;
cmd
"cp"
[
dep_ali
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep_tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"python"
[
Utils
.
script_dump
Scripts
.[
diffsel_script_utils
;
diffseldsparse_analyze_result
]
;
opt
"-r"
string
"/bayescode/data/readdiffseldsparse"
;
opt
"-o"
ident
out
;
ident
chainname
;
]
Workflow
.
shell
~
descr
:
"convergence_detection.parse_diffseldsparse"
~
img
[
and_list
[
mkdir_p
tmp
;
cd
tmp
;
cmd
"cp"
[
dep_ali
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep_tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"python"
[
Utils
.
script_dump
Scripts
.[
diffsel_script_utils
;
diffseldsparse_analyze_result
]
;
opt
"-r"
string
"/bayescode/data/readdiffseldsparse"
;
opt
"-o"
ident
out
;
ident
chainname
;
]
)
]
]
let
%
pworkflow
results
dir
=
...
...
lib/identical.ml
View file @
b1db8894
...
...
@@ -36,16 +36,14 @@ let bppml ?(descr="") ~faa ~tree ~config : _ workflow =
let
env
=
Env
.
env_bppsuite
in
let
config_f
=
dest
//
"config_bppml.bpp"
in
let
out
=
ident
dest
in
Workflow
.
shell
~
descr
:
(
"bppsuite.bppml"
^
descr
)
[
within_container
env
(
and_list
[
mkdir_p
dest
;
cmd
"cat"
~
stdout
:
config_f
[(
file_dump
(
conf_file_bppml
~
tree
~
faa
~
out
~
config
))];
cmd
"bppml"
[
assign
"param"
config_f
;
]
Workflow
.
shell
~
descr
:
(
"bppsuite.bppml"
^
descr
)
~
img
:
env
[
and_list
[
mkdir_p
dest
;
cmd
"cat"
~
stdout
:
config_f
[(
file_dump
(
conf_file_bppml
~
tree
~
faa
~
out
~
config
))];
cmd
"bppml"
[
assign
"param"
config_f
;
]
)
]
]
let
conf_file_bppancestor
~
tree
~
faa
~
out
~
config
=
...
...
@@ -79,16 +77,14 @@ let bppancestor ?(descr="") ~faa ~tree ~config : _ workflow =
let
env
=
Env
.
env_bppsuite
in
let
config_f
=
dest
//
"config_bppancestor.bpp"
in
let
out
=
ident
dest
in
Workflow
.
shell
~
descr
:
(
"bppsuite.bppancestor"
^
descr
)
[
within_container
env
(
and_list
[
mkdir_p
dest
;
cmd
"cat"
~
stdout
:
config_f
[(
file_dump
(
conf_file_bppancestor
~
tree
~
faa
~
out
~
config
))];
cmd
"bppancestor"
[
assign
"param"
config_f
;
]
Workflow
.
shell
~
descr
:
(
"bppsuite.bppancestor"
^
descr
)
~
img
:
env
[
and_list
[
mkdir_p
dest
;
cmd
"cat"
~
stdout
:
config_f
[(
file_dump
(
conf_file_bppancestor
~
tree
~
faa
~
out
~
config
))];
cmd
"bppancestor"
[
assign
"param"
config_f
;
]
)
]
]
let
identical
?
(
descr
=
""
)
~
(
tree_id
:_
file
)
~
(
tree_sc
:_
file
)
~
(
faa
:
aminoacid_fasta
file
)
~
prot_model
()
:
[
`identical
]
directory
=
...
...
@@ -97,9 +93,9 @@ let identical ?(descr="") ~(tree_id:_ file) ~(tree_sc:_ file) ~(faa:aminoacid_fa
let
out2
=
dest
//
"out2.tsv"
in
let
run_bppancestor
=
bppancestor
~
descr
:
""
~
tree
:
tree_id
~
faa
~
config
in
let
proba
=
Workflow
.
select
run_bppancestor
[
"sites.tsv"
]
in
Workflow
.
shell
~
descr
:
(
"identical."
^
prot_model
^
"."
^
descr
)
[
Workflow
.
shell
~
descr
:
(
"identical."
^
prot_model
^
"."
^
descr
)
~
img
:
Env
.
env_pcoc
[
mkdir
dest
;
cmd
"python"
~
img
:
Env
.
env_pcoc
[
cmd
"python"
[
file_dump
(
string
Scripts
.
calc_identical
)
;
opt
"-t"
dep
tree_sc
;
opt
"-a"
dep
faa
;
...
...
lib/msd.ml
View file @
b1db8894
...
...
@@ -10,29 +10,29 @@ let msd ?(descr="") ~e ~(faa : aminoacid_fasta file) ~(tree_sc : _ file) : [`msd
let
tree_nw
=
dest
//
"tree.nw"
in
let
out
=
dest
//
"out.tsv"
in
let
fa_tmp
=
tmp
//
"out.tsv"
in
Workflow
.
shell
~
descr
:
(
"convergence_detection.run_msd."
^
descr
)
[
Workflow
.
shell
~
descr
:
(
"convergence_detection.run_msd."
^
descr
)
~
img
[
mkdir_p
dest
;
mkdir_p
tmp
;
(*./msd -t 1 -o <nom fichier de sortie> -e 0.05 <phylogénie Newick> <table caractère convergent> <fichier de simulation fasta> *)
cmd
"python"
~
img
[
cmd
"python"
[
file_dump
(
string
Scripts
.
parse_input_msd
)
;
opt
"-i"
dep
tree_sc
;
opt
"-o"
ident
tree_nw
;
opt
"-m"
ident
map_table
;
];
cmd
"cp"
~
img
[
cmd
"cp"
[
dep
faa
;
ident
fa_tmp
];
cmd
"cat"
~
img
[
cmd
"cat"
[
ident
fa_tmp
];
cmd
"grep"
~
img
[
cmd
"grep"
[
string
"-c"
;
string
"
\"
>
\"
"
;
ident
fa_tmp
];
cmd
"msd"
~
img
[
cmd
"msd"
[
opt
"-t"
int
1
;
opt
"-o"
ident
out
;
opt
"-e"
float
e
;
...
...
@@ -43,8 +43,8 @@ let msd ?(descr="") ~e ~(faa : aminoacid_fasta file) ~(tree_sc : _ file) : [`msd
]
let
results
run_msd
:
text
file
=
Workflow
.
shell
~
descr
:
"convergence_detection.parse_msd"
[
cmd
"python"
~
img
[
Workflow
.
shell
~
descr
:
"convergence_detection.parse_msd"
~
img
[
cmd
"python"
[
file_dump
(
string
Scripts
.
parse_output_msd
)
;
opt
"-i"
dep
(
Workflow
.
select
run_msd
[
"out.tsv"
]);
opt
"-o"
ident
dest
;
...
...
lib/multinomial.ml
View file @
b1db8894
...
...
@@ -5,8 +5,8 @@ open File_formats
let
multinomial
?
(
descr
=
""
)
~
(
tree_sc
:_
file
)
~
(
faa
:
aminoacid_fasta
file
)
()
:
text
file
=
let
img
=
Env
.
env_py
in
Workflow
.
shell
~
descr
:
(
"calc_multinomial."
^
descr
)
[
cmd
"python"
~
img
[
Workflow
.
shell
~
descr
:
(
"calc_multinomial."
^
descr
)
~
img
[
cmd
"python"
[
file_dump
(
string
Scripts
.
calc_multinomial
)
;
opt
"-t"
dep
tree_sc
;
opt
"-a"
dep
faa
;
...
...
lib/pcoc.ml
View file @
b1db8894
...
...
@@ -6,8 +6,8 @@ open File_formats
let
img
=
Env
.
env_pcoc
let
pcoc
?
(
descr
=
""
)
?
plot_complete
?
gamma
?
catx_est
?
max_gap_per_pos
?
max_gap_per_conv_leaf
~
(
faa
:
aminoacid_fasta
file
)
~
(
tree
:_
workflow
)
()
:
[
`pcoc
]
directory
=
Workflow
.
shell
~
descr
:
(
"convergence_detection.pcoc."
^
descr
)
[
cmd
"pcoc_det.py"
~
img
[
Workflow
.
shell
~
descr
:
(
"convergence_detection.pcoc."
^
descr
)
~
img
[
cmd
"pcoc_det.py"
[
opt
"-t"
dep
tree
;
opt
"-m"
string
"-"
;
opt
"-aa"
dep
faa
;
...
...
@@ -42,9 +42,9 @@ let pcoc_v2 ?(descr = "") ?gamma ~(aa_profiles:aa_profiles) ~(faa:aminoacid_fast
|
`Physic_properties
->
opt
"-est_profiles"
ident
tmp_profiles
in
Workflow
.
shell
~
descr
:
(
"convergence_detection.pcoc_v2."
^
descr
)
[
Workflow
.
shell
~
descr
:
(
"convergence_detection.pcoc_v2."
^
descr
)
~
img
:
img_v2
[
cmd
"cat"
~
stdout
:
tmp_profiles
[(
file_dump
(
string
Scripts
.
physic_properties_profiles
))];
cmd
"pcoc_det.py"
~
img
:
img_v2
[
cmd
"pcoc_det.py"
[
opt
"-t"
dep
tree
;