Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
cbb474fb
Commit
cbb474fb
authored
Jul 29, 2020
by
Philippe Veber
Browse files
Merge branch 'orthomam'
parents
7d33bbfc
fb326a34
Changes
46
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1017 additions
and
270 deletions
+1017
-270
.ocamlinit
.ocamlinit
+3
-1
app/diffseldsparse_benchmark.ml
app/diffseldsparse_benchmark.ml
+2
-2
app/dune
app/dune
+8
-0
app/orthomam_app.ml
app/orthomam_app.ml
+17
-0
lib/bppsuite.ml
lib/bppsuite.ml
+20
-22
lib/convergence_detection.ml
lib/convergence_detection.ml
+35
-37
lib/convergence_detection.mli
lib/convergence_detection.mli
+1
-1
lib/detection_pipeline.ml
lib/detection_pipeline.ml
+76
-23
lib/detection_pipeline.mli
lib/detection_pipeline.mli
+44
-0
lib/diffsel.ml
lib/diffsel.ml
+50
-56
lib/diffseldsparse.ml
lib/diffseldsparse.ml
+65
-73
lib/dune
lib/dune
+1
-1
lib/file_formats.ml
lib/file_formats.ml
+10
-0
lib/identical.ml
lib/identical.ml
+16
-20
lib/msd.ml
lib/msd.ml
+8
-8
lib/multinomial.ml
lib/multinomial.ml
+5
-6
lib/orthomam.ml
lib/orthomam.ml
+551
-0
lib/orthomam.mli
lib/orthomam.mli
+86
-0
lib/pcoc.ml
lib/pcoc.ml
+4
-4
lib/phyml.ml
lib/phyml.ml
+15
-16
No files found.
.ocamlinit
View file @
cbb474fb
module Top = Bistro_utils.Toplevel_eval.Make(struct let np = 8 let mem = 10 end)
module Top = Bistro_utils.Toplevel_eval.Make(struct let np = 8 let mem = 10 end)()
open Top
open Reviewphiltrans
(* Runs the external [njplot] program on the Newick file [t] and returns the
   exit status reported by [Sys.command].
   The path is shell-quoted with [Filename.quote] so that a path containing
   spaces or shell metacharacters is passed to [njplot] as a single argument
   instead of being re-interpreted by the shell. *)
let njplot (t : Biotope.Formats.newick Bistro.file) =
  Sys.command (Printf.sprintf "njplot %s" (Filename.quote (path t)))
(*
open Pipeline2
...
...
app/diffseldsparse_benchmark.ml
View file @
cbb474fb
...
...
@@ -14,8 +14,8 @@ let main ~n_h0 ~n_ha ~seed:i () =
}
in
let
w
=
benchmark
sim
in
(* print
_en
dl
ine
(Debug.path (multinomial sim)) ; *
)
print_endline
(
Debug
.
path
w
)
Bistro
_en
g
ine
.
Scheduler
.
simple_eval_exn
~
np
:
4
~
mem
:
(
`GB
4
)
(
Bistro
.
Workflow
.
path
w
)
|>
print_endline
let
command
=
let
open
Command
.
Let_syntax
in
...
...
app/dune
View file @
cbb474fb
...
...
@@ -13,3 +13,11 @@
(libraries reviewphiltrans)
(preprocess
(pps ppx_jane)))
; Executable built from the single module orthomam_app and exposed under the
; public name orthomam_convergence. It links against the reviewphiltrans
; library and is preprocessed with ppx_jane.
(executable
(name orthomam_app)
(public_name orthomam_convergence)
(modules orthomam_app)
(libraries reviewphiltrans)
(preprocess
(pps ppx_jane)))
app/orthomam_app.ml
0 → 100644
View file @
cbb474fb
open
Reviewphiltrans
(* Toplevel evaluator used by this program to run workflows.
   It is instantiated with [np = 3] and [mem = 10]; presumably these are the
   processor count and the memory budget handed to the scheduler (unit not
   visible here) -- TODO confirm against [Bistro_utils.Toplevel_eval]. *)
module Top = Bistro_utils.Toplevel_eval.Make (struct
    let np = 3
    let mem = 10
  end) ()
(* Program entry point.
   Builds an [Orthomam_db] from the hard-coded directory "/disk/data/omm",
   ranks sites with the [`tdg09] method using [species_with_echolocation] as
   the convergent species, evaluates the resulting workflow with [Top.eval],
   keeps the first 10 candidate sites and prints the [alignment_id] of each
   one that has it.
   A [Failure] raised anywhere in that pipeline used to be discarded
   silently; its message is now written to stderr so a failed run can be
   told apart from an empty result. The exit status is deliberately left
   unchanged. *)
let () =
  try
    Reviewphiltrans_toolbox.Orthomam_db.make "/disk/data/omm"
    |> Orthomam.(site_ranking ~convergent_species:species_with_echolocation ~meth:`tdg09)
    |> Top.eval
    |> Core.(Fn.flip List.take 10)
    |> List.iter Reviewphiltrans_toolbox.Candidate_site.(fun x -> Option.iter print_endline x.alignment_id)
  with
  | Failure msg -> prerr_endline ("orthomam_convergence: " ^ msg)
lib/bppsuite.ml
View file @
cbb474fb
...
...
@@ -127,19 +127,17 @@ rate_distribution=Constant()
let
ne_g
=
Convergence_hypothesis
.
neg_of_model
hypothesis
in
let
ne_c
=
Convergence_hypothesis
.
nec_of_model
hypothesis
in
let
ne_a
=
ne_g
in
Workflow
.
shell
~
descr
:
(
"bppsuite.bppseqgen"
^
descr
)
[
within_container
img
(
and_list
[
mkdir_p
dest
;
mkdir_p
tmp
;
cd
tmp
;
cmd
"cat"
~
stdout
:
config_f
[
file_dump
(
conf_file_bppseqgen_multi_profiles
~
tree
~
profile_f
~
hypothesis
~
ne_c
~
ne_a
~
nb_sites_per_profile
)
;
dep
nodes
;
];
cmd
"bash"
[
file_dump
(
bppseqgen_multi_profiles_script
~
config
:
config_f
~
out
~
profile_c
:
profile_c_ok
~
seed
)];
]
)
Workflow
.
shell
~
descr
:
(
"bppsuite.bppseqgen"
^
descr
)
~
img
[
and_list
[
mkdir_p
dest
;
mkdir_p
tmp
;
cd
tmp
;
cmd
"cat"
~
stdout
:
config_f
[
file_dump
(
conf_file_bppseqgen_multi_profiles
~
tree
~
profile_f
~
hypothesis
~
ne_c
~
ne_a
~
nb_sites_per_profile
)
;
dep
nodes
;
];
cmd
"bash"
[
file_dump
(
bppseqgen_multi_profiles_script
~
config
:
config_f
~
out
~
profile_c
:
profile_c_ok
~
seed
)];
]
]
let
alignment
run_bppseqgen_multi_profiles
:
nucleotide_fasta
file
=
...
...
@@ -159,7 +157,7 @@ let conf_file_bppseqman_fna2faa ~fna =
assign
"output.sequence.file"
dest
;
string
{
|
alphabet
=
Codon
(
letter
=
DNA
)
genetic_code
=
Standard
input
.
sequence
.
remove_stop_codons
=
no
input
.
sequence
.
remove_stop_codons
=
yes
input
.
sequence
.
sites_to_use
=
all
input
.
alignment
=
true
sequence
.
manip
=
Translate
...
...
@@ -167,8 +165,8 @@ let conf_file_bppseqman_fna2faa ~fna =
]
let
fna2faa
(
fna
:
nucleotide_fasta
file
)
:
aminoacid_fasta
file
=
Workflow
.
shell
~
descr
:
"bppsuite.fna2faa"
[
cmd
"bppseqman"
~
img
[
Workflow
.
shell
~
descr
:
"bppsuite.fna2faa"
~
img
[
cmd
"bppseqman"
[
assign
"param"
(
file_dump
(
conf_file_bppseqman_fna2faa
~
fna
))
;
]
]
...
...
@@ -198,23 +196,23 @@ let conf_file_bppseqman_faa2phy ~faa =
]
let
fna2phy
~
(
fna
:
nucleotide_fasta
file
)
:
nucleotide_phylip
file
=
Workflow
.
shell
~
descr
:
"bppsuite.fna2phy_interleaved"
[
cmd
"bppseqman"
~
img
[
Workflow
.
shell
~
descr
:
"bppsuite.fna2phy_interleaved"
~
img
[
cmd
"bppseqman"
[
assign
"param"
(
file_dump
(
conf_file_bppseqman_fna2phy
~
fna
))
;
]
]
let
faa2phy
~
(
faa
:
aminoacid_fasta
file
)
:
aminoacid_phylip
file
=
Workflow
.
shell
~
descr
:
"bppsuite.faa2phy_interleaved"
[
cmd
"bppseqman"
~
img
[
Workflow
.
shell
~
descr
:
"bppsuite.faa2phy_interleaved"
~
img
[
cmd
"bppseqman"
[
assign
"param"
(
file_dump
(
conf_file_bppseqman_faa2phy
~
faa
))
;
]
]
let
paste_fna
~
(
fna_l
:
nucleotide_fasta
file
list
)
:
nucleotide_fasta
file
=
Workflow
.
shell
~
descr
:
"bppsuite.catfasta"
[
cmd
"catfasta2phyml.pl"
~
stdout
:
dest
~
img
(
List
.
concat
[
Workflow
.
shell
~
descr
:
"bppsuite.catfasta"
~
img
[
cmd
"catfasta2phyml.pl"
~
stdout
:
dest
(
List
.
concat
[
[
string
"-f"
]
;
List
.
map
fna_l
~
f
:
(
fun
fna
->
dep
fna
)
;
])
...
...
lib/convergence_detection.ml
View file @
cbb474fb
...
...
@@ -69,8 +69,8 @@ let merge_results ?fna_infos ~(res_by_tools : result list) () : text file =
seq
~
sep
:
" "
[
opt
;
dep
w
]
)
in
Workflow
.
shell
~
descr
:
"convergence_detection.merge_results"
[
cmd
"python"
~
img
:
Env
.
env_py
[
Workflow
.
shell
~
descr
:
"convergence_detection.merge_results"
~
img
:
Env
.
env_py
[
cmd
"python"
[
file_dump
(
string
Scripts
.
merge_det_results
)
;
opt
"-o"
ident
dest
;
seq
~
sep
:
" "
command
;
...
...
@@ -79,8 +79,8 @@ let merge_results ?fna_infos ~(res_by_tools : result list) () : text file =
]
let
merge_result_tables
?
fna_infos
?
oracle
?
multinomial
?
tdg09
?
identical
?
topological
?
pcoc
?
pcoc_v2
?
pcoc_pcp
?
diffsel
?
diffseldsparse
()
:
text
file
=
Workflow
.
shell
~
descr
:
"convergence_detection.merge_results"
[
cmd
"python"
~
img
:
Env
.
env_py
[
Workflow
.
shell
~
descr
:
"convergence_detection.merge_results"
~
img
:
Env
.
env_py
[
cmd
"python"
[
file_dump
(
string
Scripts
.
merge_det_results
)
;
opt
"-o"
ident
dest
;
option
(
opt
"--multinomial"
dep
)
multinomial
;
...
...
@@ -141,50 +141,46 @@ let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~t
in
let
out
=
dest
//
"results.svg"
in
let
inner
=
Workflow
.
shell
~
descr
:
"convergence_detection.plot_results"
[
within_container
img
(
and_list
[
mkdir_p
dest
;
cmd
"python"
[
Utils
.
script_dump
Scripts
.[
diffsel_script_utils
;
plot_data
;
plot_convergent_sites
]
;
opt
"-msa"
dep
faa
;
opt
"-tsv"
dep
tsv
;
opt
"-tree"
dep
tree
;
opt
"-out"
ident
out
;
opt
"-meth"
ident
meths
;
option
(
opt
"-t"
ident
)
meths_t
;
option
(
opt
"--t_tsv"
dep
)
t_choices
;
flag
string
"--all_sites"
plot_all_sites
;
]
]
)
]
in
Workflow
.
select
inner
[
"results.svg"
]
let
plot_convergent_sites
?
(
plot_all_sites
=
true
)
~
alignment
~
detection_results
~
tree
()
=
Workflow
.
shell
~
descr
:
"plot_convergent_sites.py"
[
within_container
Env
.
env_pcoc
(
Workflow
.
shell
~
descr
:
"convergence_detection.plot_results"
~
img
[
and_list
[
mkdir_p
dest
;
cmd
"python"
[
Utils
.
script_dump
Scripts
.[
diffsel_script_utils
;
plot_data
;
plot_convergent_sites
]
;
opt
"-
tsv
"
dep
detection_results
;
opt
"-
msa
"
dep
alignment
;
opt
"-
msa
"
dep
faa
;
opt
"-
tsv
"
dep
tsv
;
opt
"-tree"
dep
tree
;
opt
"-out"
ident
(
dest
//
"plot.svg"
)
;
opt
"-out"
ident
out
;
opt
"-meth"
ident
meths
;
option
(
opt
"-t"
ident
)
meths_t
;
option
(
opt
"--t_tsv"
dep
)
t_choices
;
flag
string
"--all_sites"
plot_all_sites
;
]
]
)
]
in
Workflow
.
select
inner
[
"results.svg"
]
let
plot_convergent_sites
?
(
plot_all_sites
=
true
)
~
alignment
~
detection_results
~
tree
()
=
Workflow
.
shell
~
descr
:
"plot_convergent_sites.py"
~
img
:
Env
.
env_pcoc
[
and_list
[
mkdir_p
dest
;
cmd
"python"
[
Utils
.
script_dump
Scripts
.[
diffsel_script_utils
;
plot_data
;
plot_convergent_sites
]
;
opt
"-tsv"
dep
detection_results
;
opt
"-msa"
dep
alignment
;
opt
"-tree"
dep
tree
;
opt
"-out"
ident
(
dest
//
"plot.svg"
)
;
flag
string
"--all_sites"
plot_all_sites
;
]
]
]
|>
Fn
.
flip
Workflow
.
select
[
"plot.svg"
]
let
recall_precision_curve
table
=
let
img
=
[
docker_image
~
account
:
"pveber"
~
name
:
"r_basics"
~
tag
:
"20190710"
()
]
in
Workflow
.
shell
~
descr
:
"recall_precision_curve"
[
cmd
"Rscript"
~
img
[
Workflow
.
shell
~
descr
:
"recall_precision_curve"
~
img
[
cmd
"Rscript"
[
file_dump
(
string
Scripts
.
recall_precision_curve
)
;
dep
table
;
dest
;
...
...
@@ -193,8 +189,10 @@ let recall_precision_curve table =
let
%
workflow
recall_precision_auc_table
table
=
let
module
RT
=
Reviewphiltrans_toolbox
.
Result_table
in
let
{
RT
.
labels
;
scores_per_meth
}
=
RT
.
of_file
[
%
path
table
]
in
Array
.
map
scores_per_meth
~
f
:
(
fun
(
meth
,
scores
)
->
let
{
RT
.
oracle
;
scores_per_meth
}
=
RT
.
of_file
[
%
path
table
]
in
let
labels
=
Option
.
value_exn
oracle
in
List
.
map
scores_per_meth
~
f
:
(
fun
(
meth
,
scores
)
->
let
scores
=
Array
.
filter_opt
scores
in
let
_
,
auc
=
Biocaml_unix
.
Bin_pred
.
recall_precision_curve
~
labels
~
scores
in
meth
,
auc
)
...
...
lib/convergence_detection.mli
View file @
cbb474fb
...
...
@@ -76,4 +76,4 @@ val oracle :
val
recall_precision_auc_table
:
text
file
->
(
string
*
float
)
array
workflow
(
string
*
float
)
list
workflow
lib/detection_pipeline.ml
View file @
cbb474fb
open
Bistro
open
File_formats
module
type
Dataset
=
sig
module
type
Query
=
sig
type
t
val
tree
:
t
->
nhx
file
val
tree
:
branch_length_unit
:
[
`Nucleotide
|
`Amino_acid
|
`Codon
]
->
t
->
nhx
file
val
nucleotide_alignment
:
t
->
nucleotide_fasta
file
end
module
Make
(
D
:
Dataset
)
=
struct
open
D
module
type
S
=
sig
type
query
val
amino_acid_alignment
:
query
->
aminoacid_fasta
file
val
gene_tree
:
query
->
nw
file
val
dn_tree
:
query
->
text
file
val
ds_tree
:
query
->
text
file
val
dnds_tree
:
query
->
text
file
val
identical
:
query
->
text
file
val
topological
:
query
->
text
file
val
multinomial
:
query
->
text
file
val
multinomial_simulation_lrt
:
query
->
text
file
val
multinomial_simulation_sparse
:
query
->
text
file
val
multinomial_asymptotic_lrt
:
query
->
text
file
val
multinomial_asymptotic_sparse
:
query
->
text
file
val
tdg09
:
query
->
text
file
val
failsafe_tdg09
:
query
->
text
file
val
pcoc
:
?
gamma
:
bool
->
?
ncat
:
int
->
query
->
text
file
val
pcoc_v2
:
?
gamma
:
bool
->
?
aa_profiles
:
Pcoc
.
aa_profiles
->
query
->
text
file
val
diffsel
:
query
->
text
file
val
diffseldsparse
:
?
pi
:
float
->
?
shiftprob
:
float
*
float
->
?
eps
:
float
->
query
->
text
file
end
module
Make
(
Q
:
Query
)
=
struct
open
Q
let
amino_acid_alignment
d
=
Bppsuite
.
fna2fa
a
(
nucleotide_alignment
d
)
Utils
.
amino_acid_fasta_of_nucleotide_fast
a
(
nucleotide_alignment
d
)
let
phylip_nucleotide_alignment
d
=
Bppsuite
.
fna2phy
~
fna
:
(
nucleotide_alignment
d
)
...
...
@@ -20,53 +53,73 @@ module Make(D : Dataset) = struct
let
gene_tree
d
=
Tree_dataset
.
raxmlng_fna
~
fna
:
(
nucleotide_alignment
d
)
()
(* Workflow producing a Newick copy of the query's tree in which
   [Reviewphiltrans_toolbox.Convergence_tree.remove_nodes_with_single_child]
   has been applied through [Newick.map_inner_tree].
   [branch_length_unit] is forwarded unchanged to [tree]; the result is
   written to the workflow destination. *)
let%pworkflow tree_with_no_single_child ~branch_length_unit d : newick file =
  let tree_file = [%path tree ~branch_length_unit d] in
  let open Phylogenetics in
  let parsed = Newick.from_file tree_file in
  let pruned =
    Newick.map_inner_tree
      parsed
      ~f:Reviewphiltrans_toolbox.Convergence_tree.remove_nodes_with_single_child
  in
  Newick.to_file pruned [%dest]
let
identical
d
=
let
tree_sc
=
Tree_dataset
.
prepare_sc_tree
(
tree
d
)
in
let
tree_id
=
Tree_dataset
.
prepare_tree_with_node_id
(
tree
d
)
in
let
tree_sc
=
Tree_dataset
.
prepare_sc_tree
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
let
tree_id
=
Tree_dataset
.
prepare_tree_with_node_id
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
Identical
.
identical
~
tree_id
~
tree_sc
~
prot_model
:
"LG08"
~
faa
:
(
amino_acid_alignment
d
)
()
|>
Identical
.
results
let
topological
d
=
let
faa
=
amino_acid_alignment
d
in
let
tree_conv
=
Tree_dataset
.
prepare_topological_tree
(
tree
d
)
in
let
tree
=
Tree_dataset
.
prepare_tree_with_node_id
(
tree
d
)
in
let
tree_conv
=
Tree_dataset
.
prepare_topological_tree
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
let
tree
=
Tree_dataset
.
prepare_tree_with_node_id
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
in
Topological
.
topological
~
faa
~
tree
~
tree_conv
~
prot_model
:
"LG08"
()
|>
Topological
.
results
let
multinomial
d
=
Multinomial
.
multinomial
~
tree_sc
:
(
tree
d
)
~
tree_sc
:
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
~
faa
:
(
amino_acid_alignment
d
)
()
let
multinomial_asymptotic_lrt
d
=
Multinomial
.
multinomial_asymptotic_lrt
~
tree_sc
:
(
tree
d
)
~
tree_sc
:
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
~
faa
:
(
amino_acid_alignment
d
)
let
multinomial_asymptotic_sparse
d
=
Multinomial
.
multinomial_asymptotic_sparse
~
tree_sc
:
(
tree
d
)
~
tree_sc
:
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
~
faa
:
(
amino_acid_alignment
d
)
let
multinomial_simulation_lrt
d
=
Multinomial
.
multinomial_simulation_lrt
~
tree_sc
:
(
tree
d
)
~
tree_sc
:
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
~
faa
:
(
amino_acid_alignment
d
)
let
multinomial_simulation_sparse
d
=
Multinomial
.
multinomial_simulation_sparse
~
tree_sc
:
(
tree
d
)
~
tree_sc
:
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
~
faa
:
(
amino_acid_alignment
d
)
let
tdg09
d
=
Tamuri
.
tdg09
~
tree
:
(
tree
d
)
~
tree
:
(
tree
_with_no_single_child
~
branch_length_unit
:
`Amino_acid
d
)
~
faa
:
(
amino_acid_alignment
d
)
()
|>
Tamuri
.
results
(* Workflow writing a placeholder tdg09 result table for query [d].
   The table has a header line followed by one row per character of the
   first sequence found in the amino-acid alignment; rows are numbered from
   1 and carry 0.0, 0.0 and NA in the three score columns.
   Raises [Failure] when the FASTA file cannot be read or holds no item. *)
let%pworkflow mock_tdg09 d =
  match Biotk.Fasta.from_file [%path amino_acid_alignment d] with
  | Ok (_, first_item :: _) ->
    let open Core_kernel in
    let n_sites = String.length first_item.sequence in
    let header = "Sites\tTdg09_1MinusFDR\tTdg09_1MinusLRT\tTdg09_prob_post" in
    let rows =
      List.init n_sites ~f:(fun site -> sprintf "%d\t0.0\t0.0\tNA" (site + 1))
    in
    Out_channel.write_lines [%dest] (header :: rows)
  | _ -> failwith "couldn't read an item in fasta"
(* Combines [tdg09 d] with [mock_tdg09 d] through [Workflow.trywith].
   Presumably the mock table is used when the real tdg09 run fails --
   TODO confirm against the [Workflow.trywith] documentation. *)
let failsafe_tdg09 d =
  let fallback = mock_tdg09 d in
  let primary = tdg09 d in
  Workflow.trywith primary fallback
let
diffseltree
d
=
Tree_dataset
.
prepare_diffsel_tree
(
tree
d
)
Tree_dataset
.
prepare_diffsel_tree
(
tree
~
branch_length_unit
:
`Amino_acid
d
)
let
diffsel
d
=
Diffsel
.
diffsel
...
...
@@ -90,21 +143,21 @@ module Make(D : Dataset) = struct
let
pcoc
?
(
gamma
=
true
)
?
(
ncat
=
10
)
d
=
let
faa
=
amino_acid_alignment
d
in
let
tree
=
tree
d
in
let
tree
=
tree
~
branch_length_unit
:
`Amino_acid
d
in
Pcoc
.
pcoc
~
catx_est
:
ncat
~
plot_complete
:
false
~
gamma
~
faa
~
tree
()
|>
Pcoc
.
results
let
pcoc_v2
?
(
gamma
=
true
)
?
(
aa_profiles
=
`C10
)
d
=
let
faa
=
amino_acid_alignment
d
in
let
tree
=
tree
d
in
let
tree
=
tree
~
branch_length_unit
:
`Amino_acid
d
in
Pcoc
.
pcoc_v2
~
aa_profiles
~
gamma
~
faa
~
tree
()
|>
Pcoc
.
results
let
dn_ds_dnds_trees
d
=
Testnh
.
dn_ds_trees_real_data
~
fna
:
(
nucleotide_alignment
d
)
~
tree
:
(
tree
d
)
()
Testnh
.
dn_ds_trees_real_data
~
fna
:
(
nucleotide_alignment
d
)
~
tree
:
(
tree
~
branch_length_unit
:
`Nucleotide
d
)
()
let
dn_tree
d
=
(
dn_ds_dnds_trees
d
)
.
dn_tsv
let
ds_tree
d
=
(
dn_ds_dnds_trees
d
)
.
ds_tsv
let
dnds_tree
d
=
(
dn_ds_dnds_trees
d
)
.
dnds_tsv
end
lib/detection_pipeline.mli
0 → 100644
View file @
cbb474fb
open
Bistro
open
File_formats
(** Input expected by {!Make}: a query type together with the two files the
    detection methods are run on. *)
module type Query = sig
  (** A single query (one dataset to analyse). *)
  type t

  (** [tree ~branch_length_unit q] is the NHX tree associated with [q].
      Presumably the branch lengths are expressed in the requested unit --
      TODO confirm with the implementations of this signature. *)
  val tree :
    branch_length_unit:[`Nucleotide | `Amino_acid | `Codon] ->
    t ->
    nhx file

  (** [nucleotide_alignment q] is the nucleotide FASTA alignment of [q]. *)
  val nucleotide_alignment : t -> nucleotide_fasta file
end
(** Workflows made available for a query: derived inputs, dN/dS tables and
    one result file per detection method. *)
module type S = sig
  (** The query type the workflows are built from. *)
  type query

  (** {2 Derived inputs} *)

  (** Amino-acid FASTA alignment for the query. *)
  val amino_acid_alignment : query -> aminoacid_fasta file

  (** Gene tree for the query. *)
  val gene_tree : query -> nw file

  (** {2 dN, dS and dN/dS tables} *)

  val dn_tree : query -> text file
  val ds_tree : query -> text file
  val dnds_tree : query -> text file

  (** {2 Detection methods}

      Each function returns the result file of the method of the same
      name. *)

  val identical : query -> text file
  val topological : query -> text file
  val multinomial : query -> text file
  val multinomial_simulation_lrt : query -> text file
  val multinomial_simulation_sparse : query -> text file
  val multinomial_asymptotic_lrt : query -> text file
  val multinomial_asymptotic_sparse : query -> text file
  val tdg09 : query -> text file

  (** Variant of {!tdg09} with a fallback; presumably it still yields a
      result file when the tdg09 run fails -- TODO confirm with the
      implementation. *)
  val failsafe_tdg09 : query -> text file

  (** PCOC detection. [gamma] and [ncat] are optional; their defaults are
      set by the implementation. *)
  val pcoc : ?gamma:bool -> ?ncat:int -> query -> text file

  (** Second PCOC variant, parameterised by a set of amino-acid profiles
      instead of a category count. *)
  val pcoc_v2 :
    ?gamma:bool ->
    ?aa_profiles:Pcoc.aa_profiles ->
    query ->
    text file

  val diffsel : query -> text file

  val diffseldsparse :
    ?pi:float ->
    ?shiftprob:float * float ->
    ?eps:float ->
    query ->
    text file
end
(** [Make (Q)] provides the workflows of {!S} for queries of type [Q.t].
    The [query] type of {!S} is substituted away, so the resulting
    functions take [Q.t] values directly. *)
module Make (Q : Query) : S with type query := Q.t
lib/diffsel.ml
View file @
cbb474fb
...
...
@@ -61,27 +61,25 @@ let diffsel ~(phy_n:nucleotide_phylip file) ~(tree: _ file) ~(w_every:int) ~(n_c
let
n_cycles
=
if
(
n_cycles
>
200
)
then
20
else
n_cycles
in
let
script_r
=
tmp
//
"DiffselMCMCConvergenceAnalysis.Rmd"
in
(*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
Workflow
.
shell
~
descr
:
(
"convergence_detection.run_diffsel."
^
descr
)
[
within_container
env
(
and_list
[
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script_r
]
;
cmd
"cp"
[
dep
phy_n
;
dest_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
dest_tree
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
phy_n
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"/diffsel/_build/diffsel"
[
opt
"-t"
ident
tmp_tree
;
opt
"-d"
ident
tmp_ali
;
opt
"-ncond"
int
2
;
opt
"-x"
seq
[
int
w_every
;
string
" "
;
int
n_cycles
];
option
(
opt
"-seed"
int
)
seed
;
ident
chainname_tmp
;
];
cmd
"bash"
[(
file_dump
(
diffsel_add_iterations_script
~
chainname
~
ali
:
tmp_ali
~
tree
:
tmp_tree
~
seed
))];
]
)
Workflow
.
shell
~
descr
:
(
"convergence_detection.run_diffsel."
^
descr
)
~
img
:
env
[
and_list
[
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script_r
]
;
cmd
"cp"
[
dep
phy_n
;
dest_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
dest_tree
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
phy_n
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep
tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"/diffsel/_build/diffsel"
[
opt
"-t"
ident
tmp_tree
;
opt
"-d"
ident
tmp_ali
;
opt
"-ncond"
int
2
;
opt
"-x"
seq
[
int
w_every
;
string
" "
;
int
n_cycles
];
option
(
opt
"-seed"
int
)
seed
;
ident
chainname_tmp
;
];
cmd
"bash"
[(
file_dump
(
diffsel_add_iterations_script
~
chainname
~
ali
:
tmp_ali
~
tree
:
tmp_tree
~
seed
))];
]
]
let
check_conv
run_diffsel
:
[
`diffsel_check_conv
]
directory
=
...
...
@@ -90,24 +88,22 @@ let check_conv run_diffsel : [`diffsel_check_conv] directory =
let
trace
=
Workflow
.
select
run_diffsel
[
"myrun.trace"
]
in
let
out
=
dest
//
"out.html"
in
let
nb_new_iterations
=
dest
//
"new_iterations.txt"
in
Workflow
.
shell
~
descr
:
"convergence_detection.DiffselMCMCConvergenceAnalysis"
[
within_container
env
(
and_list
[
mkdir_p
tmp
;
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script
]
;
cmd
"Rscript"
[
string
"-e"
;
string
{
|
"rmarkdown::render(
\"
DiffselMCMCConvergenceAnalysis.Rmd
\"
,|} ;
string {|params=list(set_trace1=
\"
|} ;
dep trace ;
string {|
\"
))"
|
};
]
;
cmd
"cp"
[
string
"DiffselMCMCConvergenceAnalysis.html"
;
ident
out
]
;
cmd
"cp"
[
string
"new_iterations.txt"
;
ident
nb_new_iterations
]
]
)
Workflow
.
shell
~
descr
:
"convergence_detection.DiffselMCMCConvergenceAnalysis"
~
img
:
env
[
and_list
[
mkdir_p
tmp
;
mkdir_p
dest
;
cd
tmp
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffselMCMCConvergenceAnalysis
)
;
script
]
;
cmd
"Rscript"
[
string
"-e"
;
string
{
|
"rmarkdown::render(
\"
DiffselMCMCConvergenceAnalysis.Rmd
\"
,|} ;
string {|params=list(set_trace1=
\"
|} ;
dep trace ;
string {|
\"
))"
|
};
]
;
cmd
"cp"
[
string
"DiffselMCMCConvergenceAnalysis.html"
;
ident
out
]
;
cmd
"cp"
[
string
"new_iterations.txt"
;
ident
nb_new_iterations
]
]
]
let
selector
run_diffsel
:
text
file
=
...
...
@@ -120,25 +116,23 @@ let selector run_diffsel : text file =
let
dep_ali
=
(
dep
run_diffsel
)
//
"myrun.ali"
in
let
chainname
=
(
dep
run_diffsel
)
//
"myrun"
in
let
out
=
dest
in
Workflow
.
shell
~
descr
:
"convergence_detection.parse_diffsel"
[
within_container
env
(
and_list
[
mkdir_p
tmp
;
cd
tmp
;
Workflow
.
shell
~
descr
:
"convergence_detection.parse_diffsel"
~
img
:
env
[
and_list
[
mkdir_p
tmp
;
cd
tmp
;
cmd
"cp"
[
dep_ali
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep_tree
;
tmp_tree
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep_ali
;
tmp_ali
];
(* required dep to link the file in the env *)
cmd
"cp"
[
dep_tree
;
tmp_tree
];
(* required dep to link the file in the env *)
(*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffsel_script_utils
)
;
package
]
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffsel_analyze_result
)
;
script
]
;
(*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffsel_script_utils
)
;
package
]
;
cmd
"cp"
[
file_dump
(
string
Scripts
.
diffsel_analyze_result
)
;
script
]
;
cmd
"python"
[
string
"diffsel_analyze_result.py"
;
opt
"-r"
string
"/diffsel/_build/readdiffsel"
;
opt
"-o"
ident
out
;
ident
chainname
;
]
cmd
"python"
[
string
"diffsel_analyze_result.py"
;
opt
"-r"
string
"/diffsel/_build/readdiffsel"
;
opt
"-o"
ident
out
;
ident
chainname
;
]
)
]
]
lib/diffseldsparse.ml
View file @
cbb474fb
...
...
@@ -53,30 +53,28 @@ let diffseldsparse
let
chainname
=
dest
//
"myrun"
in