Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
bbfc9c93
Commit
bbfc9c93
authored
Feb 12, 2021
by
Louis Duchemin
Browse files
RER converge : refactor R scripts to save RDS file
parent
bbefd69d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
116 additions
and
31 deletions
+116
-31
lib/R/best_candidate_summary.R
lib/R/best_candidate_summary.R
+8
-10
lib/R/rer_converge.R
lib/R/rer_converge.R
+18
-10
lib/orthomam.ml
lib/orthomam.ml
+3
-3
lib/rer_converge.ml
lib/rer_converge.ml
+61
-2
lib/rer_converge.mli
lib/rer_converge.mli
+26
-6
No files found.
lib/R/best_candidate_summary.R
View file @
bbfc9c93
...
...
@@ -3,22 +3,20 @@ suppressPackageStartupMessages({
library
(
RERconverge
)
library
(
ggtree
)
})
masterTree
<-
read.tree
(
file
=
"<<<dep master_tree>>>"
)
results
<-
read_tsv
(
"<<< dep results_table >>>"
)
max_read
<-
nrow
(
results
)
gene_trees
<-
readTrees
(
"<<<dep gene_tree_set>>>"
,
max.read
=
max_read
,
masterTree
=
masterTree
)
convergent_species
<-
read_tsv
(
"<<<dep phenotypes>>>"
,
col_names
=
"species"
)
%>%
pull
(
species
)
rds
=
readRDS
(
"<<< dep rds >>>"
)
master_tree
<-
rds
$
master_tree
results
<-
rds
$
results
max_read
<-
nrow
(
results
)
gene_trees
<-
rds
$
gene_trees
convergent_species
<-
rds
$
phenotypes
ranking
<-
order
(
results
$
p.adj
)
gene_ids
<-
names
(
gene_trees
$
trees
)
pdf
(
"<<<dest>>>"
,
pointsize
=
9
,
height
=
12
)
plotTreeHighlightBranches
(
master
T
ree
,
plotTreeHighlightBranches
(
master
_t
ree
,
hlspecies
=
convergent_species
,
hlcols
=
"red"
,
useGG
=
T
,
...
...
lib/R/rer_converge.R
View file @
bbfc9c93
...
...
@@ -3,8 +3,8 @@ suppressPackageStartupMessages({
library
(
RERconverge
)
})
master
T
ree
<-
read.tree
(
file
=
"<<<dep master_tree>>>"
)
gene_trees
<-
readTrees
(
"<<<dep gene_tree_set>>>"
,
max.read
=<<<
max_read
>>>
,
master
T
ree
=
master
T
ree
,
minSpecs
=<<<
min_specs
>>>
)
master
_t
ree
<-
read.tree
(
file
=
"<<<dep master_tree>>>"
)
gene_trees
<-
readTrees
(
"<<<dep gene_tree_set>>>"
,
max.read
=<<<
max_read
>>>
,
master
_t
ree
=
master
_t
ree
,
minSpecs
=<<<
min_specs
>>>
)
relative_rates
<-
getAllResiduals
(
gene_trees
,
...
...
@@ -24,18 +24,19 @@ load_traits_file <- function(path, is_continuous) {
load_species_file
(
path
)
}
load_traits_paths
<-
function
(
traits_file_path
,
phenotypes
<-
load_traits_file
(
"<<<dep phenotypes>>>"
,
is_continuous
)
build_traits_paths
<-
function
(
phenotypes
,
gene_trees
,
is_continuous
,
useSpecies
=
NULL
,
clade
=
c
(
"ancestral"
,
"terminal"
,
"all"
))
{
traits
<-
load_traits_file
(
traits_file_path
,
is_continuous
)
if
(
is_continuous
)
{
char2Paths
(
trait
s
,
gene_trees
)
char2Paths
(
phenotype
s
,
gene_trees
)
}
else
{
foreground2Tree
(
trait
s
,
foreground2Tree
(
phenotype
s
,
gene_trees
,
clade
=
clade
,
useSpecies
=
useSpecies
,
...
...
@@ -45,8 +46,8 @@ load_traits_paths <-
}
}
traits_paths
<-
loa
d_traits_paths
(
"<<<dep
phenotypes
>>>"
,
traits_paths
<-
buil
d_traits_paths
(
phenotypes
,
gene_trees
,
<<<
template_bool
continuous
>>>
)
...
...
@@ -67,4 +68,11 @@ res <- res %>%
mutate
(
gene_id
=
names
(
gene_trees
$
trees
))
%>%
select
(
gene_id
,
everything
())
write_tsv
(
res
,
"<<<dest>>>"
)
\ No newline at end of file
saveRDS
(
list
(
relative_rates
=
relative_rates
,
results
=
res
,
traits_paths
=
traits_paths
,
master_tree
=
master_tree
,
gene_trees
=
gene_trees
,
phenotypes
=
phenotypes
),
file
=
"<<<dest>>>"
)
lib/orthomam.ml
View file @
bbfc9c93
...
...
@@ -699,10 +699,10 @@ let rer_converge ?transform ?weighted ?scale ?continuous ?(summary_n_genes = 10)
in
let
gene_tree_set
=
RER
.
gene_tree_file
gene_trees
in
let
phenotypes
=
RER
.
phenotypes_of_convergent_species
phenotypes
in
let
r
esults_table
=
RER
.
rer_converge
?
transform
?
weighted
?
scale
?
continuous
let
r
ds
=
RER
.
rer_converge
?
transform
?
weighted
?
scale
?
continuous
~
min_specs
:
10
~
master_tree
:
species_tree
~
gene_tree_set
~
phenotypes
()
in
let
candidates_summary
=
RER
.
b
es
t_candidate_summary
~
n_genes
:
summary_n_genes
~
gene_tree_set
~
master_tree
:
species_tree
~
results_table
~
phenotype
s
let
results_table
=
RER
.
r
es
ult_table_of_rds
rds
in
let
candidates_summary
=
RER
.
best_candidate_summary
~
n_genes
:
summary_n_genes
rd
s
in
results_table
,
candidates_summary
...
...
lib/rer_converge.ml
View file @
bbfc9c93
...
...
@@ -4,11 +4,18 @@ open Bistro.Shell_dsl
open
Phylogenetics
open
Tree
class
type
rds
=
object
inherit
binary_file
method
format
:
[
`format
]
end
type
tree
=
(
Newick
.
node_info
,
string
,
float
)
Tree
.
t
type
annotated_tree
=
(
Newick
.
node_info
,
string
,
float
*
String
.
Set
.
t
)
Tree
.
t
let
img
=
[
docker_image
~
account
:
"lsdch"
~
name
:
"rerconverge"
~
tag
:
"0.1.1"
()
]
...
...
@@ -60,6 +67,14 @@ let rer_converge ?max_read ?min_specs ?(transform = `sqrt)
(
script
?
max_read
~
transform
~
weighted
~
scale
~
continuous
?
min_specs
~
gene_tree_set
~
phenotypes
~
master_tree
)
let
result_table_of_rds
rds
=
let
script
=
[
%
script
{
|
library
(
tidyverse
)
rds
=
readRDS
(
"<<< dep rds >>>"
)
write_tsv
(
rds
$
results
,
"<<<dest>>>"
)
|
}]
in
Bistro_utils
.
R_script
.
workflow
~
img
~
descr
:
"extract_result_table"
script
let
annotate_branches_with_tip_leaves
(
tree
:
tree
)
:
annotated_tree
=
let
rec
traverse_tree
tree
=
match
tree
with
...
...
@@ -163,7 +178,7 @@ let match_species_tree_position ~gene_tree ~clipped_species_tree =
Workflow
.
path_plugin
~
descr
:
"match_species_tree_position"
f
let
best_candidate_summary
?
(
n_genes
=
10
)
~
gene_tree_set
~
master_tree
~
results_table
~
phenotypes
=
let
best_candidate_summary
?
(
n_genes
=
10
)
rds
=
let
n_genes
=
int
n_genes
in
let
script
=
[
%
include_script
"lib/R/best_candidate_summary.R"
]
in
Bistro_utils
.
R_script
.
workflow
~
img
~
descr
:
"RER_converge.best_candidate_summary"
script
...
...
@@ -172,4 +187,48 @@ let match_species_tree_position ~gene_tree ~clipped_species_tree =
let
n_min_genes
=
int
n_min_genes
in
let
script
=
[
%
include_script
"lib/R/enrichment_analysis.R"
]
in
Bistro_utils
.
R_script
.
workflow
~
img
~
descr
:
"RER_converge.enrichment_analysis"
script
\ No newline at end of file
module
Candidate
=
struct
type
t
=
{
gene
:
string
;
rho
:
float
;
sp_count
:
int
;
p
:
float
;
padj
:
float
;
}
let
parse_header
=
function
|
hd
::
tl
->
Ok
(
hd
,
tl
)
|
_
->
Error
"Empty RER converge result file"
let
parse
(
line
:
string
list
)
=
match
line
with
|
[
gene
;
rho
;
sp_count
;
p
;
padj
]
->
Ok
{
gene
;
rho
=
float_of_string
rho
;
sp_count
=
int_of_string
sp_count
;
p
=
float_of_string
p
;
padj
=
float_of_string
padj
}
|
_
->
Error
"Invalid line form in RER converge result file"
let
list_of_file
filename
=
let
open
Result
.
Monad_infix
in
In_channel
.
read_lines
filename
|>
List
.
map
~
f
:
(
String
.
split
~
on
:
'\t'
)
|>
parse_header
>>=
fun
(
_
,
split_lines
)
->
Result
.
all
(
List
.
map
~
f
:
parse
split_lines
)
let
take_n_best
~
candidates
~
n
=
List
.
sort
candidates
~
compare
:
(
fun
a
b
->
Float
.
compare
a
.
padj
b
.
padj
)
|>
Fn
.
flip
List
.
take
n
let
load_best_candidates
~
result_table
~
n
=
let
f
=
fun
%
workflow
()
->
match
list_of_file
[
%
path
result_table
]
with
|
Ok
candidates
->
take_n_best
~
candidates
~
n
|
Error
msg
->
failwith
msg
in
Workflow
.
plugin
~
descr
:
"RER.load_best_candidates"
f
end
\ No newline at end of file
lib/rer_converge.mli
View file @
bbfc9c93
open
Bistro
open
File_formats
class
type
rds
=
object
inherit
binary_file
method
format
:
[
`format
]
end
val
phenotypes_of_convergent_species
:
string
list
workflow
->
tsv
path
workflow
...
...
@@ -17,21 +22,36 @@ val rer_converge :
gene_tree_set
:
tsv
file
->
phenotypes
:
tsv
file
->
unit
->
tsv
file
rds
file
val
result_table_of_rds
:
rds
file
->
tsv
file
val
match_species_tree_position
:
gene_tree
:
newick
file
->
clipped_species_tree
:
newick
file
->
newick
file
val
best_candidate_summary
:
?
n_genes
:
int
->
gene_tree_set
:
tsv
file
->
master_tree
:
newick
file
->
results_table
:
tsv
file
->
phenotypes
:
tsv
file
->
rds
file
->
pdf
file
val
enrichment_analysis
:
?
n_min_genes
:
int
->
results_table
:
tsv
file
->
annotations
:
tsv
file
->
tsv
file
\ No newline at end of file
tsv
file
module
Candidate
:
sig
type
t
=
{
gene
:
string
;
rho
:
float
;
sp_count
:
int
;
p
:
float
;
padj
:
float
;
}
val
load_best_candidates
:
result_table
:
tsv
file
->
n
:
int
->
t
list
workflow
end
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment