Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit bbfc9c93 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

RER converge : refactor R scripts to save RDS file

parent bbefd69d
......@@ -3,22 +3,20 @@ suppressPackageStartupMessages({
library(RERconverge)
library(ggtree)
})
masterTree <- read.tree(file = "<<<dep master_tree>>>")
results <- read_tsv("<<< dep results_table >>>")
max_read <- nrow(results)
gene_trees <-
readTrees("<<<dep gene_tree_set>>>",
max.read = max_read,
masterTree = masterTree)
convergent_species <-
read_tsv("<<<dep phenotypes>>>", col_names = "species") %>% pull(species)
rds = readRDS("<<< dep rds >>>")
master_tree <- rds$master_tree
results <- rds$results
max_read <- nrow(results)
gene_trees <-rds$gene_trees
convergent_species <- rds$phenotypes
ranking <- order(results$p.adj)
gene_ids <- names(gene_trees$trees)
pdf("<<<dest>>>", pointsize = 9, height = 12)
plotTreeHighlightBranches(masterTree,
plotTreeHighlightBranches(master_tree,
hlspecies = convergent_species,
hlcols = "red",
useGG = T,
......
......@@ -3,8 +3,8 @@ suppressPackageStartupMessages({
library(RERconverge)
})
masterTree <- read.tree(file="<<<dep master_tree>>>")
gene_trees <- readTrees("<<<dep gene_tree_set>>>", max.read=<<<max_read>>>, masterTree=masterTree, minSpecs=<<<min_specs>>>)
master_tree <- read.tree(file="<<<dep master_tree>>>")
gene_trees <- readTrees("<<<dep gene_tree_set>>>", max.read=<<<max_read>>>, master_tree=master_tree, minSpecs=<<<min_specs>>>)
relative_rates <- getAllResiduals(
gene_trees,
......@@ -24,18 +24,19 @@ load_traits_file <- function(path, is_continuous) {
load_species_file(path)
}
load_traits_paths <-
function(traits_file_path,
phenotypes <- load_traits_file("<<<dep phenotypes>>>", is_continuous)
build_traits_paths <-
function(phenotypes,
gene_trees,
is_continuous,
useSpecies = NULL,
clade = c("ancestral", "terminal", "all"))
{
traits <- load_traits_file(traits_file_path, is_continuous)
if (is_continuous) {
char2Paths(traits, gene_trees)
char2Paths(phenotypes, gene_trees)
} else {
foreground2Tree(traits,
foreground2Tree(phenotypes,
gene_trees,
clade = clade,
useSpecies = useSpecies,
......@@ -45,8 +46,8 @@ load_traits_paths <-
}
}
traits_paths <- load_traits_paths(
"<<<dep phenotypes>>>",
traits_paths <- build_traits_paths(
phenotypes,
gene_trees,
<<<template_bool continuous>>>
)
......@@ -67,4 +68,11 @@ res <- res %>%
mutate(gene_id=names(gene_trees$trees)) %>%
select(gene_id, everything())
write_tsv(res, "<<<dest>>>")
\ No newline at end of file
saveRDS(list(
relative_rates = relative_rates,
results = res,
traits_paths = traits_paths,
master_tree = master_tree,
gene_trees = gene_trees,
phenotypes = phenotypes
), file="<<<dest>>>")
......@@ -699,10 +699,10 @@ let rer_converge ?transform ?weighted ?scale ?continuous ?(summary_n_genes = 10)
in
let gene_tree_set = RER.gene_tree_file gene_trees in
let phenotypes = RER.phenotypes_of_convergent_species phenotypes in
let results_table = RER.rer_converge ?transform ?weighted ?scale ?continuous
let rds = RER.rer_converge ?transform ?weighted ?scale ?continuous
~min_specs:10 ~master_tree:species_tree ~gene_tree_set ~phenotypes () in
let candidates_summary = RER.best_candidate_summary ~n_genes:summary_n_genes
~gene_tree_set ~master_tree:species_tree ~results_table ~phenotypes
let results_table = RER.result_table_of_rds rds in
let candidates_summary = RER.best_candidate_summary ~n_genes:summary_n_genes rds
in
results_table, candidates_summary
......
......@@ -4,11 +4,18 @@ open Bistro.Shell_dsl
open Phylogenetics
open Tree
class type rds = object
inherit binary_file
method format : [`format]
end
type tree = (Newick.node_info, string, float) Tree.t
type annotated_tree =
(Newick.node_info, string, float * String.Set.t) Tree.t
let img =
[ docker_image ~account:"lsdch" ~name:"rerconverge" ~tag:"0.1.1" () ]
......@@ -60,6 +67,14 @@ let rer_converge ?max_read ?min_specs ?(transform = `sqrt)
(script ?max_read ~transform ~weighted ~scale ~continuous ?min_specs
~gene_tree_set ~phenotypes ~master_tree)
let result_table_of_rds rds =
let script = [%script {|
library(tidyverse)
rds = readRDS("<<< dep rds >>>")
write_tsv(rds$results, "<<<dest>>>")
|}] in
Bistro_utils.R_script.workflow ~img ~descr:"extract_result_table" script
let annotate_branches_with_tip_leaves (tree : tree) : annotated_tree =
let rec traverse_tree tree =
match tree with
......@@ -163,7 +178,7 @@ let match_species_tree_position ~gene_tree ~clipped_species_tree =
Workflow.path_plugin ~descr:"match_species_tree_position" f
let best_candidate_summary ?(n_genes = 10) ~gene_tree_set ~master_tree ~results_table ~phenotypes =
let best_candidate_summary ?(n_genes = 10) rds=
let n_genes = int n_genes in
let script = [%include_script "lib/R/best_candidate_summary.R"] in
Bistro_utils.R_script.workflow ~img ~descr:"RER_converge.best_candidate_summary" script
......@@ -172,4 +187,48 @@ let match_species_tree_position ~gene_tree ~clipped_species_tree =
let n_min_genes = int n_min_genes in
let script = [%include_script "lib/R/enrichment_analysis.R"] in
Bistro_utils.R_script.workflow ~img ~descr:"RER_converge.enrichment_analysis" script
\ No newline at end of file
module Candidate = struct
type t = {
gene: string;
rho: float;
sp_count: int;
p: float;
padj: float;
}
let parse_header = function
|hd::tl -> Ok (hd, tl)
| _ -> Error "Empty RER converge result file"
let parse (line:string list) =
match line with
| [gene; rho ; sp_count ; p ; padj] ->
Ok {
gene;
rho = float_of_string rho;
sp_count = int_of_string sp_count;
p = float_of_string p;
padj = float_of_string padj
}
| _ -> Error "Invalid line form in RER converge result file"
let list_of_file filename =
let open Result.Monad_infix in
In_channel.read_lines filename
|> List.map ~f:(String.split ~on:'\t')
|> parse_header
>>= fun (_, split_lines) -> Result.all (List.map ~f:parse split_lines)
let take_n_best ~candidates ~n =
List.sort candidates ~compare:(fun a b -> Float.compare a.padj b.padj)
|> Fn.flip List.take n
let load_best_candidates ~result_table ~n =
let f = fun%workflow () ->
match list_of_file [%path result_table] with
| Ok candidates -> take_n_best ~candidates ~n
| Error msg -> failwith msg
in Workflow.plugin ~descr:"RER.load_best_candidates" f
end
\ No newline at end of file
open Bistro
open File_formats
class type rds = object
inherit binary_file
method format : [`format]
end
val phenotypes_of_convergent_species :
string list workflow -> tsv path workflow
......@@ -17,21 +22,36 @@ val rer_converge :
gene_tree_set:tsv file ->
phenotypes:tsv file ->
unit ->
tsv file
rds file
val result_table_of_rds : rds file -> tsv file
val match_species_tree_position :
gene_tree:newick file -> clipped_species_tree:newick file -> newick file
val best_candidate_summary :
?n_genes:int ->
gene_tree_set:tsv file ->
master_tree: newick file ->
results_table:tsv file ->
phenotypes: tsv file ->
rds file ->
pdf file
val enrichment_analysis :
?n_min_genes:int ->
results_table:tsv file ->
annotations: tsv file ->
tsv file
\ No newline at end of file
tsv file
module Candidate: sig
type t = {
gene: string;
rho: float;
sp_count: int;
p: float;
padj: float;
}
val load_best_candidates :
result_table: tsv file ->
n: int ->
t list workflow
end
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment