Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 346dfbcf authored by Louis Duchemin's avatar Louis Duchemin
Browse files

RERconverge pipeline : working results summary

parent 659364c9
......@@ -10,8 +10,13 @@ let () =
try
let loggers = [ Bistro_utils.Console_logger.create () ] in
let allowed_containers = [ `Docker ] in
Bistro.Workflow.path target
|> Bistro_engine.Scheduler.simple_eval_exn ~allowed_containers ~np:4
~mem:(`GB 4) ~loggers
|> print_endline
let execute workflow =
Bistro.Workflow.path workflow
|> Bistro_engine.Scheduler.simple_eval_exn ~allowed_containers ~np:10
~mem:(`GB 8) ~loggers
|> print_endline
in
execute (fst target);
execute (snd target);
with Failure msg -> print_endline msg
suppressPackageStartupMessages({
library(tidyverse)
library(RERconverge)
library(ggtree)
})
masterTree <- read.tree(file = "<<<dep master_tree>>>")
results <- read_tsv("<<< dep results_table >>>")
max_read <- nrow(results)
gene_trees <-
readTrees("<<<dep gene_tree_set>>>",
max.read = max_read,
masterTree = masterTree)
convergent_species <-
read_tsv("<<<dep phenotypes>>>", col_names = "species") %>% pull(species)
ranking <- order(results$p.adj)
gene_ids <- names(gene_trees$trees)
pdf("<<<dest>>>", pointsize = 9, height = 12)
plotTreeHighlightBranches(masterTree,
hlspecies = convergent_species,
hlcols = "red",
useGG = T,
main="Species Tree")
for (i in 1:<<< n_genes >>>) {
plotTreeHighlightBranches(
gene_trees$trees[[ranking[i]]],
hlspecies = convergent_species,
hlcols = "red",
useGG = T,
main=sprintf("%s %.1f", gene_ids[i], -log10(results$p.adj[ranking[i]]))
) %>% print()
}
dev.off()
suppressPackageStartupMessages({
library(tidyverse)
library(RERconverge)
})
masterTree <- read.tree(file="<<<dep master_tree>>>")
gene_trees <- readTrees("<<<dep gene_tree_set>>>", max.read=<<<max_read>>>, masterTree=masterTree, minSpecs=<<<min_specs>>>)
relative_rates <- getAllResiduals(
gene_trees,
transform = "<<<template_transform transform>>>",
weighted = <<<template_bool weighted>>>,
scale = <<<template_bool scale>>>
)
load_species_file <- function(path) {
read_table(path, col_names = "species") %>% pull(species)
}
load_traits_file <- function(path, is_continuous) {
if (is_continuous)
read_table(path, col_names = c('species', 'value')) %>% deframe()
else
load_species_file(path)
}
load_traits_paths <-
function(traits_file_path,
gene_trees,
is_continuous,
useSpecies = NULL,
clade = c("ancestral", "terminal", "all"))
{
traits <- load_traits_file(traits_file_path, is_continuous)
if (is_continuous) {
char2Paths(traits, gene_trees)
} else {
foreground2Tree(traits,
gene_trees,
clade = clade,
useSpecies = useSpecies,
plot=F # FIXME : plot=T raises cryptic error
) %>%
tree2Paths(gene_trees)
}
}
traits_paths <- load_traits_paths(
"<<<dep phenotypes>>>",
gene_trees,
<<<template_bool continuous>>>
)
res <- if (<<<template_bool continuous>>>) {
correlateWithContinuousPhenotype(
relative_rates,
traits_paths,
)
} else {
correlateWithBinaryPhenotype(
relative_rates,
traits_paths,
)
}
write_tsv(res, "<<<dest>>>")
\ No newline at end of file
......@@ -4,7 +4,8 @@
(libraries core biotk bistro-bio bistro.utils codepitk containers gsl
ocaml-r.graphics ocaml-r.grDevices phylogenetics prc)
(preprocess
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here))
(preprocessor_deps (glob_files R/*.R)))
(rule
(targets scripts.ml)
......
......@@ -615,7 +615,7 @@ let draw_site q pos =
let rer_converge ?max_read ?transform ?weighted ?scale ?continuous ~db
let rer_converge ?transform ?weighted ?scale ?continuous ?(summary_n_genes = 10) ~db
~phenotypes () =
let open Bistro_bio in
let module RER = Rer_converge in
......@@ -623,8 +623,10 @@ let rer_converge ?max_read ?transform ?weighted ?scale ?continuous ~db
let queries =
alignments_of_db db
|> List.map ~f:(query ~convergent_species:phenotypes)
|> Fn.flip List.take 100
(* |> Fn.flip List.take 100 *)
in
let nqueries = List.length queries in
if summary_n_genes > nqueries then invalid_arg "summary_n_genes greater than number of queries used for analysis" ;
let gene_ids = List.map ~f:family_id queries in
let ali_files = List.map ~f:Q.alignment queries in
(* let model = Iqtree.model_spec ~freq_type:`F `ECMK07 in *)
......@@ -647,6 +649,12 @@ let rer_converge ?max_read ?transform ?weighted ?scale ?continuous ~db
|> List.zip_exn gene_ids
in
let gene_tree_set = RER.gene_tree_file gene_trees in
let phenotype = RER.phenotypes_of_convergent_species phenotypes in
RER.rer_converge ?max_read ?transform ?weighted ?scale ?continuous
~min_specs:10 ~master_tree:species_tree ~gene_tree_set ~phenotype
let phenotypes = RER.phenotypes_of_convergent_species phenotypes in
let results_table = RER.rer_converge ?transform ?weighted ?scale ?continuous
~min_specs:10 ~master_tree:species_tree ~gene_tree_set ~phenotypes () in
let candidates_summary = RER.best_candidate_summary ~n_genes:summary_n_genes
~gene_tree_set ~master_tree:species_tree ~results_table ~phenotypes
in
results_table, candidates_summary
......@@ -87,12 +87,12 @@ val site_ranking :
val draw_site : query -> int -> pdf file
val rer_converge :
?max_read:int ->
?transform:[ `none | `sqrt | `log ] ->
?weighted:bool ->
?scale:bool ->
?continuous:bool ->
?summary_n_genes:int ->
db:Orthomam_db.t ->
phenotypes:string list workflow ->
unit ->
tsv file
tsv file * pdf file
......@@ -10,7 +10,7 @@ type annotated_tree =
(Newick.node_info, string, float * String.Set.t) Tree.t
let img =
[ docker_image ~account:"lsdch" ~name:"rerconverge" ~tag:"0.1.0" () ]
[ docker_image ~account:"lsdch" ~name:"rerconverge" ~tag:"0.1.1" () ]
let phenotypes_of_convergent_species species_list =
let f =
......@@ -48,86 +48,17 @@ let template_bool t = string (if t then "T" else "F")
let ropt f = Option.value_map ~f ~default:(string "NA")
let script ?max_read ?min_specs ~transform ~weighted ~scale ~continuous
~gene_tree_set ~phenotype ~master_tree =
~gene_tree_set ~phenotypes ~master_tree =
let max_read = ropt int max_read in
let min_specs = ropt int min_specs in
[%script
{|
suppressPackageStartupMessages({
library(tidyverse)
library(RERconverge)
})
masterTree <- read.tree(file="{{dep master_tree}}")
gene_trees <- readTrees("{{dep gene_tree_set}}", max.read={{max_read}}, masterTree=masterTree, minSpecs={{min_specs}})
relative_rates <- getAllResiduals(
gene_trees,
transform = "{{template_transform transform}}",
weighted = {{template_bool weighted}},
scale = {{template_bool scale}}
)
load_species_file <- function(path) {
read_table(path, col_names = "species") %>% pull(species)
}
load_traits_file <- function(path, is_continuous) {
if (is_continuous)
read_table(path, col_names = c('species', 'value')) %>% deframe()
else
load_species_file(path)
}
load_traits_paths <-
function(traits_file_path,
gene_trees,
is_continuous,
useSpecies = NULL,
clade = c("ancestral", "terminal", "all"))
{
traits <- load_traits_file(traits_file_path, is_continuous)
if (is_continuous) {
char2Paths(traits, gene_trees)
} else {
foreground2Tree(traits,
gene_trees,
clade = clade,
useSpecies = useSpecies,
plot=F # FIXME : plot=T raises cryptic error
) %>%
tree2Paths(gene_trees)
}
}
traits_paths <- load_traits_paths(
"{{dep phenotype}}",
gene_trees,
{{template_bool continuous}}
)
res <- if ({{template_bool continuous}}) {
correlateWithContinuousPhenotype(
relative_rates,
traits_paths,
)
} else {
correlateWithBinaryPhenotype(
relative_rates,
traits_paths,
)
}
write_tsv(res, "{{dest}}")
|}]
[%include_script "lib/R/rer_converge.R"]
let rer_converge ?max_read ?min_specs ?(transform = `sqrt)
?(weighted = true) ?(scale = true) ?(continuous = false)
~master_tree ~gene_tree_set ~phenotype =
~master_tree ~gene_tree_set ~phenotypes ()=
Bistro_utils.R_script.workflow ~img ~descr:"rer_converge"
(script ?max_read ~transform ~weighted ~scale ~continuous ?min_specs
~gene_tree_set ~phenotype ~master_tree)
~gene_tree_set ~phenotypes ~master_tree)
let annotate_branches_with_tip_leaves (tree : tree) : annotated_tree =
let rec traverse_tree tree =
......@@ -232,30 +163,7 @@ let match_species_tree_position ~gene_tree ~clipped_species_tree =
Workflow.path_plugin ~descr:"match_species_tree_position" f
let best_candidate_summary ?(n_genes = 10) ~gene_tree_set ~master_tree ~results_table ~phenotype =
let best_candidate_summary ?(n_genes = 10) ~gene_tree_set ~master_tree ~results_table ~phenotypes =
let n_genes = int (n_genes) in
let script ~n_genes ~gene_tree_set ~master_tree ~results_table ~phenotype =
[%script
{|
suppressPackageStartupMessages({
library(tidyverse)
library(RERconverge)
})
masterTree <- read.tree(file="{{dep master_tree}}")
results <- read_tsv("{{ dep results_table }}")
max_read <- nrow(results)
gene_trees <- readTrees("{{dep gene_tree_set}}", max.read=max_read, masterTree=masterTree)
convergent_species <- read_tsv("{{dep phenotype}}", col_names="species") %>% pull(species)
ranking <- order(results$padj)
plotTreeHighlightBranches(masterTree, hlspecies=convergent_species, hlcols="red", useGG=T)
pdf("test.pdf", pointsize = 9, height=12, )
for ( i in 1:{{n_genes}} ) {
plotTreeHighlightBranches(gene_trees$trees[[i]], hlspecies=convergent_species, hlcols="red", useGG=T)
}
|}
] in
script ~n_genes ~gene_tree_set ~master_tree ~results_table ~phenotype
\ No newline at end of file
let script = [%include_script "lib/R/best_candidate_summary.R"] in
Bistro_utils.R_script.workflow ~img ~descr:"RER_converge.best_candidate_summary" script
\ No newline at end of file
......@@ -15,7 +15,8 @@ val rer_converge :
?continuous:bool ->
master_tree:newick file ->
gene_tree_set:tsv file ->
phenotype:tsv file ->
phenotypes:tsv file ->
unit ->
tsv file
val match_species_tree_position :
......@@ -26,5 +27,5 @@ val best_candidate_summary :
gene_tree_set:tsv file ->
master_tree: newick file ->
results_table:tsv file ->
phenotype: text file ->
phenotypes: tsv file ->
pdf file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment