Commit d15d6813 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

RERconverge : compute results for each foreground clade specification

parent b722c7d9
......@@ -24,13 +24,25 @@ let () =
|> print_endline
in
List.iter2 (fun label phenotypes ->
let target = pipeline ~phenotypes in
pipeline ~phenotypes
|> List.iter (fun (clade, target) ->
print_endline label;
execute (target.rer_plot ~pat:"*LIM2*")
print_endline ("Foreground clade : " ^ (Codepi.Rer_converge.string_of_clade clade));
print_string "Result table : ";
execute Codepi.Orthomam.(target.result_table);
print_string "RER plots : ";
execute (target.rer_plot ~pat:"*LIM2*");
print_string "Candidates summary : ";
execute target.best_candidate_summary;
)
)
["Subterraneans"]
["Subterraneans"; "Marines"]
(* ; "Marines "; "Echolocators"] *)
[subterraneans]
[subterraneans; marines]
(* ; marines; echolocators] *)
with Failure msg -> print_endline msg
......@@ -5,9 +5,10 @@ suppressPackageStartupMessages({
})
rds = readRDS("<<< dep rds >>>")
clade = "<<<template_of_clade clade>>>"
master_tree <- rds$master_tree
results <- rds$results
results <- rds[[clade]]$table
max_read <- nrow(results)
gene_trees <-rds$gene_trees
convergent_species <- rds$phenotypes
......
......@@ -5,11 +5,12 @@ suppressPackageStartupMessages({
rds = readRDS("<<<dep rds>>>")
clade = "<<<template_of_clade clade>>>"
gene_ids = <<<gene_ids>>>
rers = rds$relative_rates
pdf("<<<dest>>>")
pdf("<<<dest>>>", pointsize = 9, height = 12, width=10)
for (gene_id in gene_ids) {
plotRers(rers, gene_id, phenv=rds$traits_paths)
plotRers(rers, gene_id, phenv=rds[[clade]]$traits_paths)
}
dev.off()
\ No newline at end of file
......@@ -40,40 +40,53 @@ build_traits_paths <-
foreground2Tree(phenotypes,
gene_trees,
clade = clade,
useSpecies = useSpecies,
# useSpecies = useSpecies,
plot=F # FIXME : plot=T raises cryptic error
) %>%
tree2Paths(gene_trees)
}
}
traits_paths <- build_traits_paths(
clades = c("ancestral", "terminal", "all")
results = lapply(clades, FUN=function(clade){
traits_paths <- build_traits_paths(
phenotypes,
gene_trees,
is_continuous
)
)
res <- if (is_continuous) {
res <- if (is_continuous) {
correlateWithContinuousPhenotype(
relative_rates,
traits_paths,
)
} else {
} else {
correlateWithBinaryPhenotype(
relative_rates,
traits_paths,
)
}
}
res <- res %>%
table <- res %>%
mutate(gene_id=names(gene_trees$trees)) %>%
select(gene_id, everything())
saveRDS(list(
list(
table=table,
traits_paths=traits_paths
)
})
names(results) = clades
results = results %>%
list_modify(
relative_rates = relative_rates,
results = res,
traits_paths = traits_paths,
master_tree = master_tree,
gene_trees = gene_trees,
phenotypes = phenotypes
), file="<<<dest>>>")
)
saveRDS(results, file="<<<dest>>>")
......@@ -707,17 +707,18 @@ let rer_converge ?transform ?weighted ?scale ?continuous ?(summary_n_genes = 10)
let phenotypes = RER.phenotypes_of_convergent_species phenotypes in
let rds = RER.rer_converge ?transform ?weighted ?scale ?continuous
~min_specs:10 ~master_tree:species_tree ~gene_tree_set ~phenotypes () in
let result_table = RER.result_table_of_rds rds in
let best_candidate_summary = RER.best_candidate_summary ~n_genes:summary_n_genes rds in
List.map RER.[All; Ancestral; Terminal] ~f:(fun clade ->
let result_table = RER.result_table_of_rds rds clade in
let best_candidate_summary = RER.best_candidate_summary
~n_genes:summary_n_genes rds clade
in
let rer_plot ~pat =
search_alignments ~pat db
|> List.map ~f:(fun (Alignment (_, ali)) -> family_id_of_alignment ali)
|> RER.plot_relative_rates rds
|> RER.plot_relative_rates rds clade
in
{
result_table;
best_candidate_summary;
rer_plot;
}
clade, { result_table; best_candidate_summary; rer_plot; }
)
......@@ -106,7 +106,7 @@ val rer_converge :
db:Orthomam_db.t ->
phenotypes:string list workflow ->
unit ->
rer_converge_output
(Rer_converge.foreground_clade * rer_converge_output) list
module Binary_phenotype: sig
type t = {
......
......@@ -11,10 +11,23 @@ end
type tree = (Newick.node_info, string, float) Tree.t
(** like a [tree] but includes on each branch the set of leaves in its tip subtree *)
type annotated_tree =
(Newick.node_info, string, float * String.Set.t) Tree.t
(* which of the branches within a foreground clade to keep as foreground.
Options are "ancestral" to keep only the inferred transition branch,
"terminal" to keep only terminal branches,
and "all" to keep all branches. *)
type foreground_clade = All | Ancestral | Terminal
let string_of_clade = function
| All -> "all"
| Ancestral -> "ancestral"
| Terminal -> "terminal"
let template_of_clade clade = string_of_clade clade |> string
let img =
[ docker_image ~account:"lsdch" ~name:"rerconverge" ~tag:"0.1.1" () ]
......@@ -67,14 +80,18 @@ let rer_converge ?max_read ?min_specs ?(transform = `sqrt)
(script ?max_read ~transform ~weighted ~scale ~continuous ?min_specs
~gene_tree_set ~phenotypes ~master_tree)
let result_table_of_rds rds =
let result_table_of_rds rds clade =
let script = [%script {|
library(tidyverse)
clade = "<<<template_of_clade clade>>>"
rds = readRDS("<<< dep rds >>>")
write_tsv(rds$results, "<<<dest>>>")
print(names(rds))
print(names(rds[["all"]]))
write_tsv(rds[[clade]]$table, "<<<dest>>>")
|}] in
Bistro_utils.R_script.workflow ~img ~descr:"extract_result_table" script
(* creates an annotated tree from a tree *)
let annotate_branches_with_tip_leaves (tree : tree) : annotated_tree =
let rec traverse_tree tree =
match tree with
......@@ -177,14 +194,14 @@ let match_species_tree_position ~gene_tree ~clipped_species_tree =
in
Workflow.path_plugin ~descr:"match_species_tree_position" f
let plot_relative_rates rds gene_ids =
let plot_relative_rates rds clade gene_ids =
let module R = Bistro_utils.R_script in
let gene_ids = R.(make [strings gene_ids]) in
let script = [%include_script "lib/R/plot_rers.R"] in
R.workflow ~img ~descr:"RER_converge.plot_relative_rates" script
let best_candidate_summary ?(n_genes = 10) rds=
let best_candidate_summary ?(n_genes = 10) rds clade =
let n_genes = int n_genes in
let script = [%include_script "lib/R/best_candidate_summary.R"] in
Bistro_utils.R_script.workflow ~img ~descr:"RER_converge.best_candidate_summary" script
......
......@@ -6,6 +6,10 @@ class type rds = object
method format : [`format]
end
type foreground_clade = All | Ancestral | Terminal
val string_of_clade : foreground_clade -> string
val phenotypes_of_convergent_species :
string list workflow -> tsv path workflow
......@@ -24,19 +28,21 @@ val rer_converge :
unit ->
rds file
val result_table_of_rds : rds file -> tsv file
val result_table_of_rds : rds file -> foreground_clade -> tsv file
val match_species_tree_position :
gene_tree:newick file -> clipped_species_tree:newick file -> newick file
val plot_relative_rates :
rds file ->
foreground_clade ->
string list ->
pdf file
val best_candidate_summary :
?n_genes:int ->
rds file ->
foreground_clade ->
pdf file
val enrichment_analysis :
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment