Commit d15d6813 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

RERconverge : compute results for each foreground clade specification

parent b722c7d9
...@@ -24,13 +24,25 @@ let () = ...@@ -24,13 +24,25 @@ let () =
|> print_endline |> print_endline
in in
List.iter2 (fun label phenotypes -> List.iter2 (fun label phenotypes ->
let target = pipeline ~phenotypes in pipeline ~phenotypes
print_endline label; |> List.iter (fun (clade, target) ->
execute (target.rer_plot ~pat:"*LIM2*") print_endline label;
print_endline ("Foreground clade : " ^ (Codepi.Rer_converge.string_of_clade clade));
print_string "Result table : ";
execute Codepi.Orthomam.(target.result_table);
print_string "RER plots : ";
execute (target.rer_plot ~pat:"*LIM2*");
print_string "Candidates summary : ";
execute target.best_candidate_summary;
)
) )
["Subterraneans"] ["Subterraneans"; "Marines"]
(* ; "Marines "; "Echolocators"] *) (* ; "Marines "; "Echolocators"] *)
[subterraneans] [subterraneans; marines]
(* ; marines; echolocators] *) (* ; marines; echolocators] *)
with Failure msg -> print_endline msg with Failure msg -> print_endline msg
...@@ -5,9 +5,10 @@ suppressPackageStartupMessages({ ...@@ -5,9 +5,10 @@ suppressPackageStartupMessages({
}) })
rds = readRDS("<<< dep rds >>>") rds = readRDS("<<< dep rds >>>")
clade = "<<<template_of_clade clade>>>"
master_tree <- rds$master_tree master_tree <- rds$master_tree
results <- rds$results results <- rds[[clade]]$table
max_read <- nrow(results) max_read <- nrow(results)
gene_trees <-rds$gene_trees gene_trees <-rds$gene_trees
convergent_species <- rds$phenotypes convergent_species <- rds$phenotypes
......
...@@ -5,11 +5,12 @@ suppressPackageStartupMessages({ ...@@ -5,11 +5,12 @@ suppressPackageStartupMessages({
rds = readRDS("<<<dep rds>>>") rds = readRDS("<<<dep rds>>>")
clade = "<<<template_of_clade clade>>>"
gene_ids = <<<gene_ids>>> gene_ids = <<<gene_ids>>>
rers = rds$relative_rates rers = rds$relative_rates
pdf("<<<dest>>>") pdf("<<<dest>>>", pointsize = 9, height = 12, width=10)
for (gene_id in gene_ids) { for (gene_id in gene_ids) {
plotRers(rers, gene_id, phenv=rds$traits_paths) plotRers(rers, gene_id, phenv=rds[[clade]]$traits_paths)
} }
dev.off() dev.off()
\ No newline at end of file
...@@ -40,40 +40,53 @@ build_traits_paths <- ...@@ -40,40 +40,53 @@ build_traits_paths <-
foreground2Tree(phenotypes, foreground2Tree(phenotypes,
gene_trees, gene_trees,
clade = clade, clade = clade,
useSpecies = useSpecies, # useSpecies = useSpecies,
plot=F # FIXME : plot=T raises cryptic error plot=F # FIXME : plot=T raises cryptic error
) %>% ) %>%
tree2Paths(gene_trees) tree2Paths(gene_trees)
} }
} }
traits_paths <- build_traits_paths( clades = c("ancestral", "terminal", "all")
phenotypes,
gene_trees, results = lapply(clades, FUN=function(clade){
is_continuous
)
res <- if (is_continuous) { traits_paths <- build_traits_paths(
correlateWithContinuousPhenotype( phenotypes,
relative_rates, gene_trees,
traits_paths, is_continuous
) )
} else {
correlateWithBinaryPhenotype( res <- if (is_continuous) {
relative_rates, correlateWithContinuousPhenotype(
traits_paths, relative_rates,
traits_paths,
)
} else {
correlateWithBinaryPhenotype(
relative_rates,
traits_paths,
)
}
table <- res %>%
mutate(gene_id=names(gene_trees$trees)) %>%
select(gene_id, everything())
list(
table=table,
traits_paths=traits_paths
) )
} })
res <- res %>% names(results) = clades
mutate(gene_id=names(gene_trees$trees)) %>%
select(gene_id, everything()) results = results %>%
list_modify(
relative_rates = relative_rates,
master_tree = master_tree,
gene_trees = gene_trees,
phenotypes = phenotypes
)
saveRDS(list( saveRDS(results, file="<<<dest>>>")
relative_rates = relative_rates,
results = res,
traits_paths = traits_paths,
master_tree = master_tree,
gene_trees = gene_trees,
phenotypes = phenotypes
), file="<<<dest>>>")
...@@ -707,17 +707,18 @@ let rer_converge ?transform ?weighted ?scale ?continuous ?(summary_n_genes = 10) ...@@ -707,17 +707,18 @@ let rer_converge ?transform ?weighted ?scale ?continuous ?(summary_n_genes = 10)
let phenotypes = RER.phenotypes_of_convergent_species phenotypes in let phenotypes = RER.phenotypes_of_convergent_species phenotypes in
let rds = RER.rer_converge ?transform ?weighted ?scale ?continuous let rds = RER.rer_converge ?transform ?weighted ?scale ?continuous
~min_specs:10 ~master_tree:species_tree ~gene_tree_set ~phenotypes () in ~min_specs:10 ~master_tree:species_tree ~gene_tree_set ~phenotypes () in
let result_table = RER.result_table_of_rds rds in
let best_candidate_summary = RER.best_candidate_summary ~n_genes:summary_n_genes rds in List.map RER.[All; Ancestral; Terminal] ~f:(fun clade ->
let rer_plot ~pat = let result_table = RER.result_table_of_rds rds clade in
search_alignments ~pat db let best_candidate_summary = RER.best_candidate_summary
|> List.map ~f:(fun (Alignment (_, ali)) -> family_id_of_alignment ali) ~n_genes:summary_n_genes rds clade
|> RER.plot_relative_rates rds in
in let rer_plot ~pat =
{ search_alignments ~pat db
result_table; |> List.map ~f:(fun (Alignment (_, ali)) -> family_id_of_alignment ali)
best_candidate_summary; |> RER.plot_relative_rates rds clade
rer_plot; in
} clade, { result_table; best_candidate_summary; rer_plot; }
)
...@@ -106,7 +106,7 @@ val rer_converge : ...@@ -106,7 +106,7 @@ val rer_converge :
db:Orthomam_db.t -> db:Orthomam_db.t ->
phenotypes:string list workflow -> phenotypes:string list workflow ->
unit -> unit ->
rer_converge_output (Rer_converge.foreground_clade * rer_converge_output) list
module Binary_phenotype: sig module Binary_phenotype: sig
type t = { type t = {
......
...@@ -11,10 +11,23 @@ end ...@@ -11,10 +11,23 @@ end
type tree = (Newick.node_info, string, float) Tree.t type tree = (Newick.node_info, string, float) Tree.t
(** like a [tree] but includes on each branch the set of leaves in its tip subtree *)
type annotated_tree = type annotated_tree =
(Newick.node_info, string, float * String.Set.t) Tree.t (Newick.node_info, string, float * String.Set.t) Tree.t
(* which of the branches within a foreground clade to keep as foreground.
Options are "ancestral" to keep only the inferred transition branch,
"terminal" to keep only terminal branches,
and "all" to keep all branches. *)
type foreground_clade = All | Ancestral | Terminal
let string_of_clade = function
| All -> "all"
| Ancestral -> "ancestral"
| Terminal -> "terminal"
let template_of_clade clade = string_of_clade clade |> string
let img = let img =
[ docker_image ~account:"lsdch" ~name:"rerconverge" ~tag:"0.1.1" () ] [ docker_image ~account:"lsdch" ~name:"rerconverge" ~tag:"0.1.1" () ]
...@@ -67,14 +80,18 @@ let rer_converge ?max_read ?min_specs ?(transform = `sqrt) ...@@ -67,14 +80,18 @@ let rer_converge ?max_read ?min_specs ?(transform = `sqrt)
(script ?max_read ~transform ~weighted ~scale ~continuous ?min_specs (script ?max_read ~transform ~weighted ~scale ~continuous ?min_specs
~gene_tree_set ~phenotypes ~master_tree) ~gene_tree_set ~phenotypes ~master_tree)
let result_table_of_rds rds = let result_table_of_rds rds clade =
let script = [%script {| let script = [%script {|
library(tidyverse) library(tidyverse)
clade = "<<<template_of_clade clade>>>"
rds = readRDS("<<< dep rds >>>") rds = readRDS("<<< dep rds >>>")
write_tsv(rds$results, "<<<dest>>>") print(names(rds))
print(names(rds[["all"]]))
write_tsv(rds[[clade]]$table, "<<<dest>>>")
|}] in |}] in
Bistro_utils.R_script.workflow ~img ~descr:"extract_result_table" script Bistro_utils.R_script.workflow ~img ~descr:"extract_result_table" script
(* creates an annotated tree from a tree *)
let annotate_branches_with_tip_leaves (tree : tree) : annotated_tree = let annotate_branches_with_tip_leaves (tree : tree) : annotated_tree =
let rec traverse_tree tree = let rec traverse_tree tree =
match tree with match tree with
...@@ -177,14 +194,14 @@ let match_species_tree_position ~gene_tree ~clipped_species_tree = ...@@ -177,14 +194,14 @@ let match_species_tree_position ~gene_tree ~clipped_species_tree =
in in
Workflow.path_plugin ~descr:"match_species_tree_position" f Workflow.path_plugin ~descr:"match_species_tree_position" f
let plot_relative_rates rds gene_ids = let plot_relative_rates rds clade gene_ids =
let module R = Bistro_utils.R_script in let module R = Bistro_utils.R_script in
let gene_ids = R.(make [strings gene_ids]) in let gene_ids = R.(make [strings gene_ids]) in
let script = [%include_script "lib/R/plot_rers.R"] in let script = [%include_script "lib/R/plot_rers.R"] in
R.workflow ~img ~descr:"RER_converge.plot_relative_rates" script R.workflow ~img ~descr:"RER_converge.plot_relative_rates" script
let best_candidate_summary ?(n_genes = 10) rds= let best_candidate_summary ?(n_genes = 10) rds clade =
let n_genes = int n_genes in let n_genes = int n_genes in
let script = [%include_script "lib/R/best_candidate_summary.R"] in let script = [%include_script "lib/R/best_candidate_summary.R"] in
Bistro_utils.R_script.workflow ~img ~descr:"RER_converge.best_candidate_summary" script Bistro_utils.R_script.workflow ~img ~descr:"RER_converge.best_candidate_summary" script
......
...@@ -6,6 +6,10 @@ class type rds = object ...@@ -6,6 +6,10 @@ class type rds = object
method format : [`format] method format : [`format]
end end
type foreground_clade = All | Ancestral | Terminal
val string_of_clade : foreground_clade -> string
val phenotypes_of_convergent_species : val phenotypes_of_convergent_species :
string list workflow -> tsv path workflow string list workflow -> tsv path workflow
...@@ -24,19 +28,21 @@ val rer_converge : ...@@ -24,19 +28,21 @@ val rer_converge :
unit -> unit ->
rds file rds file
val result_table_of_rds : rds file -> tsv file val result_table_of_rds : rds file -> foreground_clade -> tsv file
val match_species_tree_position : val match_species_tree_position :
gene_tree:newick file -> clipped_species_tree:newick file -> newick file gene_tree:newick file -> clipped_species_tree:newick file -> newick file
val plot_relative_rates : val plot_relative_rates :
rds file -> rds file ->
foreground_clade ->
string list -> string list ->
pdf file pdf file
val best_candidate_summary : val best_candidate_summary :
?n_genes:int -> ?n_genes:int ->
rds file -> rds file ->
foreground_clade ->
pdf file pdf file
val enrichment_analysis : val enrichment_analysis :
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment