Commit 822dfab1 authored by Louis Duchemin's avatar Louis Duchemin

RER converge pipeline

parent 12048c65
open Core_kernel
open Bistro
open Bistro.Shell_dsl
let gene_tree_file gene_trees =
let gene_ids, newicks = List.unzip gene_trees in
let f =
[%workflow
fun dest ->
let load_tree newick_path =
In_channel.read_all newick_path
|> String.filter ~f:(function '\n' | '\t' -> false | _ -> true) in
let newick_trees =
[%eval Workflow.path_list newicks] |> List.map ~f:load_tree in
let lines =
List.map2_exn
~f:(fun gene tree -> String.concat ~sep:"\t" [gene; tree])
gene_ids newick_trees in
Out_channel.write_lines dest lines] in
Workflow.path_plugin ~descr:"rer_converge.gene_tree_file" f
let template_transform t =
string (match t with `sqrt -> "sqrt" | `none -> "none" | `log -> "log")
let template_bool t = string (if t then "T" else "F")
let script ?max_read ~transform ~weighted ~scale ~continuous ~gene_tree_set
~phenotype =
let max_read = Option.value_map max_read ~default:(string "NA") ~f:int in
[%script
{|
suppressPackageStartupMessages({
library(tidyverse)
library(RERconverge)
})
gene_trees <- readTrees({{dep gene_tree_set}}, max.read={{max_read}})
relative_rates <- getAllResiduals(
gene_trees,
useSpecies = useSpecies,
transform = {{template_transform transform}},
weighted = {{template_bool weighted}},
scale = {{template_bool scale}}
)
load_species_file <- function(path) {
read_table(path, col_names = "species") %>% pull(species)
}
load_traits_file <- function(path, is_continuous) {
if (is_continuous)
read_table(path, col_names = c('species', 'value')) %>% deframe()
else
load_species_file(path)
}
load_traits_paths <-
function(traits_file_path,
gene_trees,
is_continuous,
useSpecies = NULL,
clade = c("ancestral", "terminal", "all"))
{
traits <- load_traits_file(traits_file_path, is_continuous)
if (is_continuous) {
char2Paths(traits, gene_trees)
} else {
foreground2Tree(traits,
gene_trees,
clade = clade,
useSpecies = useSpecies,
plot=F # FIXME : plot=T raises cryptic error
) %>%
tree2Paths(gene_trees)
}
}
traits_paths <- load_traits_paths(
{{dep phenotype}},
{{dep gene_tree_set}},
{{template_bool continuous}}
)
res <- if ({{template_bool continuous}}) {
correlateWithContinuousPhenotype(
relative_rates,
traits_paths,
)
} else {
correlateWithBinaryPhenotype(
relative_rates,
traits_paths,
)
}
write_tsv({{dest}}, res)
|}]
let rer_converge ?max_read ?(transform = `sqrt) ?(weighted = true)
?(scale = true) ?(continuous = false) ~gene_tree_set ~phenotype =
Bistro_utils.R_script.workflow ~descr:"rer_converge"
(script ?max_read ~transform ~weighted ~scale ~continuous ~gene_tree_set
~phenotype)
open Bistro
open File_formats
val gene_tree_file : (string * newick file) list -> tsv file
val rer_converge :
?max_read:int
-> ?transform:[`none | `sqrt | `log]
-> ?weighted:bool
-> ?scale:bool
-> ?continuous:bool
-> gene_tree_set:tsv file
-> phenotype:tsv file
-> tsv file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment