Gitlab is now running v13.9.0 - More info -> here <-

Commit 073aba01 authored by Louis Duchemin's avatar Louis Duchemin

RERconverge : Rearrangement of gene trees to match species tree

parent 5e8aa074
......@@ -612,6 +612,9 @@ let draw_site q pos =
in
Workflow.path_plugin ~descr:"orthomam." f
let rer_converge ?max_read ?transform ?weighted ?scale ?continuous ~db
~phenotypes () =
let open Bistro_bio in
......@@ -632,11 +635,14 @@ let rer_converge ?max_read ?transform ?weighted ?scale ?continuous ~db
let clipped_tree =
clip_tree_on_alignment ~handle_tags:false species_tree q
in
Iqtree.iqtree
let gene_tree = Iqtree.iqtree
~keep_ident:true ~te:clipped_tree
(*~m:model*) ~st:`DNA ~spp
(`phylip q)
|> Iqtree.treefile)
|> Iqtree.treefile
in
RER.match_species_tree_position ~gene_tree ~clipped_species_tree:clipped_tree
)
ali_files
|> List.zip_exn gene_ids
in
......
open Core_kernel
open Bistro
open Bistro.Shell_dsl
open Phylogenetics
open Tree
type tree = (Newick.node_info, string, float) Tree.t
type annotated_tree =
(Newick.node_info, string, float * String.Set.t) Tree.t
let img =
[ docker_image ~account:"lsdch" ~name:"rerconverge" ~tag:"0.1.0" () ]
......@@ -38,12 +45,11 @@ let template_transform t =
let template_bool t = string (if t then "T" else "F")
let ropt f =
Option.value_map ~f ~default:(string "NA")
let ropt f = Option.value_map ~f ~default:(string "NA")
let script ?max_read ?min_specs ~transform ~weighted ~scale ~continuous ~gene_tree_set
~phenotype (* ~master_tree *) =
let max_read = ropt int max_read in
let script ?max_read ?min_specs ~transform ~weighted ~scale ~continuous
~gene_tree_set ~phenotype (* ~master_tree *) =
let max_read = ropt int max_read in
let min_specs = ropt int min_specs in
[%script
{|
......@@ -116,8 +122,111 @@ res <- if ({{template_bool continuous}}) {
write_tsv("{{dest}}", res)
|}]
let rer_converge ?max_read ?min_specs ?(transform = `sqrt) ?(weighted = true)
?(scale = true) ?(continuous = false) (* ~master_tree *) ~gene_tree_set ~phenotype =
let rer_converge ?max_read ?min_specs ?(transform = `sqrt)
?(weighted = true) ?(scale = true) ?(continuous = false)
~(* ~master_tree *) gene_tree_set ~phenotype =
Bistro_utils.R_script.workflow ~img ~descr:"rer_converge"
(script ?max_read ~transform ~weighted ~scale ~continuous
?min_specs ~gene_tree_set ~phenotype (* ~master_tree *))
(script ?max_read ~transform ~weighted ~scale ~continuous ?min_specs
~gene_tree_set ~phenotype (* ~master_tree *))
let annotate_branches_with_tip_leaves (tree : tree) : annotated_tree =
let rec traverse_tree tree =
match tree with
| Leaf leaf_name as leaf -> (leaf, String.Set.singleton leaf_name)
| Node n ->
let annotated_branches = List1.map ~f:traverse_branch n.branches in
let new_node = node n.data annotated_branches in
let tip_leaves =
String.Set.union_list
( List1.map ~f:(fun (Branch b) -> snd b.data) annotated_branches
|> List1.to_list )
in
(new_node, tip_leaves)
and traverse_branch (Branch b) =
let tree, tip_leaves = traverse_tree b.tip in
branch (b.data, tip_leaves) tree
in
fst (traverse_tree tree)
module Branch_map = Map.Make (String.Set)
let branch_lengths_map_of_tree (tree : annotated_tree) =
Tree.prefix_traversal tree ~init:Branch_map.empty ~node:Fn.const
~branch:(fun acc branch_data ->
let branch_length = fst branch_data in
Branch_map.add_exn acc ~key:(snd branch_data) ~data:branch_length)
~leaf:Fn.const
let set_branch_lengths_of_species_tree (tree : annotated_tree) branch_map
all_leaves : tree =
let rec traverse_tree tree =
match tree with
| Leaf l -> leaf l
| Node n ->
let updated_branches = List1.map ~f:traverse_branch n.branches in
node n.data updated_branches
and traverse_branch (Branch b) =
let bipartition = snd b.data in
let length_maybe = Branch_map.find branch_map bipartition in
let length =
match length_maybe with
| Some length -> length
| None ->
let complement = String.Set.diff all_leaves bipartition in
Branch_map.find_exn branch_map complement
in
let tip = traverse_tree b.tip in
branch length tip
in
traverse_tree tree
let tree_of_newick (nw : Newick.tree) : (tree, string) result =
try
Ok
(Tree.map nw ~node:Fn.id
~leaf:(fun ni ->
match ni.name with
| Some n -> n
| None -> failwith "missing leaf name")
~branch:(fun bi ->
match bi.length with
| Some l -> l
| None -> failwith "missing branch length"))
with Failure msg -> Error msg
let newick_of_tree (tree : tree) : Newick.tree =
Tree.map tree ~node:Fn.id
~leaf:(fun leaf -> Newick.{ name = Some leaf })
~branch:(fun bi -> Newick.{ length = Some bi; tags = [] })
let match_species_tree_position ~gene_tree ~clipped_species_tree =
let f =
[%workflow
fun dest ->
let master_tree = Newick.from_file [%path clipped_species_tree] in
let gene_tree = Newick.from_file [%path gene_tree] in
let rearranged_gene_tree =
Newick.map_inner_tree master_tree ~f:(fun master_tree ->
Newick.with_inner_tree gene_tree ~f:(fun gene_tree ->
let master_tree =
Result.ok_or_failwith @@ tree_of_newick master_tree
and gene_tree =
Result.ok_or_failwith @@ tree_of_newick gene_tree
in
let all_leaves =
Tree.leaves master_tree |> String.Set.of_list
and annotated_master_tree =
annotate_branches_with_tip_leaves master_tree
and annotated_gene_tree =
annotate_branches_with_tip_leaves gene_tree
in
let branch_map =
branch_lengths_map_of_tree annotated_gene_tree
in
set_branch_lengths_of_species_tree annotated_master_tree
branch_map all_leaves
|> newick_of_tree))
in
Newick.to_file rearranged_gene_tree dest]
in
Workflow.path_plugin ~descr:"match_species_tree_position" f
......@@ -17,3 +17,6 @@ val rer_converge :
gene_tree_set:tsv file ->
phenotype:tsv file ->
tsv file
val match_species_tree_position :
gene_tree:newick file -> clipped_species_tree:newick file -> newick file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment