Commit fecafac7 authored by Philippe Veber's avatar Philippe Veber
Browse files

new attempts to produce trees with no single nodes

parent 5a2a7780
......@@ -52,6 +52,13 @@ module Make(Q : Query) = struct
let gene_tree d =
Tree_dataset.raxmlng_fna ~fna:(nucleotide_alignment d) ()
let%pworkflow tree_with_no_single_child ~branch_length_unit d : newick file =
let tree_file = [%path tree ~branch_length_unit d] in
let open Phylogenetics in
let tree = Newick.from_file tree_file in
let tree = Newick.map_inner_tree tree ~f:Reviewphiltrans_toolbox.Convergence_tree.remove_nodes_with_single_child in
Newick.to_file tree [%dest]
let identical d =
let tree_sc = Tree_dataset.prepare_sc_tree (tree ~branch_length_unit:`Amino_acid d) in
let tree_id = Tree_dataset.prepare_tree_with_node_id (tree ~branch_length_unit:`Amino_acid d) in
......@@ -93,7 +100,7 @@ module Make(Q : Query) = struct
let tdg09 d =
Tamuri.tdg09
~tree:(tree ~branch_length_unit:`Amino_acid d)
~tree:(tree_with_no_single_child ~branch_length_unit:`Amino_acid d)
~faa:(amino_acid_alignment d)
()
|> Tamuri.results
......
......@@ -47,55 +47,6 @@ let%pworkflow remove_unobserved_sequences_from_alignment phylip : phylip file =
let filtered_data = Phylip.make_exn items in
Phylip.write ~strict:false filtered_data [%dest]
let rec transfer_tag_to_branches t =
let open Phylogenetics in
let category : _ Tree.t -> int = function
| Leaf (_, c) -> c
| Node n -> snd n.data
in
match t with
| Tree.Leaf (l, _) -> Tree.leaf l
| Node n ->
let cat_n = snd n.data in
List1.map n.branches ~f:(fun (Branch b) ->
let cat_child = category b.tip in
let tags = List.concat [
[ "Condition", Int.to_string cat_child ] ;
(
if cat_n <> cat_child then
[ "Transition", Int.to_string cat_child ]
else []
) ;
b.data.Newick.tags
]
in
Tree.branch
{ b.data with Newick.tags }
(transfer_tag_to_branches b.tip)
)
|> Tree.node (fst n.data)
let condition_tag (bi : Phylogenetics.Newick.branch_info) =
List.Assoc.find bi.tags "Condition" ~equal:String.equal
let tag_tree t tagged_leaves =
let open Phylogenetics in
let category (ni : Newick.node_info) =
Option.map ni.name ~f:(fun l ->
if String.Set.mem tagged_leaves l then 1 else 0
)
in
let cost x y =
match x, y with
| 0, 1 -> 2.
| 1, 0 -> 1.
| 0, 0
| 1, 1 -> 0.
| _ -> assert false
in
Fitch.fitch ~cost ~n:2 ~category t
|> transfer_tag_to_branches
let%pworkflow clip_tree_on_alignment tree ali =
let open Phylogenetics in
let tree = Newick.from_file [%path tree] in
......@@ -108,16 +59,7 @@ let%pworkflow clip_tree_on_alignment tree ali =
(fun bi -> bi.Newick.name) ali_species
with
| None -> failwith "Tree has no leaf in alignment"
| Some filtered_tree ->
Tree.simplify_node_with_single_child filtered_tree (fun bi1 bi2 ->
match bi1.length, condition_tag bi1, bi2.length, condition_tag bi2 with
| Some l1, Some c1, Some l2, Some c2 ->
if String.(c1 = c2) then
let tags = List.dedup_and_sort (bi1.tags @ bi2.tags) ~compare:[%compare: string * string] in
Some { Newick.tags ; length = Some (l1 +. l2) }
else None
| _ -> assert false
)
| Some filtered_tree -> filtered_tree
)
in
Newick.to_file clipped_tree [%dest]
......@@ -134,7 +76,9 @@ let annotate_convergent_species_in_tree (tree : newick file) species : newick fi
let ensembl_tree = Newick.from_file omm_tree in
let tagged_tree =
Newick.map_inner_tree ensembl_tree ~f:(fun t ->
tag_tree t (String.Set.of_list species)
Reviewphiltrans_toolbox.Convergence_tree.infer_binary_condition_on_branches
~convergent_leaves:(String.Set.of_list species)
t
)
in
Newick.to_file tagged_tree [%dest]
......@@ -490,7 +434,9 @@ let%pworkflow convergence_species_tree_pdf ~convergent_species db =
in
let tree_or_branch =
Newick.from_file tree_path
|> Newick.map_inner_tree ~f:(fun t -> tag_tree t convergent_species)
|> Newick.map_inner_tree ~f:(fun t ->
Reviewphiltrans_toolbox.Convergence_tree.infer_binary_condition_on_branches
t ~convergent_leaves:convergent_species)
in
render_tree tree_or_branch
|> Biotk_croquis.Croquis.Layout.simple
......
open Core_kernel
open Phylogenetics
type t = Newick.tree
module Tags = struct
let condition_label = "Condition"
let transition_label = "Transition"
let condition tags =
List.Assoc.find tags condition_label ~equal:String.equal
let set_condition tags c =
List.Assoc.(
add
(remove tags condition_label ~equal:String.equal)
condition_label c
~equal:String.equal
)
(* let other_tags tags =
* List.filter tags ~f:(fun (key, _) -> String.(key <> condition_label && key <> transition_label)) *)
let unset_transition tags =
List.Assoc.remove tags transition_label ~equal:String.equal
let set_transition tags c =
List.Assoc.(
add
(unset_transition tags)
transition_label c
~equal:String.equal
)
end
let condition_of_branch_info (bi : Newick.branch_info) =
Tags.condition bi.tags
let rec transfer_condition_to_branches t =
let category : _ Tree.t -> int = function
| Leaf (_, c) -> c
| Node n -> snd n.data
in
match t with
| Tree.Leaf (l, _) -> Tree.leaf l
| Node n ->
List1.map n.branches ~f:(fun (Branch b) ->
let cat_child = category b.tip in
let tags = Tags.set_condition b.data.Newick.tags (Int.to_string cat_child) in
Tree.branch
{ b.data with Newick.tags }
(transfer_condition_to_branches b.tip)
)
|> Tree.node (fst n.data)
let reset_transitions (tree : t) =
let rec aux mother_condition tree =
match (tree : t) with
| Leaf _ as l -> l
| Node n ->
let branches = List1.map n.branches ~f:(fun (Branch b) ->
let tags, c_b =
match Tags.condition b.data.tags with
| None -> failwith "tree tagged with condition expected"
| Some c_b ->
let tags =
if String.(c_b <> mother_condition) then Tags.set_transition b.data.tags c_b
else Tags.unset_transition b.data.tags
in
tags, c_b
in
let data = { b.data with tags } in
Tree.branch data (aux c_b b.tip)
)
in
Node { n with branches }
in
match tree with
| Leaf _ as l -> l
| Node n ->
let branches = List1.map n.branches ~f:(fun (Branch b) ->
match Tags.condition b.data.tags with
| None -> failwith "tree tagged with condition expected"
| Some c_b ->
Tree.branch b.data (aux c_b b.tip)
)
in
Node { n with branches }
let length_on_each_condition branches =
let module A = Biocaml_unix.Accu in
let acc = A.create ~bin:Fn.id ~zero:0. ~add:( +. ) () in
List.iter branches ~f:(fun bi ->
match condition_of_branch_info bi, bi.Newick.length with
| Some c, Some l -> A.add acc c l
| _ -> ()
) ;
A.to_alist acc
let remove_nodes_with_single_child (tree : t) =
Tree.simplify_node_with_single_child tree ~merge_branch_data:(fun branches ->
let condition_stats = length_on_each_condition branches in
let major_condition =
List.max_elt condition_stats ~compare:(fun (_, l) (_, l') ->
Float.compare l l'
)
in
let tags = match major_condition with
| None -> []
| Some (c, _) -> Tags.set_condition [] c
in
let length = List.fold branches ~init:0. ~f:(fun acc bi ->
acc +. Option.value_exn bi.length
) in
{ Newick.tags ; length = Some length }
)
|> reset_transitions
let infer_binary_condition_on_branches ?(gain_relative_cost = 2.) t ~convergent_leaves =
let category (ni : Newick.node_info) =
Option.map ni.name ~f:(fun l ->
if String.Set.mem convergent_leaves l then 1 else 0
)
in
let cost x y =
match x, y with
| 0, 1 -> gain_relative_cost
| 1, 0 -> 1.
| 0, 0
| 1, 1 -> 0.
| _ -> assert false
in
Fitch.fitch ~cost ~n:2 ~category t
|> transfer_condition_to_branches
|> reset_transitions
open Core_kernel
open Phylogenetics
type t = Newick.tree
val infer_binary_condition_on_branches :
?gain_relative_cost:float ->
t ->
convergent_leaves:String.Set.t ->
t
val remove_nodes_with_single_child : t -> t
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment