Commit 0040b10c authored by Louis Duchemin's avatar Louis Duchemin
Browse files

Extends Convergence_tree module features

parent 6acf2dc3
......@@ -5,18 +5,17 @@ type t = Newick.tree
module Tags = struct
let condition_label = "Condition"
let transition_label = "Transition"
let condition tags =
let condition tags =
List.Assoc.find tags condition_label ~equal:String.equal
let set_condition tags c =
List.Assoc.(
add
(remove tags condition_label ~equal:String.equal)
condition_label c
~equal:String.equal
)
condition_label c ~equal:String.equal)
(* let other_tags tags =
* List.filter tags ~f:(fun (key, _) -> String.(key <> condition_label && key <> transition_label)) *)
......@@ -26,16 +25,54 @@ module Tags = struct
let set_transition tags c =
List.Assoc.(
add
(unset_transition tags)
transition_label c
~equal:String.equal
)
add (unset_transition tags) transition_label c ~equal:String.equal)
end
let condition_of_branch_info (bi : Newick.branch_info) =
Tags.condition bi.tags
type u =
( Newick.node_info,
Newick.node_info,
Phylogenetics_convergence.Simulator.Branch_info.t )
Tree.t
let from_file fn =
let module BI = Phylogenetics_convergence.Simulator.Branch_info in
Newick.from_file fn
|> Newick.with_inner_tree ~f:(fun t ->
Tree.map t ~node:Fn.id ~leaf:Fn.id
~branch:
Phylogenetics.Newick.(
fun b ->
let length = Option.value_exn b.length in
let condition =
match
List.Assoc.find ~equal:String.equal b.tags "Condition"
with
| Some s -> (
match s with
| "0" -> `Ancestral
| "1" -> `Convergent
| _ -> failwithf "Invalid condition: %s" s () )
| None -> failwith "Missing Condition tag"
in
{ BI.length; condition }))
let leaves tree =
let rec node condition t acc =
match t with
| Tree.Node n -> List1.fold_right n.branches ~init:acc ~f:branch
| Leaf { Newick.name = Some n } -> (n, condition) :: acc
| Leaf { name = None } ->
failwith "leaves_condition: missing leaf name in nhx"
and branch (Tree.Branch b) acc =
node
(Phylogenetics_convergence.Simulator.Branch_info.condition b.data)
b.tip acc
in
node `Ancestral tree []
let rec transfer_condition_to_branches t =
let category : _ Tree.t -> int = function
| Leaf (_, c) -> c
......@@ -44,92 +81,91 @@ let rec transfer_condition_to_branches t =
match t with
| Tree.Leaf (l, _) -> Tree.leaf l
| Node n ->
List1.map n.branches ~f:(fun (Branch b) ->
let cat_child = category b.tip in
let tags = Tags.set_condition b.data.Newick.tags (Int.to_string cat_child) in
Tree.branch
{ b.data with Newick.tags }
(transfer_condition_to_branches b.tip)
)
|> Tree.node (fst n.data)
List1.map n.branches ~f:(fun (Branch b) ->
let cat_child = category b.tip in
let tags =
Tags.set_condition b.data.Newick.tags (Int.to_string cat_child)
in
Tree.branch
{ b.data with Newick.tags }
(transfer_condition_to_branches b.tip))
|> Tree.node (fst n.data)
let reset_transitions (tree : t) =
let rec aux mother_condition tree =
match (tree : t) with
| Leaf _ as l -> l
| Node n ->
let branches = List1.map n.branches ~f:(fun (Branch b) ->
let tags, c_b =
match Tags.condition b.data.tags with
| None -> failwith "tree tagged with condition expected"
| Some c_b ->
let tags =
if String.(c_b <> mother_condition) then Tags.set_transition b.data.tags c_b
else Tags.unset_transition b.data.tags
let branches =
List1.map n.branches ~f:(fun (Branch b) ->
let tags, c_b =
match Tags.condition b.data.tags with
| None -> failwith "tree tagged with condition expected"
| Some c_b ->
let tags =
if String.(c_b <> mother_condition) then
Tags.set_transition b.data.tags c_b
else Tags.unset_transition b.data.tags
in
(tags, c_b)
in
tags, c_b
in
let data = { b.data with tags } in
Tree.branch data (aux c_b b.tip)
)
in
Node { n with branches }
let data = { b.data with tags } in
Tree.branch data (aux c_b b.tip))
in
Node { n with branches }
in
match tree with
| Leaf _ as l -> l
| Node n ->
let branches = List1.map n.branches ~f:(fun (Branch b) ->
match Tags.condition b.data.tags with
| None -> failwith "tree tagged with condition expected"
| Some c_b ->
Tree.branch b.data (aux c_b b.tip)
)
in
Node { n with branches }
let branches =
List1.map n.branches ~f:(fun (Branch b) ->
match Tags.condition b.data.tags with
| None -> failwith "tree tagged with condition expected"
| Some c_b -> Tree.branch b.data (aux c_b b.tip))
in
Node { n with branches }
let length_on_each_condition branches =
let module A = Biocaml_unix.Accu in
let acc = A.create ~bin:Fn.id ~zero:0. ~add:( +. ) () in
List.iter branches ~f:(fun bi ->
match condition_of_branch_info bi, bi.Newick.length with
match (condition_of_branch_info bi, bi.Newick.length) with
| Some c, Some l -> A.add acc c l
| _ -> ()
) ;
| _ -> ()) ;
A.to_alist acc
let remove_nodes_with_single_child (tree : t) =
Tree.simplify_node_with_single_child tree ~merge_branch_data:(fun branches ->
Tree.simplify_node_with_single_child tree
~merge_branch_data:(fun branches ->
let condition_stats = length_on_each_condition branches in
let major_condition =
List.max_elt condition_stats ~compare:(fun (_, l) (_, l') ->
Float.compare l l'
)
Float.compare l l')
in
let tags = match major_condition with
let tags =
match major_condition with
| None -> []
| Some (c, _) -> Tags.set_condition [] c
in
let length = List.fold branches ~init:0. ~f:(fun acc bi ->
acc +. Option.value_exn bi.length
) in
{ Newick.tags ; length = Some length }
)
let length =
List.fold branches ~init:0. ~f:(fun acc bi ->
acc +. Option.value_exn bi.length)
in
{ Newick.tags; length = Some length })
|> reset_transitions
let infer_binary_condition_on_branches ?(gain_relative_cost = 2.) t ~convergent_leaves =
let infer_binary_condition_on_branches ?(gain_relative_cost = 2.) t
~convergent_leaves =
let category (ni : Newick.node_info) =
Option.map ni.name ~f:(fun l ->
if String.Set.mem convergent_leaves l then 1 else 0
)
if String.Set.mem convergent_leaves l then 1 else 0)
in
let cost x y =
match x, y with
match (x, y) with
| 0, 1 -> gain_relative_cost
| 1, 0 -> 1.
| 0, 0
| 1, 1 -> 0.
| 0, 0 | 1, 1 -> 0.
| _ -> assert false
in
Fitch.fitch ~cost ~n:2 ~category t
|> transfer_condition_to_branches
|> reset_transitions
|> transfer_condition_to_branches |> reset_transitions
......@@ -3,10 +3,17 @@ open Phylogenetics
type t = Newick.tree
type u =
( Newick.node_info,
Newick.node_info,
Phylogenetics_convergence.Simulator.Branch_info.t )
Tree.t
val from_file : string -> u
val leaves : u -> (string * [ `Ancestral | `Convergent ]) list
val infer_binary_condition_on_branches :
?gain_relative_cost:float ->
t ->
convergent_leaves:String.Set.t ->
t
?gain_relative_cost:float -> t -> convergent_leaves:String.Set.t -> t
val remove_nodes_with_single_child : t -> t
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment