open Core_kernel open Phylogenetics type condition = [`Ancestral | `Convergent] type branch_info = { condition : condition ; length : float ; } type t = (unit, string, branch_info) Tree.t module Tags = struct let condition_label = "Condition" let transition_label = "Transition" let condition tags = List.Assoc.find tags condition_label ~equal:String.equal let string_of_condition = function | `Ancestral -> "0" | `Convergent -> "1" let set_condition tags c = List.Assoc.( add (remove tags condition_label ~equal:String.equal) condition_label c ~equal:String.equal) (* let other_tags tags = * List.filter tags ~f:(fun (key, _) -> String.(key <> condition_label && key <> transition_label)) *) let unset_transition tags = List.Assoc.remove tags transition_label ~equal:String.equal let set_transition tags c = List.Assoc.( add (unset_transition tags) transition_label c ~equal:String.equal) end let condition_of_branch_info (bi : Newick.branch_info) = Tags.condition bi.tags let of_newick_tree t = let open Phylogenetics.Newick in try let node _ = () in let leaf (l : Newick.node_info) = match l.name with | Some n -> n | None -> failwith "missing leaf name" in let branch b = let length = match b.length with | None -> failwith "missing branch length" | Some bl -> bl in let condition = match List.Assoc.find ~equal:String.equal b.tags "Condition" with | Some s -> ( match s with | "0" -> `Ancestral | "1" -> `Convergent | _ -> failwithf "Invalid condition: %s" s () ) | None -> failwith "Missing Condition tag" in { length; condition } in Tree.map t ~node ~leaf ~branch |> Result.return with Failure msg -> Result.fail (`Msg msg) let to_newick_tree t = let node () = { Newick.name = None } in let leaf l = { Newick.name = Some l } in let branch b = { Newick.length = Some b.length ; tags = [] } in Tree.map t ~node ~leaf ~branch let from_file fn = let module BI = Phylogenetics_convergence.Simulator.Branch_info in Newick.from_file fn |> Newick.with_inner_tree ~f:of_newick_tree let leaves tree = let rec node condition t acc = match t with | Tree.Node n -> List1.fold_right n.branches ~init:acc ~f:branch | Leaf species -> (species, condition) :: acc and branch (Tree.Branch b) acc = node b.data.condition b.tip acc in node `Ancestral tree [] let%test "leaves computed in correct order" = match from_file "../../data/besnard2009/besnard2009.nhx" with | Ok t -> List.equal String.equal (List.take (leaves t) 4 |> List.map ~f:fst) [ "Chrysithr" ; "Ele.bald" ; "Ele.bal2" ; "Ele.bal4" ] | Error (`Msg msg) -> failwith msg let rec transfer_condition_to_branches t = let category : _ Tree.t -> condition = function | Leaf (_, c) -> c | Node n -> snd n.data in match t with | Tree.Leaf (l, _) -> Tree.leaf l | Node n -> List1.map n.branches ~f:(fun (Branch b) -> let cat_child = category b.tip in let tags = Tags.set_condition b.data.Newick.tags (Tags.string_of_condition cat_child) in Tree.branch { b.data with Newick.tags } (transfer_condition_to_branches b.tip)) |> Tree.node (fst n.data) let reset_transitions (tree : Newick.tree) = let rec aux mother_condition tree = match (tree : Newick.tree) with | Leaf _ as l -> l | Node n -> let branches = List1.map n.branches ~f:(fun (Branch b) -> let tags, c_b = match Tags.condition b.data.tags with | None -> failwith "tree tagged with condition expected" | Some c_b -> let tags = if String.(c_b <> mother_condition) then Tags.set_transition b.data.tags c_b else Tags.unset_transition b.data.tags in (tags, c_b) in let data = { b.data with tags } in Tree.branch data (aux c_b b.tip)) in Node { n with branches } in match tree with | Leaf _ as l -> l | Node n -> let branches = List1.map n.branches ~f:(fun (Branch b) -> match Tags.condition b.data.tags with | None -> failwith "tree tagged with condition expected" | Some c_b -> Tree.branch b.data (aux c_b b.tip)) in Node { n with branches } let length_on_each_condition branches = let module A = Biocaml_unix.Accu in let acc = A.create ~bin:Fn.id ~zero:0. ~add:( +. ) () in List.iter branches ~f:(fun bi -> match (condition_of_branch_info bi, bi.Newick.length) with | Some c, Some l -> A.add acc c l | _ -> ()) ; A.to_alist acc let remove_nodes_with_single_child (tree : Newick.tree) = Tree.simplify_node_with_single_child tree ~merge_branch_data:(fun branches -> let condition_stats = length_on_each_condition branches in let major_condition = List.max_elt condition_stats ~compare:(fun (_, l) (_, l') -> Float.compare l l') in let tags = match major_condition with | None -> [] | Some (c, _) -> Tags.set_condition [] c in let length = List.fold branches ~init:0. ~f:(fun acc bi -> acc +. Option.value_exn bi.length) in { Newick.tags; length = Some length }) |> reset_transitions let infer_binary_condition_on_branches ?(gain_relative_cost = 2.) t ~convergent_leaves = let category (ni : Newick.node_info) = Option.map ni.name ~f:(fun l -> if String.Set.mem convergent_leaves l then 1 else 0) in let cost x y = match (x, y) with | 0, 1 -> gain_relative_cost | 1, 0 -> 1. | 0, 0 | 1, 1 -> 0. | _ -> assert false in let convert_node = function | x, 0 -> x, `Ancestral | x, 1 -> x, `Convergent | _ -> assert false in Fitch.fitch ~cost ~n:2 ~category t |> Tree.map ~node:convert_node ~leaf:convert_node ~branch:Fn.id |> transfer_condition_to_branches |> reset_transitions let alignment_counts_map tree alignment f = let leaves = leaves tree |> List.map ~f:(fun (n, cond) -> match Alignment.find_sequence alignment n with | None -> failwithf "Could not find %s in alignment" n () | Some seq -> seq, cond ) in let seqs0, seqs1 = List.partition_map leaves ~f:Either.(function | (aa, `Ancestral) -> First aa | (aa, `Convergent) -> Second aa ) in let counts seqs i = Amino_acid.Table.init (fun aa -> let aa = Amino_acid.to_char aa in List.count seqs ~f:(fun s -> Char.equal s.[i] aa) ) in let site i = f (counts seqs0 i) (counts seqs1 i) in let n = Alignment.ncols alignment in List.init n ~f:site