open Core_kernel open Bistro open File_formats open Reviewphiltrans_toolbox let ensembl_tree : nhx file = Bistro_unix.wget "ftp://ftp.ensembl.org/pub/release-99/compara/species_trees/vertebrates_species-tree_Ensembl.nh" type alignment = Orthomam_db.t * string let alignments_of_db db = List.map (Orthomam_db.list_alignments db) ~f:(fun ali -> db, ali) let%pworkflow fasta_of_phylip ali = let ali = [%path ali] |> Phylogenetics.Phylip.read_exn ~strict:false in List.map ali.items ~f:(fun it -> Biotk.Fasta.{ description = it.name ; sequence = it.sequence } ) |> Biotk.Fasta.to_file [%dest] type query = { db : Orthomam_db.t ; alignment : string ; convergent_species : string list workflow ; } let query (db, alignment) ~convergent_species = { db ; alignment ; convergent_species } let rec transfer_tag_to_branches t = let open Phylogenetics in let category : _ Tree.t -> int = function | Leaf (_, c) -> c | Node n -> snd n.data in match t with | Tree.Leaf (l, _) -> Tree.leaf l | Node n -> let cat_n = snd n.data in List1.map n.branches ~f:(fun (Branch b) -> let cat_child = category b.tip in let tags = List.concat [ [ "Condition", Int.to_string cat_child ] ; ( if cat_n <> cat_child then [ "Transition", Int.to_string cat_child ] else [] ) ; b.data.Newick.tags ] in Tree.branch { b.data with Newick.tags } (transfer_tag_to_branches b.tip) ) |> Tree.node (fst n.data) let tag_tree t tagged_leaves = let open Phylogenetics in let category (ni : Newick.node_info) = Option.map ni.name ~f:(fun l -> if String.Set.mem tagged_leaves l then 1 else 0 ) in Fitch.fitch ~n:2 ~category t |> transfer_tag_to_branches let with_inner_newick_tree tree ~f = let open Phylogenetics in match (tree : Newick.t) with | Tree t -> Newick.Tree (f t) | Branch (Branch { tip ; data }) -> Newick.Branch ( Tree.branch data (f tip) ) let%pworkflow clip_tree_on_alignment tree ali = let open Phylogenetics in let tree = Newick.from_file [%path tree] in let ali = Phylip.read_exn ~strict:false [%path ali] in let ali_species = List.map ali.items ~f:(fun it -> it.name) in let clipped_tree = with_inner_newick_tree tree ~f:(fun tree -> match Tree.leafset_generated_subtree tree (fun bi -> bi.Newick.name) ali_species with | None -> failwith "Tree has no leaf in alignment" | Some filtered_tree -> filtered_tree ) in Newick.to_file clipped_tree [%dest] let omm_tree q = Workflow.input (Orthomam_db.tree q.db) let tree_of_convergent_species tree species = Workflow.path_plugin ~version:3 ~descr:"tree_of_convergent_species" ( let open Phylogenetics in let%pdeps species = species and omm_tree = path tree in let ensembl_tree = Newick.from_file omm_tree in let tagged_tree = with_inner_newick_tree ensembl_tree ~f:(fun t -> tag_tree t (String.Set.of_list species) ) in Newick.to_file tagged_tree [%dest] ) module Q = struct type t = query let alignment q : phylip file = Workflow.input q.alignment let tree q = clip_tree_on_alignment (tree_of_convergent_species (omm_tree q) q.convergent_species) (alignment q) let nucleotide_alignment q = fasta_of_phylip (alignment q) end include Q include Detection_pipeline.Make(Q) let species_with_echolocation = [ "Rhinolophus ferrumequinum" ; "Tursiops_truncatus" ; "Myotis_lucifugus" ; "Physeter_catodon" ; ]