open Core_kernel open Bistro open File_formats open Reviewphiltrans_toolbox let ensembl_tree : nhx file = Bistro_unix.wget "ftp://ftp.ensembl.org/pub/release-98/compara/species_trees/vertebrates_species-tree_Ensembl.nh" type alignment = string let alignments_of_db db = Orthomam_db.list_alignments db let%pworkflow fasta_of_phylip ali = let ali = [%path ali] |> Phylogenetics.Phylip.read_exn ~strict:false in List.map ali.items ~f:(fun it -> Biotk.Fasta.{ description = it.name ; sequence = it.sequence } ) |> Biotk.Fasta.to_file [%dest] type query = { alignment : alignment ; convergent_species : string list workflow ; } let query alignment ~convergent_species = { alignment ; convergent_species } let rec transfer_tag_to_branches t = let open Phylogenetics in let category : _ Tree.t -> int = function | Leaf (_, c) -> c | Node n -> snd n.data in match t with | Tree.Leaf (l, _) -> Tree.leaf l | Node n -> List1.map n.branches ~f:(fun (Branch b) -> let cat = category b.tip in let tags = ("cat", Int.to_string cat) :: b.data.Newick.tags in Tree.branch { b.data with Newick.tags } (transfer_tag_to_branches b.tip) ) |> Tree.node (fst n.data) let tag_tree_aux t tagged_leaves = let open Phylogenetics in let category (ni : Newick.node_info) = Option.map ni.name ~f:(fun l -> if String.Set.mem tagged_leaves l then 1 else 0 ) in Fitch.fitch ~n:2 ~category t |> transfer_tag_to_branches let tag_tree tree tagged_leaves = let open Phylogenetics in let tagged_leaves = String.Set.of_list tagged_leaves in match (tree : Newick.t) with | Tree t -> Newick.Tree (tag_tree_aux t tagged_leaves) | Branch (Branch { tip ; data }) -> Newick.Branch ( Tree.branch data (tag_tree_aux tip tagged_leaves) ) let tree_of_convergent_species species = Workflow.path_plugin ~descr:"tree_of_convergent_species" ( let open Phylogenetics in let%pdeps species = species and ensembl_tree = path ensembl_tree in let ensembl_tree = Newick.from_file ensembl_tree in let tagged_tree = tag_tree ensembl_tree species in Newick.to_file tagged_tree [%dest] ) module Q = struct type t = query let tree q = tree_of_convergent_species q.convergent_species let nucleotide_alignment q = fasta_of_phylip (Workflow.input q.alignment) end include Detection_pipeline.Make(Q)