orthomam.ml 2.39 KB
Newer Older
1 2 3 4 5 6 7 8
open Core_kernel
open Bistro
open File_formats
open Reviewphiltrans_toolbox

let ensembl_tree : nhx file =
  Bistro_unix.wget "ftp://ftp.ensembl.org/pub/release-98/compara/species_trees/vertebrates_species-tree_Ensembl.nh"

9 10
type alignment = string

11 12 13 14 15 16 17 18 19 20 21 22 23
let alignments_of_db db =
  Orthomam_db.list_alignments db

let%pworkflow fasta_of_phylip ali =
  let ali =
    [%path ali]
    |> Phylogenetics.Phylip.read_exn ~strict:false
  in
  List.map ali.items ~f:(fun it ->
      Biotk.Fasta.{ description = it.name ; sequence = it.sequence }
    )
  |> Biotk.Fasta.to_file [%dest]

24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
type query = {
  alignment : alignment ;
  convergent_species : string list workflow ;
}

let query alignment ~convergent_species =
  { alignment ; convergent_species }

let rec transfer_tag_to_branches t =
  let open Phylogenetics in
  let category : _ Tree.t -> int = function
    | Leaf (_, c) -> c
    | Node n -> snd n.data
  in
  match t with
  | Tree.Leaf (l, _) -> Tree.leaf l
  | Node n ->
    List1.map n.branches ~f:(fun (Branch b) ->
        let cat = category b.tip in
        let tags = ("cat", Int.to_string cat) :: b.data.Newick.tags in
        Tree.branch
          { b.data with Newick.tags }
          (transfer_tag_to_branches b.tip)
      )
    |> Tree.node (fst n.data)

let tag_tree_aux t tagged_leaves =
  let open Phylogenetics in
  let category (ni : Newick.node_info) =
    Option.map ni.name ~f:(fun l ->
        if String.Set.mem tagged_leaves l then 1 else 0
      )
  in
  Fitch.fitch ~n:2 ~category t
  |> transfer_tag_to_branches

let tag_tree tree tagged_leaves =
  let open Phylogenetics in
  let tagged_leaves = String.Set.of_list tagged_leaves in
  match (tree : Newick.t) with
  | Tree t -> Newick.Tree (tag_tree_aux t tagged_leaves)
  | Branch (Branch { tip ; data }) ->
    Newick.Branch (
      Tree.branch data (tag_tree_aux tip tagged_leaves)
    )

let tree_of_convergent_species species =
  Workflow.path_plugin ~descr:"tree_of_convergent_species" (
    let open Phylogenetics in
    let%pdeps species = species
    and ensembl_tree = path ensembl_tree in

    let ensembl_tree = Newick.from_file ensembl_tree in
    let tagged_tree = tag_tree ensembl_tree species in
    Newick.to_file tagged_tree [%dest]
  )

81
module Q = struct
82 83 84 85
  type t = query
  let tree q = tree_of_convergent_species q.convergent_species
  let nucleotide_alignment q =
    fasta_of_phylip (Workflow.input q.alignment)
86 87
end

88
include Detection_pipeline.Make(Q)