Commit f30d0759 authored by Philippe Veber

Dataset: clip tree with respect to each alignment

parent bf918464
...@@ -60,13 +60,59 @@ module New_API = struct ...@@ -60,13 +60,59 @@ module New_API = struct
convergent_species : string list workflow ; convergent_species : string list workflow ;
} }
(** [clip_tree_on_alignment tree ali] builds a path workflow that reads the
    tree file [tree] and the FASTA file [ali], and writes a transformed tree
    to the workflow destination.

    The transformation applies [Tree.leafset_generated_subtree] with the
    leaf names of the tree and the list of sequence descriptions of [ali].
    NOTE(review): presumably this keeps only the leaves whose name is one
    of the sequence descriptions -- confirm against the Phylogenetics API.

    The workflow raises if the FASTA file cannot be read (through
    [Result.ok_or_failwith]) or if [Tree.leafset_generated_subtree]
    returns [None]. *)
let clip_tree_on_alignment (tree : nhx file) (ali : nucleotide_fasta file) =
let f = fun%workflow dest ->
let open Phylogenetics in
(* Shadows the workflow argument with the tree parsed from its file. *)
let tree = Newick.from_file [%path tree] in
(* Only the second component of the parsed FASTA is used; the first one
   is discarded. *)
let _, ali =
Biotk.Fasta.from_file [%path ali]
|> Result.ok_or_failwith
in
(* The description field of each FASTA item is compared to leaf names
   below. *)
let ali_species = List.map ali ~f:(fun it -> it.description) in
let clipped_tree =
Newick.map_inner_tree tree ~f:(fun tree ->
match
Tree.leafset_generated_subtree tree
(fun bi -> bi.Newick.name) ali_species
with
(* No subtree could be produced for this leaf set: abort the workflow. *)
| None -> failwith "Tree has no leaf in alignment"
| Some filtered_tree -> filtered_tree
)
in
Newick.to_file clipped_tree dest
in
Workflow.path_plugin ~descr:"orthomam.clip_tree_on_alignment" f
(** [annotate_convergent_species_in_tree tree species] builds a path
    workflow that reads the Newick file [tree], evaluates the [species]
    workflow, and writes to the workflow destination the tree obtained by
    applying [Codepitk.Convergence_tree.infer_binary_condition_on_branches]
    with the set of [species] as [~convergent_leaves].

    The result is typed as an [nhx file].
    NOTE(review): presumably the branches are tagged with a
    convergent/non-convergent condition -- confirm against Codepitk. *)
let annotate_convergent_species_in_tree (tree : newick file) species : nhx file =
let f = fun%workflow dest ->
let open Phylogenetics in
(* [%eval species] yields the value of the species workflow, [%path tree]
   the location of the input tree file. *)
let species = [%eval species]
and omm_tree = [%path tree] in
let ensembl_tree = Newick.from_file omm_tree in
let tagged_tree =
Newick.map_inner_tree ensembl_tree ~f:(fun t ->
Codepitk.Convergence_tree.infer_binary_condition_on_branches
~convergent_leaves:(String.Set.of_list species)
t
)
in
Newick.to_file tagged_tree dest
in
(* The workflow is registered with an explicit version number and
   description. *)
Workflow.path_plugin ~version:3 ~descr:"dataset.annotate_convergent_species_in_tree" f
(** [make ~tree ~nucleotide_alignments ~convergent_species] builds a dataset
    record. The [tree] field holds the result of
    [annotate_convergent_species_in_tree] applied to the given tree and
    [convergent_species]; the two other fields are stored as given. *)
let make ~tree:raw_tree ~nucleotide_alignments ~convergent_species =
  let annotated_tree =
    annotate_convergent_species_in_tree raw_tree convergent_species
  in
  { tree = annotated_tree ; nucleotide_alignments ; convergent_species }
module Query = struct module Query = struct
type dataset = t type dataset = t
type t = dataset * nucleotide_fasta file type t = dataset * nucleotide_fasta file
let nucleotide_alignment = snd let nucleotide_alignment = snd
let tree ~branch_length_unit:_ (d, _) = d.tree let tree ~branch_length_unit:_ (d, fa) =
clip_tree_on_alignment d.tree fa
end end
let queries d = let queries d =
......
...@@ -78,24 +78,6 @@ let clip_tree_on_alignment tree ali = ...@@ -78,24 +78,6 @@ let clip_tree_on_alignment tree ali =
let omm_tree_of_db db = let omm_tree_of_db db =
Workflow.input (Orthomam_db.tree db) Workflow.input (Orthomam_db.tree db)
(** [annotate_convergent_species_in_tree tree species] builds a path
    workflow that reads the Newick file [tree], evaluates the [species]
    workflow, and writes to the workflow destination the tree obtained by
    applying [Codepitk.Convergence_tree.infer_binary_condition_on_branches]
    with the set of [species] as [~convergent_leaves].

    The result is typed as an [nhx file].
    NOTE(review): presumably the branches are tagged with a
    convergent/non-convergent condition -- confirm against Codepitk. *)
let annotate_convergent_species_in_tree (tree : newick file) species : nhx file =
let f = fun%workflow dest ->
let open Phylogenetics in
(* [%eval species] yields the value of the species workflow, [%path tree]
   the location of the input tree file. *)
let species = [%eval species]
and omm_tree = [%path tree] in
let ensembl_tree = Newick.from_file omm_tree in
let tagged_tree =
Newick.map_inner_tree ensembl_tree ~f:(fun t ->
Codepitk.Convergence_tree.infer_binary_condition_on_branches
~convergent_leaves:(String.Set.of_list species)
t
)
in
Newick.to_file tagged_tree dest
in
(* The workflow is registered with an explicit version number and
   description. *)
Workflow.path_plugin ~version:3 ~descr:"tree_of_convergent_species" f
let compare_tree_branch_lengths t1 t2 = let compare_tree_branch_lengths t1 t2 =
let f = fun%workflow dest -> let f = fun%workflow dest ->
let open Phylogenetics in let open Phylogenetics in
...@@ -393,7 +375,7 @@ module Q = struct ...@@ -393,7 +375,7 @@ module Q = struct
let tree ~branch_length_unit q = let tree ~branch_length_unit q =
clip_tree_on_alignment clip_tree_on_alignment
(annotate_convergent_species_in_tree (Dataset.New_API.annotate_convergent_species_in_tree
(omm_tree_with_branch_lengths ~branch_length_unit q.db) (omm_tree_with_branch_lengths ~branch_length_unit q.db)
q.convergent_species) q.convergent_species)
(alignment q) (alignment q)
...@@ -405,7 +387,7 @@ end ...@@ -405,7 +387,7 @@ end
include Q include Q
let tree_of_db db ~branch_length_unit ~convergent_species = let tree_of_db db ~branch_length_unit ~convergent_species =
annotate_convergent_species_in_tree Dataset.New_API.annotate_convergent_species_in_tree
(omm_tree_with_branch_lengths ~branch_length_unit db) (omm_tree_with_branch_lengths ~branch_length_unit db)
(Workflow.data convergent_species) (Workflow.data convergent_species)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment