Commit f30d0759 authored by Philippe Veber's avatar Philippe Veber
Browse files

Dataset: clip tree with respect to each alignment

parent bf918464
......@@ -60,13 +60,59 @@ module New_API = struct
convergent_species : string list workflow ;
}
let clip_tree_on_alignment (tree : nhx file) (ali : nucleotide_fasta file) =
let f = fun%workflow dest ->
let open Phylogenetics in
let tree = Newick.from_file [%path tree] in
let _, ali =
Biotk.Fasta.from_file [%path ali]
|> Result.ok_or_failwith
in
let ali_species = List.map ali ~f:(fun it -> it.description) in
let clipped_tree =
Newick.map_inner_tree tree ~f:(fun tree ->
match
Tree.leafset_generated_subtree tree
(fun bi -> bi.Newick.name) ali_species
with
| None -> failwith "Tree has no leaf in alignment"
| Some filtered_tree -> filtered_tree
)
in
Newick.to_file clipped_tree dest
in
Workflow.path_plugin ~descr:"orthomam.clip_tree_on_alignment" f
let annotate_convergent_species_in_tree (tree : newick file) species : nhx file =
let f = fun%workflow dest ->
let open Phylogenetics in
let species = [%eval species]
and omm_tree = [%path tree] in
let ensembl_tree = Newick.from_file omm_tree in
let tagged_tree =
Newick.map_inner_tree ensembl_tree ~f:(fun t ->
Codepitk.Convergence_tree.infer_binary_condition_on_branches
~convergent_leaves:(String.Set.of_list species)
t
)
in
Newick.to_file tagged_tree dest
in
Workflow.path_plugin ~version:3 ~descr:"dataset.annotate_convergent_species_in_tree" f
let make ~tree ~nucleotide_alignments ~convergent_species =
let tree = annotate_convergent_species_in_tree tree convergent_species in
{ tree ; nucleotide_alignments ; convergent_species }
module Query = struct
type dataset = t
type t = dataset * nucleotide_fasta file
let nucleotide_alignment = snd
let tree ~branch_length_unit:_ (d, _) = d.tree
let tree ~branch_length_unit:_ (d, fa) =
clip_tree_on_alignment d.tree fa
end
let queries d =
......
......@@ -78,24 +78,6 @@ let clip_tree_on_alignment tree ali =
let omm_tree_of_db db =
Workflow.input (Orthomam_db.tree db)
let annotate_convergent_species_in_tree (tree : newick file) species : nhx file =
let f = fun%workflow dest ->
let open Phylogenetics in
let species = [%eval species]
and omm_tree = [%path tree] in
let ensembl_tree = Newick.from_file omm_tree in
let tagged_tree =
Newick.map_inner_tree ensembl_tree ~f:(fun t ->
Codepitk.Convergence_tree.infer_binary_condition_on_branches
~convergent_leaves:(String.Set.of_list species)
t
)
in
Newick.to_file tagged_tree dest
in
Workflow.path_plugin ~version:3 ~descr:"tree_of_convergent_species" f
let compare_tree_branch_lengths t1 t2 =
let f = fun%workflow dest ->
let open Phylogenetics in
......@@ -393,7 +375,7 @@ module Q = struct
let tree ~branch_length_unit q =
clip_tree_on_alignment
(annotate_convergent_species_in_tree
(Dataset.New_API.annotate_convergent_species_in_tree
(omm_tree_with_branch_lengths ~branch_length_unit q.db)
q.convergent_species)
(alignment q)
......@@ -405,7 +387,7 @@ end
include Q
let tree_of_db db ~branch_length_unit ~convergent_species =
annotate_convergent_species_in_tree
Dataset.New_API.annotate_convergent_species_in_tree
(omm_tree_with_branch_lengths ~branch_length_unit db)
(Workflow.data convergent_species)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment