Commit 4bf4ed46 authored by Philippe Veber's avatar Philippe Veber
Browse files

Detection_pipeline: tree asks for the unit of branch lengths

parent a01b693d
......@@ -3,7 +3,9 @@ open File_formats
module type Query = sig
type t
val tree : t -> nhx file
val tree :
branch_length_unit:[`Nucleotide | `Amino_acid | `Codon] ->
t -> nhx file
val nucleotide_alignment : t -> nucleotide_fasta file
end
......@@ -51,53 +53,53 @@ module Make(Q : Query) = struct
Tree_dataset.raxmlng_fna ~fna:(nucleotide_alignment d) ()
let identical d =
let tree_sc = Tree_dataset.prepare_sc_tree (tree d) in
let tree_id = Tree_dataset.prepare_tree_with_node_id (tree d) in
let tree_sc = Tree_dataset.prepare_sc_tree (tree ~branch_length_unit:`Amino_acid d) in
let tree_id = Tree_dataset.prepare_tree_with_node_id (tree ~branch_length_unit:`Amino_acid d) in
Identical.identical ~tree_id ~tree_sc ~prot_model:"LG08" ~faa:(amino_acid_alignment d) ()
|> Identical.results
let topological d =
let faa = amino_acid_alignment d in
let tree_conv = Tree_dataset.prepare_topological_tree (tree d) in
let tree = Tree_dataset.prepare_tree_with_node_id (tree d) in
let tree_conv = Tree_dataset.prepare_topological_tree (tree ~branch_length_unit:`Amino_acid d) in
let tree = Tree_dataset.prepare_tree_with_node_id (tree ~branch_length_unit:`Amino_acid d) in
Topological.topological ~faa ~tree ~tree_conv ~prot_model:"LG08" ()
|> Topological.results
let multinomial d =
Multinomial.multinomial
~tree_sc:(tree d)
~tree_sc:(tree ~branch_length_unit:`Amino_acid d)
~faa:(amino_acid_alignment d)
()
let multinomial_asymptotic_lrt d =
Multinomial.multinomial_asymptotic_lrt
~tree_sc:(tree d)
~tree_sc:(tree ~branch_length_unit:`Amino_acid d)
~faa:(amino_acid_alignment d)
let multinomial_asymptotic_sparse d =
Multinomial.multinomial_asymptotic_sparse
~tree_sc:(tree d)
~tree_sc:(tree ~branch_length_unit:`Amino_acid d)
~faa:(amino_acid_alignment d)
let multinomial_simulation_lrt d =
Multinomial.multinomial_simulation_lrt
~tree_sc:(tree d)
~tree_sc:(tree ~branch_length_unit:`Amino_acid d)
~faa:(amino_acid_alignment d)
let multinomial_simulation_sparse d =
Multinomial.multinomial_simulation_sparse
~tree_sc:(tree d)
~tree_sc:(tree ~branch_length_unit:`Amino_acid d)
~faa:(amino_acid_alignment d)
let tdg09 d =
Tamuri.tdg09
~tree:(tree d)
~tree:(tree ~branch_length_unit:`Amino_acid d)
~faa:(amino_acid_alignment d)
()
|> Tamuri.results
let diffseltree d =
Tree_dataset.prepare_diffsel_tree (tree d)
Tree_dataset.prepare_diffsel_tree (tree ~branch_length_unit:`Amino_acid d)
let diffsel d =
Diffsel.diffsel
......@@ -121,18 +123,18 @@ module Make(Q : Query) = struct
let pcoc ?(gamma = true) ?(ncat = 10) d =
let faa = amino_acid_alignment d in
let tree = tree d in
let tree = tree ~branch_length_unit:`Amino_acid d in
Pcoc.pcoc ~catx_est:ncat ~plot_complete:false ~gamma ~faa ~tree ()
|> Pcoc.results
let pcoc_v2 ?(gamma = true) ?(aa_profiles = `C10) d =
let faa = amino_acid_alignment d in
let tree = tree d in
let tree = tree ~branch_length_unit:`Amino_acid d in
Pcoc.pcoc_v2 ~aa_profiles ~gamma ~faa ~tree ()
|> Pcoc.results
let dn_ds_dnds_trees d =
Testnh.dn_ds_trees_real_data ~fna:(nucleotide_alignment d) ~tree:(tree d) ()
Testnh.dn_ds_trees_real_data ~fna:(nucleotide_alignment d) ~tree:(tree ~branch_length_unit:`Nucleotide d) ()
let dn_tree d = (dn_ds_dnds_trees d).dn_tsv
let ds_tree d = (dn_ds_dnds_trees d).ds_tsv
......
......@@ -3,7 +3,9 @@ open File_formats
module type Query = sig
type t
val tree : t -> nhx file
val tree :
branch_length_unit:[`Nucleotide | `Amino_acid | `Codon] ->
t -> nhx file
val nucleotide_alignment : t -> nucleotide_fasta file
end
......
......@@ -100,9 +100,7 @@ let%pworkflow clip_tree_on_alignment tree ali =
let omm_tree_of_db db =
Workflow.input (Orthomam_db.tree db)
let omm_tree q = omm_tree_of_db q.db
let tree_of_convergent_species tree species =
let annotate_convergent_species_in_tree tree species =
Workflow.path_plugin ~version:3 ~descr:"tree_of_convergent_species" (
let open Phylogenetics in
let%pdeps species = species
......@@ -117,33 +115,6 @@ let tree_of_convergent_species tree species =
Newick.to_file tagged_tree [%dest]
)
module Q = struct
type t = query
let alignment q : phylip file =
Workflow.input q.alignment
|> remove_unobserved_sequences_from_alignment
let tree q =
clip_tree_on_alignment
(tree_of_convergent_species (omm_tree q) q.convergent_species)
(alignment q)
let nucleotide_alignment q =
fasta_of_phylip (alignment q)
end
include Q
include Detection_pipeline.Make(Q)
let species_with_echolocation = [
"Rhinolophus ferrumequinum" ;
"Tursiops_truncatus" ;
"Myotis_lucifugus" ;
"Physeter_catodon" ;
]
let%pworkflow compare_tree_branch_lengths t1 t2 =
let open Phylogenetics in
let t1 = Newick.from_file [%path t1] in
......@@ -341,3 +312,38 @@ let concatenate_calibration ~seed db =
)
in
concatenate_calibration_figure ~nsites ~trees
let estimated_nucleotide_tree db =
concatenate ~seed:1234 ~site_sampling:10_000 ~max_missing_sequences:0 db
module Q = struct
type t = query
let alignment q : phylip file =
Workflow.input q.alignment
|> remove_unobserved_sequences_from_alignment
let tree ~branch_length_unit q =
let omm_tree_with_branch_lengths = match branch_length_unit with
| `Nucleotide -> estimated_nucleotide_tree q.db
| `Amino_acid -> assert false
| `Codon -> assert false
in
clip_tree_on_alignment
(annotate_convergent_species_in_tree omm_tree_with_branch_lengths q.convergent_species)
(alignment q)
let nucleotide_alignment q =
fasta_of_phylip (alignment q)
end
include Q
include Detection_pipeline.Make(Q)
let species_with_echolocation = [
"Rhinolophus ferrumequinum" ;
"Tursiops_truncatus" ;
"Myotis_lucifugus" ;
"Physeter_catodon" ;
]
......@@ -14,7 +14,11 @@ val query :
query
val alignment : query -> phylip file
val tree : query -> nhx file
val tree :
branch_length_unit:[`Nucleotide | `Amino_acid | `Codon] ->
query ->
nhx file
include Detection_pipeline.S with type query := query
......@@ -42,3 +46,7 @@ val concatenate_calibration :
seed:int ->
Orthomam_db.t ->
pdf file
val estimated_nucleotide_tree :
Orthomam_db.t ->
newick file
......@@ -47,7 +47,7 @@ struct
sprintf "%s/%s.%s" f.rd.alignment_dir_path f.name f.ext
|> Workflow.input
let tree f =
let tree ~branch_length_unit:_ f =
Raw_dataset.filter_input_tree
~descr:f.name
~tree:(Workflow.input f.rd.tree_path)
......@@ -137,7 +137,7 @@ let repo_parsed_rd pal rd =
[
[
Bistro_utils.Repo.item [(Family.name f) ^ ".faa"] (DP.amino_acid_alignment f);
Bistro_utils.Repo.item [(Family.name f) ^ ".species_tree.nhx"] (Family.tree f);
Bistro_utils.Repo.item [(Family.name f) ^ ".species_tree.nhx"] (Family.tree ~branch_length_unit:`Nucleotide f);
Bistro_utils.Repo.item [(Family.name f) ^ ".fna"] (Family.nucleotide_alignment f);
];
if (maybe_apply_parralel_analyses pal `GeneTree)
......
......@@ -39,7 +39,9 @@ val repo_parsed_rd :
module Family : sig
type t
val tree : t -> nhx file
val tree :
branch_length_unit:[`Nucleotide | `Amino_acid | `Codon] ->
t -> nhx file
end
val families : t -> Family.t list
......@@ -78,7 +78,7 @@ let tree_prefix = function
| Convdet_simulation { tree ; _ } ->
prefix_of_tree tree
let tree = function
let tree ~branch_length_unit:_ = function
| Bppseqgen_simulation { tree ; _ }
| Bppseqgen_mixed { tree ; _ }
| Convdet_simulation { tree ; _ } ->
......@@ -93,7 +93,7 @@ let tree = function
let tree_dataset sim =
Tree_dataset.prepare
~descr:("simulated_data." ^ (tree_prefix sim))
(tree sim)
(tree ~branch_length_unit:`Nucleotide sim)
let seed = function
| Bppseqgen_mixed s -> s.seed
......@@ -115,7 +115,7 @@ let bppseqgen sim ~hypothesis ~nb_sites ~profiles =
let profile_c = profile.profile_c in
Bppsuite.Bppseqgen.multi_profiles
~descr
~input_tree:(tree sim)
~input_tree:(tree ~branch_length_unit:`Nucleotide sim)
~hypothesis ~profile_f ~profile_c ~seed:(seed sim)
let rec nucleotide_alignment = function
......@@ -127,7 +127,7 @@ let rec nucleotide_alignment = function
let ha = nucleotide_alignment (Bppseqgen_simulation { hypothesis = HaPC (Fixed ne_s) ; profiles ; seed ; nb_sites = n_ha ; tree }) in
Utils.fasta_cappend h0 ha
| Convdet_simulation { n_h0 ; n_ha ; profiles ; ne_s ; gBGC ; branch_factor ; seed ; _ } as sim ->
let tree = tree sim in
let tree = tree ~branch_length_unit:`Nucleotide sim in
let fitness_profiles = Workflow.input profiles in
Simulator.simulator ~branch_factor ~n_ha ~n_h0 ~ne_s ~gBGC ~tree ~seed ~fitness_profiles ()
|> fst
......@@ -140,7 +140,7 @@ include Detection_pipeline.Make(struct
let alignment_plot d =
Convergence_detection.plot_convergent_sites
~tree:(tree d)
~tree:(tree ~branch_length_unit:`Amino_acid d)
~alignment:(amino_acid_alignment d)
~detection_results:(multinomial_asymptotic_lrt d)
()
......@@ -247,7 +247,7 @@ type record_t = {
}
let%workflow record_of_simu s =
let tree = [%path tree s] in
let tree = [%path tree ~branch_length_unit:`Nucleotide s] in
let nucleotide_alignment = [%path nucleotide_alignment s] in
let gc_mean_from_simu ~pos =
Alistats.nucleotide_fasta_gc_ac ~pos tree nucleotide_alignment
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment