Commit 6acf2dc3 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

Implements Gemma as a detection pipeline

parent 272e5d76
......@@ -3,9 +3,12 @@ open File_formats
module type Query = sig
type t
val tree :
branch_length_unit:[`Nucleotide | `Amino_acid | `Codon] ->
t -> nhx file
branch_length_unit:[ `Nucleotide | `Amino_acid | `Codon ] ->
t ->
nhx file
val nucleotide_alignment : t -> nucleotide_fasta file
end
......@@ -13,40 +16,58 @@ module type S = sig
type query
val amino_acid_alignment : query -> aminoacid_fasta file
val gene_tree : query -> nw file
val dn_tree : query -> text file
val ds_tree : query -> text file
val dnds_tree : query -> text file
val identical : query -> text file
val topological : query -> text file
val multinomial : query -> text file
val multinomial_simulation_lrt : query -> text file
val multinomial_simulation_sparse : query -> text file
val multinomial_asymptotic_lrt : query -> text file
val multinomial_asymptotic_sparse : query -> text file
val tdg09 : query -> text file
val failsafe_tdg09 : query -> text file
val pcoc : ?gamma:bool -> ?ncat:int -> query -> text file
val pcoc_v2 :
?gamma:bool ->
?aa_profiles:Pcoc.aa_profiles ->
query -> text file
?gamma:bool -> ?aa_profiles:Pcoc.aa_profiles -> query -> text file
val gemma :
query ->
lmm_test:[ `All | `LRT | `Score | `Wald ] ->
relatedness_mode:[ `Centered | `Standardized ] ->
text file
val diffsel : query -> text file
val diffseldsparse :
?pi:float ->
?shiftprob:float * float ->
?eps:float ->
query ->
text file
val view_site :
query ->
convergent_species:string list ->
site_pos:int ->
pdf file
val view_site :
query -> convergent_species:string list -> site_pos:int -> pdf file
end
module Make(Q : Query) = struct
module Make (Q : Query) = struct
open Q
let amino_acid_alignment d =
......@@ -58,31 +79,48 @@ module Make(Q : Query) = struct
let gene_tree d =
Tree_dataset.raxmlng_fna ~fna:(nucleotide_alignment d) ()
let%pworkflow tree_with_no_single_child ~branch_length_unit d : newick file =
let%pworkflow tree_with_no_single_child ~branch_length_unit d :
newick file =
let tree_file = [%path tree ~branch_length_unit d] in
let open Phylogenetics in
let tree = Newick.from_file tree_file in
let tree = Newick.map_inner_tree tree ~f:Reviewphiltrans_toolbox.Convergence_tree.remove_nodes_with_single_child in
let tree =
Newick.map_inner_tree tree
~f:
Reviewphiltrans_toolbox.Convergence_tree
.remove_nodes_with_single_child
in
Newick.to_file tree [%dest]
let identical d =
let tree_sc = Tree_dataset.prepare_sc_tree (tree ~branch_length_unit:`Amino_acid d) in
let tree_id = Tree_dataset.prepare_tree_with_node_id (tree ~branch_length_unit:`Amino_acid d) in
Identical.identical ~tree_id ~tree_sc ~prot_model:"LG08" ~faa:(amino_acid_alignment d) ()
let tree_sc =
Tree_dataset.prepare_sc_tree (tree ~branch_length_unit:`Amino_acid d)
in
let tree_id =
Tree_dataset.prepare_tree_with_node_id
(tree ~branch_length_unit:`Amino_acid d)
in
Identical.identical ~tree_id ~tree_sc ~prot_model:"LG08"
~faa:(amino_acid_alignment d) ()
|> Identical.results
let topological d =
let faa = amino_acid_alignment d in
let tree_conv = Tree_dataset.prepare_topological_tree (tree ~branch_length_unit:`Amino_acid d) in
let tree = Tree_dataset.prepare_tree_with_node_id (tree ~branch_length_unit:`Amino_acid d) in
let tree_conv =
Tree_dataset.prepare_topological_tree
(tree ~branch_length_unit:`Amino_acid d)
in
let tree =
Tree_dataset.prepare_tree_with_node_id
(tree ~branch_length_unit:`Amino_acid d)
in
Topological.topological ~faa ~tree ~tree_conv ~prot_model:"LG08" ()
|> Topological.results
let multinomial d =
Multinomial.multinomial
~tree_sc:(tree ~branch_length_unit:`Amino_acid d)
~faa:(amino_acid_alignment d)
()
~faa:(amino_acid_alignment d) ()
let multinomial_asymptotic_lrt d =
Multinomial.multinomial_asymptotic_lrt
......@@ -107,44 +145,49 @@ module Make(Q : Query) = struct
let tdg09 d =
Tamuri.tdg09
~tree:(tree_with_no_single_child ~branch_length_unit:`Amino_acid d)
~faa:(amino_acid_alignment d)
()
~faa:(amino_acid_alignment d) ()
|> Tamuri.results
let%pworkflow mock_tdg09 d =
match Biotk.Fasta.from_file [%path amino_acid_alignment d] with
| Ok (_, item :: _) ->
let open Core_kernel in
let n = String.length item.sequence in
"Sites\tTdg09_1MinusFDR\tTdg09_1MinusLRT\tTdg09_prob_post"
:: List.init n ~f:(fun i -> sprintf "%d\t0.0\t0.0\tNA" (i + 1))
|> Out_channel.write_lines [%dest]
let open Core_kernel in
let n = String.length item.sequence in
"Sites\tTdg09_1MinusFDR\tTdg09_1MinusLRT\tTdg09_prob_post"
:: List.init n ~f:(fun i -> sprintf "%d\t0.0\t0.0\tNA" (i + 1))
|> Out_channel.write_lines [%dest]
| _ -> failwith "couldn't read an item in fasta"
let failsafe_tdg09 d = Workflow.trywith (tdg09 d) (mock_tdg09 d)
let gemma q ~lmm_test ~relatedness_mode =
let alignment = amino_acid_alignment q in
let tree = tree ~branch_length_unit:`Amino_acid q in
let genotype = Gemma.genotype_of_fasta alignment in
let phenotype = Gemma.phenotype_of_tree tree in
let relatedness_matrix =
Gemma.calculate_relatedness_matrix ~mode:relatedness_mode ~genotype
~phenotype
in
Gemma.univariate_lmm ~lmm:lmm_test ~genotype ~phenotype
~relatedness_matrix
|> Gemma.result_table_of_output alignment
let diffseltree d =
Tree_dataset.prepare_diffsel_tree (tree ~branch_length_unit:`Amino_acid d)
Tree_dataset.prepare_diffsel_tree
(tree ~branch_length_unit:`Amino_acid d)
let diffsel d =
Diffsel.diffsel
~phy_n:(phylip_nucleotide_alignment d)
~tree:(diffseltree d)
~w_every:1
~n_cycles:50
()
~tree:(diffseltree d) ~w_every:1 ~n_cycles:50 ()
|> Diffsel.selector
let diffseldsparse ?pi ?shiftprob ?eps d =
Diffseldsparse.diffseldsparse
?pi ?shiftprob ?eps
Diffseldsparse.diffseldsparse ?pi ?shiftprob ?eps
~alignment:(phylip_nucleotide_alignment d)
~tree:(diffseltree d)
~w_every:1
~n_cycles:50
()
|> Diffseldsparse.readdiffseldsparse
|> Diffseldsparse.results
~tree:(diffseltree d) ~w_every:1 ~n_cycles:50 ()
|> Diffseldsparse.readdiffseldsparse |> Diffseldsparse.results
let pcoc ?(gamma = true) ?(ncat = 10) d =
let faa = amino_acid_alignment d in
......@@ -155,14 +198,17 @@ module Make(Q : Query) = struct
let pcoc_v2 ?(gamma = true) ?(aa_profiles = `C10) d =
let faa = amino_acid_alignment d in
let tree = tree ~branch_length_unit:`Amino_acid d in
Pcoc.pcoc_v2 ~aa_profiles ~gamma ~faa ~tree ()
|> Pcoc.results
Pcoc.pcoc_v2 ~aa_profiles ~gamma ~faa ~tree () |> Pcoc.results
let dn_ds_dnds_trees d =
Testnh.dn_ds_trees_real_data ~fna:(nucleotide_alignment d) ~tree:(tree ~branch_length_unit:`Nucleotide d) ()
Testnh.dn_ds_trees_real_data ~fna:(nucleotide_alignment d)
~tree:(tree ~branch_length_unit:`Nucleotide d)
()
let dn_tree d = (dn_ds_dnds_trees d).dn_tsv
let ds_tree d = (dn_ds_dnds_trees d).ds_tsv
let dn_tree d = (dn_ds_dnds_trees d).dn_tsv
let ds_tree d = (dn_ds_dnds_trees d).ds_tsv
let dnds_tree d = (dn_ds_dnds_trees d).dnds_tsv
let%pworkflow view_site query ~convergent_species ~site_pos =
......@@ -172,17 +218,14 @@ module Make(Q : Query) = struct
let convergent_species = [%param convergent_species] in
let site_pos = [%param site_pos] in
let condition s =
if List.mem s convergent_species then `Convergent
else `Ancestral
if List.mem s convergent_species then `Convergent else `Ancestral
in
let tree = Phylogenetics.Newick.from_file tree_path in
let alignment =
Biotk.Fasta.from_file alignment_path
|> Base.Result.ok_or_failwith
|> snd
let alignment =
Biotk.Fasta.from_file alignment_path
|> Base.Result.ok_or_failwith |> snd
in
CS.make ~condition tree alignment site_pos
|> CS.draw
|> Biotk_croquis.Croquis.Layout.simple
|> CS.draw |> Biotk_croquis.Croquis.Layout.simple
|> Base.Fn.flip Biotk_croquis.Croquis.Layout.render_pdf [%dest]
end
......@@ -3,9 +3,12 @@ open File_formats
module type Query = sig
type t
val tree :
branch_length_unit:[`Nucleotide | `Amino_acid | `Codon] ->
t -> nhx file
branch_length_unit:[ `Nucleotide | `Amino_acid | `Codon ] ->
t ->
nhx file
val nucleotide_alignment : t -> nucleotide_fasta file
end
......@@ -13,37 +16,55 @@ module type S = sig
type query
val amino_acid_alignment : query -> aminoacid_fasta file
val gene_tree : query -> nw file
val dn_tree : query -> text file
val ds_tree : query -> text file
val dnds_tree : query -> text file
val identical : query -> text file
val topological : query -> text file
val multinomial : query -> text file
val multinomial_simulation_lrt : query -> text file
val multinomial_simulation_sparse : query -> text file
val multinomial_asymptotic_lrt : query -> text file
val multinomial_asymptotic_sparse : query -> text file
val tdg09 : query -> text file
val failsafe_tdg09 : query -> text file
val pcoc : ?gamma:bool -> ?ncat:int -> query -> text file
val pcoc_v2 :
?gamma:bool ->
?aa_profiles:Pcoc.aa_profiles ->
query -> text file
?gamma:bool -> ?aa_profiles:Pcoc.aa_profiles -> query -> text file
val gemma :
query ->
lmm_test:[ `All | `LRT | `Score | `Wald ] ->
relatedness_mode:[ `Centered | `Standardized ] ->
text file
val diffsel : query -> text file
val diffseldsparse :
?pi:float ->
?shiftprob:float * float ->
?eps:float ->
query ->
text file
val view_site :
query ->
convergent_species:string list ->
site_pos:int ->
pdf file
val view_site :
query -> convergent_species:string list -> site_pos:int -> pdf file
end
module Make(Q : Query) : S with type query := Q.t
module Make (Q : Query) : S with type query := Q.t
......@@ -80,7 +80,7 @@ let calculate_relatedness_matrix ~mode ~genotype ~phenotype =
cmd "cp" [ tmp // relatedness_filename mode; dest ];
]
let%pworkflow[@version 3] result_table_of_output alignment gemma_output =
let%pworkflow[@version 4] result_table_of_output alignment gemma_output =
let module R = Reviewphiltrans_toolbox.Gemma.Result_file in
match R.of_file [%path gemma_output] with
| Error msg -> failwith msg
......
......@@ -3,9 +3,9 @@ open Bistro
type tree =
| NHX of string
| Pair_tree of {
npairs : int ;
branch_length1 : float ;
branch_length2 : float ;
npairs : int;
branch_length1 : float;
branch_length2 : float;
}
type t
......@@ -13,40 +13,63 @@ type t
val bppseqgen_mixed_simulation :
?ne_s:float ->
?seed:int ->
tree:tree -> profiles:string -> n_h0:int -> n_ha:int -> unit -> t
tree:tree ->
profiles:string ->
n_h0:int ->
n_ha:int ->
unit ->
t
val bppseqgen_simulation :
hyp:Convergence_hypothesis.t ->
tree:tree -> profiles:string -> nb_sites:int -> seed:int -> t
tree:tree ->
profiles:string ->
nb_sites:int ->
seed:int ->
t
val convdet_simulation :
?branch_factor:float ->
?ne_s:float * float ->
?gBGC:float * float ->
?seed:int ->
tree:tree -> profiles:string -> n_h0:int -> n_ha:int -> unit -> t
?branch_factor:float ->
?ne_s:float * float ->
?gBGC:float * float ->
?seed:int ->
tree:tree ->
profiles:string ->
n_h0:int ->
n_ha:int ->
unit ->
t
include Detection_pipeline.Query with type t := t
include Detection_pipeline.S with type query := t
val oracle : t -> text file
val alignment_plot : t -> svg file
val multinomial_benchmark : t -> pdf file
val benchmark :
?mode:[`fast | `full] ->
t ->
svg file
val benchmark : ?mode:[ `fast | `full ] -> t -> svg file
val benchmark2 : t -> pdf file
(** stuff for gbgc SBME paper *)
type gBGC_t = Global of float | Convergent of float * float
type gBGC_t =
| Global of float
| Convergent of float * float
type param_t
val explore_params : f:(param_t -> 'a) -> ((float * gBGC_t) * 'a) list
val simu_of_param : ?n_h0:int -> param_t -> t
val filter_results : f:('a -> bool) -> (param_t * 'a) list -> (param_t * 'a) list
val filter_results :
f:('a -> bool) -> (param_t * 'a) list -> (param_t * 'a) list
type record_t
val record_of_simu : t -> record_t workflow
val realistic_result : record_t -> bool
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment