Commit 7b241068 authored by Philippe Veber
Browse files

Pipeline refactoring (added functor)

parent ff0000aa
......@@ -2,7 +2,7 @@ open Core
open Reviewphiltrans
let main ~n_h0 ~n_ha ~seed:i () =
let open Pipeline2 in
let open Simulation_dataset in
let sim =
Bppseqgen_mixed {
tree = `NHX "example/trees_analyses/cyp_coding.Chrysithr_root.nhx" ;
......@@ -13,7 +13,6 @@ let main ~n_h0 ~n_ha ~seed:i () =
seed = i ;
}
in
Pipeline2.seed := i ;
let w = benchmark sim in
(* print_endline (Debug.path (multinomial sim)) ; *)
print_endline (Debug.path w)
......
open Bistro
open File_formats
(** Interface a dataset must provide to be used as the argument of
    {!Make}: an opaque dataset type together with two accessors that
    each return a workflow.

    NOTE(review): [nhx], [nucleotide_fasta] and [pworkflow] are not
    defined in this file; they presumably come from the [Bistro] and
    [File_formats] modules opened above -- confirm. *)
module type Dataset = sig
  (** A dataset description. *)
  type t

  (** Workflow for the nucleotide alignment (FASTA) of a dataset. *)
  val nucleotide_alignment : t -> nucleotide_fasta pworkflow

  (** Workflow for the tree (NHX) of a dataset. *)
  val tree : t -> nhx pworkflow
end
(** Builds a set of detection workflows on top of any dataset
    implementation [D].

    Every function takes a dataset [d : D.t] and only combines
    [D.tree d] and [D.nucleotide_alignment d] with the wrappers of the
    corresponding tool module ([Identical], [Topological],
    [Multinomial], [Tamuri], [Diffsel], [Diffseldsparse], [Pcoc]). *)
module Make(D : Dataset) = struct
  open D

  (** Amino-acid alignment: the nucleotide alignment of [d] passed
      through [Bppsuite.fna2faa]. *)
  let amino_acid_alignment d =
    d |> nucleotide_alignment |> Bppsuite.fna2faa

  (** The nucleotide alignment of [d] passed through
      [Bppsuite.fna2phy]. *)
  let phylip_nucleotide_alignment d =
    let fna = nucleotide_alignment d in
    Bppsuite.fna2phy ~fna

  (** Runs [Identical.identical] with protein model ["LG08"] on the
      amino-acid alignment, using two trees derived from [tree d]
      ([prepare_sc_tree] and [prepare_tree_with_node_id]), and returns
      [Identical.results] of that run. *)
  let identical d =
    let faa = amino_acid_alignment d in
    let tree_id = Tree_dataset.prepare_tree_with_node_id (tree d) in
    let tree_sc = Tree_dataset.prepare_sc_tree (tree d) in
    Identical.results
      (Identical.identical ~tree_id ~tree_sc ~prot_model:"LG08" ~faa ())

  (** Runs [Topological.topological] with protein model ["LG08"] on
      the amino-acid alignment, using two trees derived from [tree d]
      ([prepare_topological_tree] and [prepare_tree_with_node_id]),
      and returns [Topological.results] of that run. *)
  let topological d =
    let input_tree = tree d in
    let tree_conv = Tree_dataset.prepare_topological_tree input_tree in
    let tree = Tree_dataset.prepare_tree_with_node_id input_tree in
    let faa = amino_acid_alignment d in
    Topological.results
      (Topological.topological ~faa ~tree ~tree_conv ~prot_model:"LG08" ())

  (* The five [multinomial*] functions below share one shape: the
     unmodified [tree d] is passed as [~tree_sc] and the amino-acid
     alignment as [~faa]; whatever the [Multinomial] function returns
     after those two arguments is returned as is. *)

  (** [Multinomial.multinomial] applied to the tree and amino-acid
      alignment of [d]. *)
  let multinomial d =
    let faa = amino_acid_alignment d in
    Multinomial.multinomial ~tree_sc:(tree d) ~faa

  (** [Multinomial.multinomial_asymptotic_lrt] applied to the tree and
      amino-acid alignment of [d]. *)
  let multinomial_asymptotic_lrt d =
    let faa = amino_acid_alignment d in
    Multinomial.multinomial_asymptotic_lrt ~tree_sc:(tree d) ~faa

  (** [Multinomial.multinomial_asymptotic_sparse] applied to the tree
      and amino-acid alignment of [d]. *)
  let multinomial_asymptotic_sparse d =
    let faa = amino_acid_alignment d in
    Multinomial.multinomial_asymptotic_sparse ~tree_sc:(tree d) ~faa

  (** [Multinomial.multinomial_simulation_lrt] applied to the tree and
      amino-acid alignment of [d]. *)
  let multinomial_simulation_lrt d =
    let faa = amino_acid_alignment d in
    Multinomial.multinomial_simulation_lrt ~tree_sc:(tree d) ~faa

  (** [Multinomial.multinomial_simulation_sparse] applied to the tree
      and amino-acid alignment of [d]. *)
  let multinomial_simulation_sparse d =
    let faa = amino_acid_alignment d in
    Multinomial.multinomial_simulation_sparse ~tree_sc:(tree d) ~faa

  (** Runs [Tamuri.tdg09] on the tree and amino-acid alignment of [d]
      and returns [Tamuri.results] of that run. *)
  let tdg09 d =
    let faa = amino_acid_alignment d in
    Tamuri.results (Tamuri.tdg09 ~tree:(tree d) ~faa ())

  (** The tree of [d] passed through
      [Tree_dataset.prepare_diffsel_tree]. *)
  let diffseltree d =
    d |> tree |> Tree_dataset.prepare_diffsel_tree

  (** Runs [Diffsel.diffsel] on the phylip alignment and the diffsel
      tree of [d], with [w_every] fixed to 1 and [n_cycles] fixed to
      50, then applies [Diffsel.selector] to the run. *)
  let diffsel d =
    let phy_n = phylip_nucleotide_alignment d in
    let tree = diffseltree d in
    Diffsel.selector
      (Diffsel.diffsel ~phy_n ~tree ~w_every:1 ~n_cycles:50 ())

  (** Runs [Diffseldsparse.diffseldsparse] on the phylip alignment and
      the diffsel tree of [d], with [w_every] fixed to 1 and
      [n_cycles] fixed to 50; the run is then piped through
      [Diffseldsparse.readdiffseldsparse] and
      [Diffseldsparse.results].

      The optional [pi], [shiftprob] and [eps] arguments are forwarded
      untouched to [Diffseldsparse.diffseldsparse]. *)
  let diffseldsparse ?pi ?shiftprob ?eps d =
    let alignment = phylip_nucleotide_alignment d in
    let tree = diffseltree d in
    Diffseldsparse.diffseldsparse
      ?pi ?shiftprob ?eps
      ~alignment ~tree
      ~w_every:1 ~n_cycles:50
      ()
    |> Diffseldsparse.readdiffseldsparse
    |> Diffseldsparse.results

  (** Runs [Pcoc.pcoc] on the tree and amino-acid alignment of [d]
      with [plot_complete] disabled, and returns [Pcoc.results] of
      that run.

      @param gamma forwarded as [~gamma]; defaults to [true].
      @param ncat forwarded as [~catx_est]; defaults to [60]. *)
  let pcoc ?(gamma = true) ?(ncat = 60) d =
    Pcoc.results
      (Pcoc.pcoc
         ~catx_est:ncat
         ~plot_complete:false
         ~gamma
         ~faa:(amino_acid_alignment d)
         ~tree:(tree d)
         ())
end
......@@ -2,15 +2,13 @@ open Base
open Printf
open Bistro
let seed = ref 42
(** [calc_fixed_seed ~str seed] derives an integer from a string and a
    base seed: [str] is hashed, [seed] is added to that hash, and the
    sum is hashed again. Equal inputs always yield the same result. *)
let calc_fixed_seed ~(str:string) (seed:int) : int =
  str
  |> Hashtbl.hash
  |> ( + ) seed
  |> Hashtbl.hash
(** How the tree of a simulated dataset is specified.

    - [`NHX s]: a tree designated by the string [s].
      NOTE(review): presumably a path to an NHX file -- confirm
      against callers.
    - [`Pair_tree (a, b, n)]: two floats and an int.
      NOTE(review): the meaning of the three components is not visible
      here -- confirm against the code that consumes this type. *)
type tree = [
  | `NHX of string
  | `Pair_tree of float * float * int
]
type dataset =
type t =
| Bppseqgen_simulation of {
hypothesis : Convergence_hypothesis.t ;
tree : tree ;
......@@ -97,28 +95,32 @@ let tree_dataset sim =
~descr:("simulated_data." ^ (tree_prefix sim))
(tree sim)
let profile ~nb_sites ~profiles =
(** [seed sim] returns the [seed] field stored in the description
    [sim], whichever of the three constructors it was built with. *)
let seed sim =
  match sim with
  | Bppseqgen_mixed { seed ; _ }
  | Bppseqgen_simulation { seed ; _ }
  | Convdet_simulation { seed ; _ } -> seed
let profile ~nb_sites ~profiles ~seed =
Profile.profile_l_of_splitted_profile
~nb_cat:All
~nb_sites
profiles
~seed:(calc_fixed_seed ~str:profiles !seed)
~seed:(calc_fixed_seed ~str:profiles seed)
let bppseqgen sim ~hypothesis ~nb_sites ~profiles ~seed =
let bppseqgen sim ~hypothesis ~nb_sites ~profiles =
let model_prefix = Convergence_hypothesis.string_of_model hypothesis in
let descr = sprintf ".%s.%s" model_prefix (tree_prefix sim) in
let profile = profile ~nb_sites ~profiles in
let profile = profile ~nb_sites ~profiles ~seed:(seed sim) in
let profile_f = profile.profile_f in
let profile_c = profile.profile_c in
let seed = calc_fixed_seed ~str:descr seed in
Bppsuite.Bppseqgen.multi_profiles
~descr
~input_tree:(tree sim)
~hypothesis ~profile_f ~profile_c ~seed
~hypothesis ~profile_f ~profile_c ~seed:(seed sim)
let rec nucleotide_alignment = function
| Bppseqgen_simulation { hypothesis ; nb_sites ; profiles ; seed ; _ } as sim ->
bppseqgen sim ~hypothesis ~nb_sites ~profiles ~seed
| Bppseqgen_simulation { hypothesis ; nb_sites ; profiles ; _ } as sim ->
bppseqgen sim ~hypothesis ~nb_sites ~profiles
|> Bppsuite.Bppseqgen.alignment
| Bppseqgen_mixed { profiles ; seed ; n_h0 ; n_ha ; ne_s ; tree } ->
let h0 = nucleotide_alignment (Bppseqgen_simulation { hypothesis = H0 (Fixed ne_s) ; profiles ; seed ; nb_sites = n_h0 ; tree }) in
......@@ -130,84 +132,11 @@ let rec nucleotide_alignment = function
Simulator.simulator ~branch_factor ~n_ha ~n_h0 ~ne_s ~gBGC ~tree ~seed ~fitness_profiles ()
|> fst
let amino_acid_alignment d = Bppsuite.fna2faa (nucleotide_alignment d)
let phylip_nucleotide_alignment d =
Bppsuite.fna2phy ~fna:(nucleotide_alignment d)
let diffseltree d =
Tree_dataset.prepare_diffsel_tree (tree d)
let diffsel d =
Diffsel.diffsel
~phy_n:(phylip_nucleotide_alignment d)
~tree:(diffseltree d)
~w_every:1
~n_cycles:50
()
|> Diffsel.selector
let tdg09 d =
Tamuri.tdg09
~tree:(tree d)
~faa:(amino_acid_alignment d)
()
|> Tamuri.results
let identical d =
let tree_sc = Tree_dataset.prepare_sc_tree (tree d) in
let tree_id = Tree_dataset.prepare_tree_with_node_id (tree d) in
Identical.identical ~tree_id ~tree_sc ~prot_model:"LG08" ~faa:(amino_acid_alignment d) ()
|> Identical.results
let topological d =
let faa = amino_acid_alignment d in
let tree_conv = Tree_dataset.prepare_topological_tree (tree d) in
let tree = Tree_dataset.prepare_tree_with_node_id (tree d) in
Topological.topological ~faa ~tree ~tree_conv ~prot_model:"LG08" ()
|> Topological.results
let multinomial d =
Multinomial.multinomial
~tree_sc:(tree d)
~faa:(amino_acid_alignment d)
let multinomial_asymptotic_lrt d =
Multinomial.multinomial_asymptotic_lrt
~tree_sc:(tree d)
~faa:(amino_acid_alignment d)
let multinomial_asymptotic_sparse d =
Multinomial.multinomial_asymptotic_sparse
~tree_sc:(tree d)
~faa:(amino_acid_alignment d)
let multinomial_simulation_lrt d =
Multinomial.multinomial_simulation_lrt
~tree_sc:(tree d)
~faa:(amino_acid_alignment d)
let multinomial_simulation_sparse d =
Multinomial.multinomial_simulation_sparse
~tree_sc:(tree d)
~faa:(amino_acid_alignment d)
let diffseldsparse ?pi ?shiftprob ?eps d =
Diffseldsparse.diffseldsparse
?pi ?shiftprob ?eps
~alignment:(phylip_nucleotide_alignment d)
~tree:(diffseltree d)
~w_every:1
~n_cycles:50
()
|> Diffseldsparse.readdiffseldsparse
|> Diffseldsparse.results
let pcoc ?(gamma = true) ?(ncat = 60) d =
let faa = amino_acid_alignment d in
let tree = tree d in
Pcoc.pcoc ~catx_est:ncat ~plot_complete:false ~gamma ~faa ~tree ()
|> Pcoc.results
(* Instantiate the generic detection pipeline on this module's own
   dataset type and re-export everything it defines here. The argument
   module only forwards [t], [tree] and [nucleotide_alignment] from the
   enclosing module.
   NOTE(review): the include presumably stands in for the per-method
   definitions that previously lived in this file -- confirm that
   [Detection_pipeline.Make] provides every name callers still use. *)
include Detection_pipeline.Make(struct
(* [nonrec] makes the right-hand [t] refer to the enclosing module's
   type instead of the one being defined. *)
type nonrec t = t
let tree = tree
let nucleotide_alignment = nucleotide_alignment
end)
let alignment_plot d =
Convergence_detection.plot_convergent_sites
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment