Commit d0bc61a2 authored by Philippe Veber's avatar Philippe Veber
Browse files

included convdet simulator

parent b808b7d1
......@@ -2,9 +2,17 @@ open Reviewphiltrans
open Pipeline2
let h0 =
simulation
bppseqgen_simulation
~hyp:Convergence_hypothesis.(H0 (Fixed 5.))
~tree:"example/trees_test/tree_small_bl.nhx"
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
~nb_sites:10
~seed:42
let sim =
Convdet_simulation {
tree = `NHX "example/trees_test/tree_small_bl.nhx" ;
profiles = "example/aa_fitness/263SelectedProfiles.tsv" ;
n_h0 = 10 ;
n_ha = 5 ;
}
......@@ -2,7 +2,7 @@
(library
((name reviewphiltrans)
(libraries (biocaml.ez bistro.bioinfo bistro.utils))
(libraries (biocaml.ez bistro.bioinfo bistro.utils convdet))
(preprocess (pps (ppx_jane ppx_csv_conv bistro.ppx)))
))
......
......@@ -2,25 +2,29 @@ open Base
open Printf
open Bistro
type dataset =
| Simulation of simulation
and simulation = {
hypothesis : Convergence_hypothesis.t ;
tree : [`NHX of string] ;
profiles : string ;
nb_sites : int ;
seed : int ;
}
let seed = ref 42
let calc_fixed_seed ~(str:string) (seed:int) : int =
let str_hash = Hashtbl.hash str in
Hashtbl.hash (str_hash + seed)
let simulation ~hyp ~tree ~profiles ~nb_sites ~seed =
Simulation {
type dataset =
| Bppseqgen_simulation of {
hypothesis : Convergence_hypothesis.t ;
tree : [`NHX of string] ;
profiles : string ;
nb_sites : int ;
seed : int ;
}
| Convdet_simulation of {
tree : [`NHX of string] ;
profiles : string ;
n_h0 : int ;
n_ha : int ;
}
let bppseqgen_simulation ~hyp ~tree ~profiles ~nb_sites ~seed =
Bppseqgen_simulation {
hypothesis = hyp ;
tree = `NHX tree ;
profiles ;
......@@ -29,40 +33,49 @@ let simulation ~hyp ~tree ~profiles ~nb_sites ~seed =
}
let tree_prefix = function
| Simulation { tree = `NHX path ; _ } ->
| Bppseqgen_simulation { tree = `NHX path ; _ }
| Convdet_simulation { tree = `NHX path ; _ } ->
Caml.Filename.chop_extension path
let tree_dataset = function
| Simulation { tree = `NHX tree ; _ } as sim ->
| Bppseqgen_simulation { tree = `NHX tree ; _ }
| Convdet_simulation { tree = `NHX tree ; _ } as sim ->
Tree_dataset.prepare
~descr:("simulated_data." ^ (tree_prefix sim))
(Workflow.input tree)
let tree = function
| Simulation { tree = `NHX path ; _ } -> Workflow.input path
| Bppseqgen_simulation { tree = `NHX path ; _ }
| Convdet_simulation { tree = `NHX path ; _ } ->
Workflow.input path
let profile sim =
let profile ~nb_sites ~profiles =
Profile.profile_l_of_splitted_profile
~nb_cat:All
~nb_sites:sim.nb_sites
sim.profiles
~seed:(calc_fixed_seed ~str:sim.profiles !seed)
~nb_sites
profiles
~seed:(calc_fixed_seed ~str:profiles !seed)
let bppseqgen (Simulation sim as d) =
let model_prefix = Convergence_hypothesis.string_of_model sim.hypothesis in
let descr = sprintf ".%s.%s" model_prefix (tree_prefix d) in
let profile = profile sim in
let bppseqgen sim ~hypothesis ~nb_sites ~profiles ~seed =
let model_prefix = Convergence_hypothesis.string_of_model hypothesis in
let descr = sprintf ".%s.%s" model_prefix (tree_prefix sim) in
let profile = profile ~nb_sites ~profiles in
let profile_f = profile.profile_f in
let profile_c = profile.profile_c in
let seed = calc_fixed_seed ~str:descr sim.seed in
let seed = calc_fixed_seed ~str:descr seed in
Bppsuite.Bppseqgen.multi_profiles
~descr
~tree_dataset:(tree_dataset d)
~hypothesis:sim.hypothesis ~profile_f ~profile_c ~seed
~tree_dataset:(tree_dataset sim)
~hypothesis ~profile_f ~profile_c ~seed
let nucleotide_alignment (Simulation _ as d) =
bppseqgen d
|> Bppsuite.Bppseqgen.alignment
let nucleotide_alignment = function
| Bppseqgen_simulation { hypothesis ; nb_sites ; profiles ; seed ; _ } as sim ->
bppseqgen sim ~hypothesis ~nb_sites ~profiles ~seed
|> Bppsuite.Bppseqgen.alignment
| Convdet_simulation { n_h0 ; n_ha ; profiles ; _ } as sim ->
let tree = tree sim in
let fitness_profiles = Workflow.input profiles in
Simulator.simulator ~n_ha ~n_h0 ~tree ~fitness_profiles
let phylip_nucleotide_alignment d =
Bppsuite.fna2phy ~fna:(nucleotide_alignment d)
......
open Core_kernel
let%pworkflow simulator ~n_h0 ~n_ha ~tree ~fitness_profiles =
let tree = Convdet.Simulator.tree_from_file [%path tree] in
let fitness_profiles = Convdet.Fitness_profiles.read [%path fitness_profiles] in
let random_nucletide_param () =
let p = Convdet.Simulator.Codon_model.random_param () in
{ p with omega = 1. } in
let random_profile () =
Random.int (Array.length fitness_profiles)
|> Array.get fitness_profiles
|> Convdet.Amino_acid.vector_of_array_exn
in
let random_h0_fitness () =
let p = random_profile () in
Fn.const p in
let rec random_ha_fitness () =
let p = random_profile () in
let q = random_profile () in
if p = q then random_ha_fitness ()
else
(function
| 0 -> p
| 1 -> q
| _ -> assert false)
in
let h0_params = List.init n_h0 ~f:(fun _ -> random_nucletide_param (), random_h0_fitness ()) in
let ha_params = List.init n_ha ~f:(fun _ -> random_nucletide_param (), random_ha_fitness ()) in
let params = ha_params @ h0_params in
let ali = Convdet.Simulator.Codon_model.alignment tree params in
let species_name =
Phylogenetics.Tree.leaves tree
|> List.map ~f:(fun { name } -> Option.value_exn name) in
Out_channel.with_file [%dest] ~f:(fun oc ->
List.iter2_exn species_name ali ~f:(fun description sequence ->
fprintf oc ">%s\n%s\n" description sequence
)
)
open Bistro
open File_formats
val simulator :
n_h0:int ->
n_ha:int ->
tree:nhx pworkflow ->
fitness_profiles:#text_file pworkflow ->
nucleotide_fasta pworkflow
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment