Commit 361d983a authored by Philippe Veber's avatar Philippe Veber
Browse files

tk/Mutsel_simulator_cpg: use upstreamed simulator code and light refactoring

parent e4ba1849
......@@ -187,48 +187,37 @@ end
module Make(BI: Simulator.Branch_info) = struct
include Simulator.Make(NSCodon)(Evolution_model)(BI)
let rng = Gsl.Rng.(make (default ()))
let symbol_sample v =
Gsl.Randist.(discrete rng (discrete_preproc v))
|> NSCodon.of_int_exn
let cpg_update ~n ~pos f =
f pos ;
if pos - 1 >= 0 then f (pos - 1) ;
if pos + 1 < n then f (pos + 1)
let sequence_gillespie_direct tree ~root ~param =
Tree.propagate tree ~init:root ~node:Fn.const ~leaf:Fn.const ~branch:(fun seq b ->
let state = Array.copy seq in
let n = Array.length state in
let rate i = Evolution_model.rate_vector (param state i b) state.(i) in
let rates = Array.init n ~f:rate in
let pos_rate i = Owl.Stats.sum (rates.(i) :> float array) in
let pos_rates = Discrete_pd.init n ~f:pos_rate in
let rec loop remaining_time =
let total_rate = Discrete_pd.total_weight pos_rates in
let tau = Owl.Stats.exponential_rvs ~lambda:total_rate in
if Float.(tau > remaining_time) then state
else
let pos = Discrete_pd.draw pos_rates rng in
let next_letter = symbol_sample (rates.(pos) :> float array) in
state.(pos) <- next_letter ;
cpg_update ~n ~pos (fun pos ->
rates.(pos) <- rate pos ;
Discrete_pd.update pos_rates pos (pos_rate pos)
) ;
loop Float.(remaining_time - tau)
let sequence_gillespie_direct rng tree ~root ~param =
let rate_vector b state i =
Evolution_model.rate_vector (param state i b) state.(i)
in
loop (BI.length b)
)
sequence_gillespie_direct rng tree ~update_iterator:cpg_update ~root ~rate_vector
end
let print_rate_matrix nuc_ratios =
let header =
List.map Nucleotide.all ~f:Nucleotide.to_char
|> List.intersperse ~sep:'\t'
|> String.of_char_list
in
let line i nuc =
Array.map nuc_ratios.(i) ~f:(sprintf "%.4f")
|> String.concat_array ~sep:"\t"
|> printf "%c\t%s\n" (Nucleotide.to_char nuc)
in
printf "\t%s\n" header ;
List.iteri Nucleotide.all ~f:line
let demo seq_length ~rate_CpG ~branch_length =
let rng = Gsl.Rng.(make (default ())) in
let module Branch_info = struct
type t = Newick.branch_info
let length branch = Option.value_exn Newick.(branch.length)
type t = float
let length = Fn.id
end
in
let module M = Make(Branch_info) in
......@@ -237,26 +226,8 @@ let demo seq_length ~rate_CpG ~branch_length =
NSCodon.of_int_exn codon_seed
) in
let param seq i b = Evolution_model.flat_param ~rate_CpG seq i b in
let tree =
sprintf "(leaf:%f);" branch_length
|> Newick.from_string_exn
|> Newick.with_inner_tree ~f:(fun tree ->
M.sequence_gillespie_direct tree ~root:root_sequence ~param
)
in
let simulated_sequence = Tree.leaves tree |> List.hd_exn in
let tree = Tree.(node () (List1.singleton (branch branch_length (leaf ())))) in
let sim = M.sequence_gillespie_direct rng tree ~root:root_sequence ~param in
let simulated_sequence = Tree.leaves sim |> List.hd_exn in
let counts = Evolution_model.dinucleotide_ratios simulated_sequence in
Array.init Nucleotide.card ~f:(fun i ->
let nuc = Nucleotide.of_int_exn i |> Nucleotide.to_char |> Char.to_string in
"\t" ^ nuc
)
|> String.concat_array ~sep:"\t"
|> print_endline ;
Array.iteri counts ~f:(fun i nuc_ratios ->
let nuc = Nucleotide.of_int_exn i |> Nucleotide.to_char |> Char.to_string in
print_string nuc;
Array.iteri nuc_ratios ~f:(fun _ ratio ->
print_string ("\t" ^ string_of_float ratio)
);
Out_channel.newline stdout;
)
print_rate_matrix counts
......@@ -40,6 +40,13 @@ end
module Make(BI: Simulator.Branch_info) : sig
include module type of Simulator.Make(NSCodon)(Evolution_model)(BI)
val sequence_gillespie_direct :
Gsl.Rng.t ->
('a, 'b, BI.t) Tree.t ->
root:NSCodon.t array ->
param:(NSCodon.t array -> int -> BI.t -> Evolution_model.param) ->
(NSCodon.t array, NSCodon.t array, BI.t) Tree.t
end
val demo : int -> rate_CpG:float -> branch_length:float -> unit
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment