Commit e7746c43 authored by Philippe Veber's avatar Philippe Veber
Browse files

Merge branch 'optimize-mutsel-cpg-sim'

parents 1e38b720 361d983a
open Core_kernel
type t = {
n : int ;
shift : int ;
weights : float array ;
}
let is_leaf dpd i = i >= dpd.shift
let init n ~f =
let shift = Float.(to_int (2. ** round_up (log (float n) /. log 2.))) - 1 in
let m = shift + n in
let weights = Array.create ~len:m 0. in
for i = 0 to n - 1 do
weights.(shift + i) <- f (i)
done ;
for i = shift - 1 downto 0 do
if 2 * i + 1 < m then weights.(i) <- weights.(2 * i + 1) ;
if 2 * i + 2 < m then weights.(i) <- weights.(i) +. weights.(2 * i + 2)
done ;
{ n ; shift ; weights }
let draw dpd rng =
let x = dpd.weights.(0) *. Gsl.Rng.uniform rng in
let rec loop acc i =
if is_leaf dpd i then i
else if Float.( >= ) (acc +. dpd.weights.(2 * i + 1)) x then
loop acc (2 * i + 1)
else loop (acc +. dpd.weights.(2 * i + 1)) (2 * i + 2)
in
loop 0. 0 - dpd.shift
let update dpd i w_i =
let m = Array.length dpd.weights in
let j = i + dpd.shift in
dpd.weights.(j) <- w_i ;
let rec loop k =
dpd.weights.(k) <- dpd.weights.(2 * k + 1) ;
if 2 * k + 2 < m then dpd.weights.(k) <- dpd.weights.(k) +. dpd.weights.(2 * k + 2) ;
if k > 0 then loop ((k - 1) / 2)
in
loop ((j - 1) / 2)
let total_weight dpd = dpd.weights.(0)
let demo ~n ~ncat =
let rng = Gsl.Rng.(make (default ())) in
let probs = Array.init ncat ~f:(fun _ -> Gsl.Rng.uniform rng) in
let sum = Array.fold probs ~init:0. ~f:( +. ) in
let pd = init ncat ~f:(fun _ -> 0.) in
let counts = Array.create ~len:ncat 0 in
Array.iteri probs ~f:(update pd) ;
for _ = 1 to n do
let k = draw pd rng in
counts.(k) <- counts.(k) + 1
done ;
Array.map probs ~f:(fun x -> x /. sum),
Array.map counts ~f:(fun k -> float k /. float n)
(* Updatable discrete probability distribution *)
type t
val init : int -> f:(int -> float) -> t
val update : t -> int -> float -> unit
val draw : t -> Gsl.Rng.t -> int
val total_weight : t -> float
val demo : n:int -> ncat:int -> float array * float array
......@@ -114,6 +114,33 @@ module Evolution_model = struct
| None -> 0.
)
let rate_vector { nucleotide_rates ; omega ; scaled_fitness = _F_ ; gBGC ; rate_CpG ; context_CpG; _ } p =
let nuc_rates = (nucleotide_rates :> Nucleotide.matrix) in
NSCodon.Table.init (fun q ->
if NSCodon.equal p q then 0.
else
match NSCodon.neighbours p q with
| Some (pos, x_a, x_b) ->
let rate_CpG = if rate_increase_CpG (pos, x_a, x_b) context_CpG (inspect_codon p) then rate_CpG else 1. in
let _B_ = match Nucleotide.(inspect x_a, inspect x_b) with
| (A | T), (C | G) -> gBGC
| (C | G), (A | T) -> -. gBGC
| _ -> 0.
in
let selection_coefficient =
_B_ +.
if NSCodon.synonym p q then 0.
else
let aa_p = NSCodon.aa_of_codon p in
let aa_q = NSCodon.aa_of_codon q in
_F_.Amino_acid.%(aa_q) -. _F_.Amino_acid.%(aa_p)
in
let p_fix = fixation_probability selection_coefficient in
let q_ab = nuc_rates.Nucleotide.%{x_a, x_b} in
q_ab *. omega *. p_fix *. rate_CpG
| None -> 0.
)
let stationary_distribution p = NSCodon_rate_matrix.stationary_distribution (rate_matrix p)
let nucleotides_of_codons codons =
......@@ -157,42 +184,50 @@ module Evolution_model = struct
)
end
module Make(BI: Simulator.Branch_info) = Simulator.Make(NSCodon)(Evolution_model)(BI)
module Make(BI: Simulator.Branch_info) = struct
include Simulator.Make(NSCodon)(Evolution_model)(BI)
let cpg_update ~n ~pos f =
f pos ;
if pos - 1 >= 0 then f (pos - 1) ;
if pos + 1 < n then f (pos + 1)
let sequence_gillespie_direct rng tree ~root ~param =
let rate_vector b state i =
Evolution_model.rate_vector (param state i b) state.(i)
in
sequence_gillespie_direct rng tree ~update_iterator:cpg_update ~root ~rate_vector
end
let print_rate_matrix nuc_ratios =
let header =
List.map Nucleotide.all ~f:Nucleotide.to_char
|> List.intersperse ~sep:'\t'
|> String.of_char_list
in
let line i nuc =
Array.map nuc_ratios.(i) ~f:(sprintf "%.4f")
|> String.concat_array ~sep:"\t"
|> printf "%c\t%s\n" (Nucleotide.to_char nuc)
in
printf "\t%s\n" header ;
List.iteri Nucleotide.all ~f:line
let demo seq_length ~rate_CpG ~branch_length =
let rng = Gsl.Rng.(make (default ())) in
let module Branch_info = struct
type t = Newick.branch_info
let length branch = Option.value_exn Newick.(branch.length)
type t = float
let length = Fn.id
end
in
let module M = Make(Branch_info) in
let root_sequence = Array.init seq_length ~f:(fun _ ->
let codon_seed = Owl.Stats.uniform_int_rvs ~a:0 ~b:(NSCodon.card - 1) in
match NSCodon.of_int codon_seed with
| Some codon -> codon
| None -> sprintf "Cannot create codon from integer : %d" codon_seed |> failwith
NSCodon.of_int_exn codon_seed
) in
let param = Evolution_model.random_param ~alpha_nuc:0.5 ~alpha_fitness:0.5 ~rate_CpG in
let tree =
sprintf "(leaf:%f);" branch_length
|> Newick.from_string_exn
|> Newick.with_inner_tree ~f:(fun tree ->
M.sequence_gillespie_direct tree ~root:root_sequence ~param
)
in
let simulated_sequence = Tree.leaves tree |> List.hd_exn in
let param seq i b = Evolution_model.flat_param ~rate_CpG seq i b in
let tree = Tree.(node () (List1.singleton (branch branch_length (leaf ())))) in
let sim = M.sequence_gillespie_direct rng tree ~root:root_sequence ~param in
let simulated_sequence = Tree.leaves sim |> List.hd_exn in
let counts = Evolution_model.dinucleotide_ratios simulated_sequence in
Array.init Nucleotide.card ~f:(fun i ->
let nuc = Nucleotide.of_int_exn i |> Nucleotide.to_char |> Char.to_string in
"\t" ^ nuc
)
|> String.concat_array ~sep:"\t"
|> print_endline ;
Array.iteri counts ~f:(fun i nuc_ratios ->
let nuc = Nucleotide.of_int_exn i |> Nucleotide.to_char |> Char.to_string in
print_string nuc;
Array.iteri nuc_ratios ~f:(fun _ ratio ->
print_string ("\t" ^ string_of_float ratio)
);
Out_channel.newline stdout;
)
print_rate_matrix counts
......@@ -40,6 +40,13 @@ end
module Make(BI: Simulator.Branch_info) : sig
include module type of Simulator.Make(NSCodon)(Evolution_model)(BI)
val sequence_gillespie_direct :
Gsl.Rng.t ->
('a, 'b, BI.t) Tree.t ->
root:NSCodon.t array ->
param:(NSCodon.t array -> int -> BI.t -> Evolution_model.param) ->
(NSCodon.t array, NSCodon.t array, BI.t) Tree.t
end
val demo : int -> rate_CpG:float -> branch_length:float -> unit
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment