Commit e7746c43 authored by Philippe Veber's avatar Philippe Veber
Browse files

Merge branch 'optimize-mutsel-cpg-sim'

parents 1e38b720 361d983a
open Core_kernel
type t = {
n : int ;
shift : int ;
weights : float array ;
}
let is_leaf dpd i = i >= dpd.shift
let init n ~f =
let shift = Float.(to_int (2. ** round_up (log (float n) /. log 2.))) - 1 in
let m = shift + n in
let weights = Array.create ~len:m 0. in
for i = 0 to n - 1 do
weights.(shift + i) <- f (i)
done ;
for i = shift - 1 downto 0 do
if 2 * i + 1 < m then weights.(i) <- weights.(2 * i + 1) ;
if 2 * i + 2 < m then weights.(i) <- weights.(i) +. weights.(2 * i + 2)
done ;
{ n ; shift ; weights }
let draw dpd rng =
let x = dpd.weights.(0) *. Gsl.Rng.uniform rng in
let rec loop acc i =
if is_leaf dpd i then i
else if Float.( >= ) (acc +. dpd.weights.(2 * i + 1)) x then
loop acc (2 * i + 1)
else loop (acc +. dpd.weights.(2 * i + 1)) (2 * i + 2)
in
loop 0. 0 - dpd.shift
let update dpd i w_i =
let m = Array.length dpd.weights in
let j = i + dpd.shift in
dpd.weights.(j) <- w_i ;
let rec loop k =
dpd.weights.(k) <- dpd.weights.(2 * k + 1) ;
if 2 * k + 2 < m then dpd.weights.(k) <- dpd.weights.(k) +. dpd.weights.(2 * k + 2) ;
if k > 0 then loop ((k - 1) / 2)
in
loop ((j - 1) / 2)
let total_weight dpd = dpd.weights.(0)
let demo ~n ~ncat =
let rng = Gsl.Rng.(make (default ())) in
let probs = Array.init ncat ~f:(fun _ -> Gsl.Rng.uniform rng) in
let sum = Array.fold probs ~init:0. ~f:( +. ) in
let pd = init ncat ~f:(fun _ -> 0.) in
let counts = Array.create ~len:ncat 0 in
Array.iteri probs ~f:(update pd) ;
for _ = 1 to n do
let k = draw pd rng in
counts.(k) <- counts.(k) + 1
done ;
Array.map probs ~f:(fun x -> x /. sum),
Array.map counts ~f:(fun k -> float k /. float n)
(* Updatable discrete probability distribution *)
type t
val init : int -> f:(int -> float) -> t
val update : t -> int -> float -> unit
val draw : t -> Gsl.Rng.t -> int
val total_weight : t -> float
val demo : n:int -> ncat:int -> float array * float array
......@@ -58,16 +58,16 @@ module Evolution_model = struct
context_CpG;
}
let flat_param ~rate_CpG =
let flat_param ~rate_CpG =
let pi = Nucleotide.flat_profile () in
let rho = Linear_algebra.Lacaml.Vector.init 6 ~f:(fun _ -> 1. /. 6.) in
let nucleotide_rates = Rate_matrix.Nucleotide.gtr ~equilibrium_frequencies:pi ~transition_rates:rho in
make_param ~gBGC:0. ~scaled_fitness:(flat_fitness ()) ~omega:1. ~nucleotide_rates ~nucleotide_stat_dist:pi ~rate_CpG
let nucleotide_rates = Rate_matrix.Nucleotide.gtr ~equilibrium_frequencies:pi ~transition_rates:rho in
make_param ~gBGC:0. ~scaled_fitness:(flat_fitness ()) ~omega:1. ~nucleotide_rates ~nucleotide_stat_dist:pi ~rate_CpG
let random_param ~alpha_nuc ~alpha_fitness ~rate_CpG =
let pi = Nucleotide.random_profile alpha_nuc in
let rho = Utils.random_profile 6 in
let nucleotide_rates = Rate_matrix.Nucleotide.gtr ~equilibrium_frequencies:pi ~transition_rates:rho in
let random_param ~alpha_nuc ~alpha_fitness ~rate_CpG =
let pi = Nucleotide.random_profile alpha_nuc in
let rho = Utils.random_profile 6 in
let nucleotide_rates = Rate_matrix.Nucleotide.gtr ~equilibrium_frequencies:pi ~transition_rates:rho in
let scaled_fitness = Amino_acid.random_profile alpha_fitness |> fitness_of_profile in
make_param ~gBGC:0. ~scaled_fitness ~omega:1. ~nucleotide_rates ~nucleotide_stat_dist:pi ~rate_CpG
......@@ -114,11 +114,38 @@ module Evolution_model = struct
| None -> 0.
)
let rate_vector { nucleotide_rates ; omega ; scaled_fitness = _F_ ; gBGC ; rate_CpG ; context_CpG; _ } p =
let nuc_rates = (nucleotide_rates :> Nucleotide.matrix) in
NSCodon.Table.init (fun q ->
if NSCodon.equal p q then 0.
else
match NSCodon.neighbours p q with
| Some (pos, x_a, x_b) ->
let rate_CpG = if rate_increase_CpG (pos, x_a, x_b) context_CpG (inspect_codon p) then rate_CpG else 1. in
let _B_ = match Nucleotide.(inspect x_a, inspect x_b) with
| (A | T), (C | G) -> gBGC
| (C | G), (A | T) -> -. gBGC
| _ -> 0.
in
let selection_coefficient =
_B_ +.
if NSCodon.synonym p q then 0.
else
let aa_p = NSCodon.aa_of_codon p in
let aa_q = NSCodon.aa_of_codon q in
_F_.Amino_acid.%(aa_q) -. _F_.Amino_acid.%(aa_p)
in
let p_fix = fixation_probability selection_coefficient in
let q_ab = nuc_rates.Nucleotide.%{x_a, x_b} in
q_ab *. omega *. p_fix *. rate_CpG
| None -> 0.
)
let stationary_distribution p = NSCodon_rate_matrix.stationary_distribution (rate_matrix p)
let nucleotides_of_codons codons =
Array.concat_map codons ~f:(fun codon ->
let x, y, z = NSCodon.nucleotides codon in
let x, y, z = NSCodon.nucleotides codon in
[|x;y;z|]
)
......@@ -127,15 +154,15 @@ module Evolution_model = struct
Array.count seq ~f:(Nucleotide.equal nuc)
)
let dinucleotide_counts seq =
let observed = Array.init Nucleotide.card ~f:(fun _ ->
let dinucleotide_counts seq =
let observed = Array.init Nucleotide.card ~f:(fun _ ->
Array.init Nucleotide.card ~f:(fun _ -> 0)
) in
Array.iteri seq ~f:(fun pos nuc ->
) in
Array.iteri seq ~f:(fun pos nuc ->
if (pos + 1) < Array.length seq then
let next = seq.(pos+1) in
Nucleotide.(
observed.(to_int nuc).(to_int next) <-
observed.(to_int nuc).(to_int next) <-
observed.(to_int nuc).(to_int next) + 1
));
observed
......@@ -144,55 +171,63 @@ module Evolution_model = struct
let nucleotide_sequence = nucleotides_of_codons codon_sequence in
let single_counts = nucleotide_counts nucleotide_sequence in
let dinuc_counts = dinucleotide_counts nucleotide_sequence in
Array.mapi dinuc_counts ~f:(fun fst_nuc counts ->
Array.mapi counts ~f:(fun snd_nuc observed ->
let expected =
Array.mapi dinuc_counts ~f:(fun fst_nuc counts ->
Array.mapi counts ~f:(fun snd_nuc observed ->
let expected =
float_of_int Nucleotide.Table.(
get single_counts (Nucleotide.of_int_exn fst_nuc) *
get single_counts (Nucleotide.of_int_exn fst_nuc) *
get single_counts (Nucleotide.of_int_exn snd_nuc)
) /.
) /.
float_of_int (Array.length nucleotide_sequence)
in
float_of_int observed /. expected)
)
end
end
module Make(BI: Simulator.Branch_info) = struct
include Simulator.Make(NSCodon)(Evolution_model)(BI)
module Make(BI: Simulator.Branch_info) = Simulator.Make(NSCodon)(Evolution_model)(BI)
let cpg_update ~n ~pos f =
f pos ;
if pos - 1 >= 0 then f (pos - 1) ;
if pos + 1 < n then f (pos + 1)
let demo seq_length ~rate_CpG ~branch_length =
let sequence_gillespie_direct rng tree ~root ~param =
let rate_vector b state i =
Evolution_model.rate_vector (param state i b) state.(i)
in
sequence_gillespie_direct rng tree ~update_iterator:cpg_update ~root ~rate_vector
end
let print_rate_matrix nuc_ratios =
let header =
List.map Nucleotide.all ~f:Nucleotide.to_char
|> List.intersperse ~sep:'\t'
|> String.of_char_list
in
let line i nuc =
Array.map nuc_ratios.(i) ~f:(sprintf "%.4f")
|> String.concat_array ~sep:"\t"
|> printf "%c\t%s\n" (Nucleotide.to_char nuc)
in
printf "\t%s\n" header ;
List.iteri Nucleotide.all ~f:line
let demo seq_length ~rate_CpG ~branch_length =
let rng = Gsl.Rng.(make (default ())) in
let module Branch_info = struct
type t = Newick.branch_info
let length branch = Option.value_exn Newick.(branch.length)
end
type t = float
let length = Fn.id
end
in
let module M = Make(Branch_info) in
let root_sequence = Array.init seq_length ~f:(fun _ ->
let root_sequence = Array.init seq_length ~f:(fun _ ->
let codon_seed = Owl.Stats.uniform_int_rvs ~a:0 ~b:(NSCodon.card - 1) in
match NSCodon.of_int codon_seed with
| Some codon -> codon
| None -> sprintf "Cannot create codon from integer : %d" codon_seed |> failwith
NSCodon.of_int_exn codon_seed
) in
let param = Evolution_model.random_param ~alpha_nuc:0.5 ~alpha_fitness:0.5 ~rate_CpG in
let tree =
sprintf "(leaf:%f);" branch_length
|> Newick.from_string_exn
|> Newick.with_inner_tree ~f:(fun tree ->
M.sequence_gillespie_direct tree ~root:root_sequence ~param
)
in
let simulated_sequence = Tree.leaves tree |> List.hd_exn in
let param seq i b = Evolution_model.flat_param ~rate_CpG seq i b in
let tree = Tree.(node () (List1.singleton (branch branch_length (leaf ())))) in
let sim = M.sequence_gillespie_direct rng tree ~root:root_sequence ~param in
let simulated_sequence = Tree.leaves sim |> List.hd_exn in
let counts = Evolution_model.dinucleotide_ratios simulated_sequence in
Array.init Nucleotide.card ~f:(fun i ->
let nuc = Nucleotide.of_int_exn i |> Nucleotide.to_char |> Char.to_string in
"\t" ^ nuc
)
|> String.concat_array ~sep:"\t"
|> print_endline ;
Array.iteri counts ~f:(fun i nuc_ratios ->
let nuc = Nucleotide.of_int_exn i |> Nucleotide.to_char |> Char.to_string in
print_string nuc;
Array.iteri nuc_ratios ~f:(fun _ ratio ->
print_string ("\t" ^ string_of_float ratio)
);
Out_channel.newline stdout;
)
print_rate_matrix counts
......@@ -40,6 +40,13 @@ end
module Make(BI: Simulator.Branch_info) : sig
include module type of Simulator.Make(NSCodon)(Evolution_model)(BI)
val sequence_gillespie_direct :
Gsl.Rng.t ->
('a, 'b, BI.t) Tree.t ->
root:NSCodon.t array ->
param:(NSCodon.t array -> int -> BI.t -> Evolution_model.param) ->
(NSCodon.t array, NSCodon.t array, BI.t) Tree.t
end
val demo : int -> rate_CpG:float -> branch_length:float -> unit
\ No newline at end of file
val demo : int -> rate_CpG:float -> branch_length:float -> unit
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment