mutsel_simulator_cpg.ml 7.94 KB
Newer Older
1 2 3
open Core_kernel
open Phylogenetics

4
module NSCodon = Codon.Universal_genetic_code.NS
5
module NSCodon_rate_matrix = Rate_matrix.Make(NSCodon)
6

7
module Evolution_model = struct
8 9 10 11
  type context_CpG = {
    lh: Nucleotide.repr option;
    rh: Nucleotide.repr option;
  }
12

13
  type param = {
Louis Duchemin's avatar
Louis Duchemin committed
14
    mutsel: Mutsel.param;
15 16
    rate_CpG: float;
    context_CpG : context_CpG;
17 18
  }

Louis Duchemin's avatar
Louis Duchemin committed
19

Philippe Veber's avatar
Philippe Veber committed
20 21
  let inspect_codon codon =
    let x, y, z = NSCodon.nucleotides codon in
22
    Nucleotide.(inspect x, inspect y, inspect z)
23

24
  let context_CpG_of_codons ?left_codon ?right_codon () =
25 26
    let open Option.Monad_infix in
    {
27 28
      lh = left_codon  >>| inspect_codon >>| trd3 ;
      rh = right_codon >>| inspect_codon >>| fst3 ;
29
    }
30

31 32 33 34 35 36 37
  let fitness_of_profile ?(beta = 1.) =
    Amino_acid.Vector.map ~f:(fun x -> beta *. Float.log x)

  let flat_fitness () =
    Amino_acid.Vector.init (fun _ -> 1. /. float Amino_acid.card)
    |> fitness_of_profile

Louis Duchemin's avatar
Louis Duchemin committed
38
  let flanking_codons state pos = 
39 40 41 42 43 44 45
    let n = Array.length state in
    let left_codon =
      if 0 <= pos - 1 && pos - 1 < n
      then Some state.(pos - 1)
      else None
    and right_codon =
      if pos + 1 < n then Some state.(pos + 1) else None
46
    in
Louis Duchemin's avatar
Louis Duchemin committed
47 48 49 50 51
    left_codon, right_codon


  let make_param ~mutsel ~rate_CpG state pos =
    let left_codon, right_codon = flanking_codons state pos in
52
    let context_CpG = context_CpG_of_codons ?left_codon ?right_codon () in
Louis Duchemin's avatar
Louis Duchemin committed
53
    { mutsel; rate_CpG; context_CpG }
54

55
  let flat_param ~rate_CpG =
56 57
    let pi = Nucleotide.flat_profile () in
    let rho = Linear_algebra.Lacaml.Vector.init 6 ~f:(fun _ -> 1. /. 6.) in
58
    let nucleotide_rates = Rate_matrix.Nucleotide.gtr ~equilibrium_frequencies:pi ~transition_rates:rho in
Louis Duchemin's avatar
Louis Duchemin committed
59 60 61 62 63 64 65 66 67
    let mutsel = Mutsel.{
        gBGC = 0.; 
        omega = 1. ;
        scaled_fitness = flat_fitness ();
        nucleotide_rates ; 
        nucleotide_stat_dist = pi
      } 
    in
    make_param ~mutsel ~rate_CpG
68

Louis Duchemin's avatar
Louis Duchemin committed
69 70 71
  let random_param ~alpha_nucleotide ~alpha_fitness ~rate_CpG =
    let mutsel = Mutsel.random_param ~alpha_nucleotide ~alpha_fitness in
    make_param ~mutsel ~rate_CpG
72

73 74 75 76 77 78 79 80 81
  let rate_increase_CpG (pos, x_a, x_b) context codon =
    let open Nucleotide in
    match (pos, inspect x_a, inspect x_b), context, codon with
    | (0, C, T), _, (_, G, _)
    | (1, C, T), _, (_, _, G)
    | (2, C, T), { rh = Some G ; _ }, _
    | (0, G, A), { lh = Some C ; _ }, _
    | (1, G, A), _, (C, _, _)
    | (2, G, A), _, (_,C,_) -> true
82 83 84 85 86 87 88 89 90
    | _ -> false

  let fixation_probability delta =
    let open Float in
    if abs delta < 1e-30 then 1. + delta / 2.
    else if delta > 50. then delta
    else if delta < - 50. then 0.
    else delta / (1. - exp (- delta))

Louis Duchemin's avatar
Louis Duchemin committed
91
  let rate_matrix {  mutsel = { nucleotide_rates ; omega ; scaled_fitness = _F_ ; gBGC ;_ } ; rate_CpG ; context_CpG } =
92 93 94 95
    let nuc_rates = (nucleotide_rates :> Nucleotide.matrix) in
    Mutsel.NSCodon_rate_matrix.make (fun p q ->
        match NSCodon.neighbours p q with
        | Some (pos, x_a, x_b) ->
96
          let rate_CpG = if rate_increase_CpG (pos, x_a, x_b) context_CpG (inspect_codon p) then rate_CpG else 1. in
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
          let _B_ = match Nucleotide.(inspect x_a, inspect x_b) with
            | (A | T), (C | G) -> gBGC
            | (C | G), (A | T) -> -. gBGC
            | _ -> 0.
          in
          let selection_coefficient =
            _B_ +.
            if NSCodon.synonym p q then 0.
            else
              let aa_p = NSCodon.aa_of_codon p in
              let aa_q = NSCodon.aa_of_codon q in
              _F_.Amino_acid.%(aa_q) -. _F_.Amino_acid.%(aa_p)
          in
          let p_fix = fixation_probability selection_coefficient in
          let q_ab = nuc_rates.Nucleotide.%{x_a, x_b} in
          q_ab *. omega *. p_fix *. rate_CpG
        | None -> 0.
      )

Louis Duchemin's avatar
Louis Duchemin committed
116
  let rate_vector { mutsel = { nucleotide_rates ; omega ; scaled_fitness = _F_ ; gBGC ; _ } ; rate_CpG ; context_CpG } p =
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
    let nuc_rates = (nucleotide_rates :> Nucleotide.matrix) in
    NSCodon.Table.init (fun q ->
        if NSCodon.equal p q then 0.
        else
          match NSCodon.neighbours p q with
          | Some (pos, x_a, x_b) ->
            let rate_CpG = if rate_increase_CpG (pos, x_a, x_b) context_CpG (inspect_codon p) then rate_CpG else 1. in
            let _B_ = match Nucleotide.(inspect x_a, inspect x_b) with
              | (A | T), (C | G) -> gBGC
              | (C | G), (A | T) -> -. gBGC
              | _ -> 0.
            in
            let selection_coefficient =
              _B_ +.
              if NSCodon.synonym p q then 0.
              else
                let aa_p = NSCodon.aa_of_codon p in
                let aa_q = NSCodon.aa_of_codon q in
                _F_.Amino_acid.%(aa_q) -. _F_.Amino_acid.%(aa_p)
            in
            let p_fix = fixation_probability selection_coefficient in
            let q_ab = nuc_rates.Nucleotide.%{x_a, x_b} in
            q_ab *. omega *. p_fix *. rate_CpG
          | None -> 0.
      )

143
  let stationary_distribution p = NSCodon_rate_matrix.stationary_distribution (rate_matrix p)
144

145 146
  let nucleotides_of_codons codons =
    Array.concat_map codons ~f:(fun codon ->
147
        let x, y, z = NSCodon.nucleotides codon in
148
        [|x;y;z|]
149 150 151 152 153 154 155
      )

  let nucleotide_counts seq =
    Nucleotide.Table.init (fun nuc ->
        Array.count seq ~f:(Nucleotide.equal nuc)
      )

156 157
  let dinucleotide_counts seq =
    let observed = Array.init Nucleotide.card ~f:(fun _ ->
158
        Array.init Nucleotide.card ~f:(fun _ -> 0)
159 160
      ) in
    Array.iteri seq ~f:(fun pos nuc ->
161 162 163
        if (pos + 1) < Array.length seq then
          let next = seq.(pos+1) in
          Nucleotide.(
164
            observed.(to_int nuc).(to_int next) <-
165 166 167 168
              observed.(to_int nuc).(to_int next) + 1
          ));
    observed

169 170 171
  let dinucleotide_ratios codon_sequence =
    let nucleotide_sequence = nucleotides_of_codons codon_sequence in
    let single_counts = nucleotide_counts nucleotide_sequence in
172
    let dinuc_counts = dinucleotide_counts nucleotide_sequence in
173 174 175
    Array.mapi dinuc_counts ~f:(fun fst_nuc counts ->
        Array.mapi counts ~f:(fun snd_nuc observed ->
            let expected =
176
              float_of_int Nucleotide.Table.(
177
                  get single_counts (Nucleotide.of_int_exn fst_nuc) *
178
                  get single_counts (Nucleotide.of_int_exn snd_nuc)
179
                ) /.
180
              float_of_int (Array.length nucleotide_sequence)
181
            in
182
            float_of_int observed /. expected)
183
      )
184 185 186 187 188
end

module Make(BI: Simulator.Branch_info) = struct
  include Simulator.Make(NSCodon)(Evolution_model)(BI)

189 190 191 192 193
  let cpg_update ~n ~pos f =
    f pos ;
    if pos - 1 >= 0 then f (pos - 1) ;
    if pos + 1 < n then f (pos + 1)

194 195 196 197 198
  let sequence_gillespie_direct rng tree ~root ~param =
    let rate_vector b state i =
      Evolution_model.rate_vector (param state i b) state.(i)
    in
    sequence_gillespie_direct rng tree ~update_iterator:cpg_update ~root ~rate_vector
199 200
end

201 202 203 204 205 206 207 208 209 210 211 212 213
let print_rate_matrix nuc_ratios =
  let header =
    List.map Nucleotide.all ~f:Nucleotide.to_char
    |> List.intersperse ~sep:'\t'
    |> String.of_char_list
  in
  let line i nuc =
    Array.map nuc_ratios.(i) ~f:(sprintf "%.4f")
    |> String.concat_array ~sep:"\t"
    |> printf "%c\t%s\n" (Nucleotide.to_char nuc)
  in
  printf "\t%s\n" header ;
  List.iteri Nucleotide.all ~f:line
Louis Duchemin's avatar
Louis Duchemin committed
214

215
let demo seq_length ~rate_CpG ~branch_length =
216
  let rng = Gsl.Rng.(make (default ())) in
Louis Duchemin's avatar
Louis Duchemin committed
217
  let module Branch_info = struct
218 219
    type t = float
    let length = Fn.id
220
  end
Louis Duchemin's avatar
Louis Duchemin committed
221 222
  in
  let module M = Make(Branch_info) in
223
  let root_sequence = Array.init seq_length ~f:(fun _ ->
Louis Duchemin's avatar
Louis Duchemin committed
224
      let codon_seed = Owl.Stats.uniform_int_rvs ~a:0 ~b:(NSCodon.card - 1) in
225
      NSCodon.of_int_exn codon_seed
Louis Duchemin's avatar
Louis Duchemin committed
226
    ) in
227
  let tree = Tree.(node () (List1.singleton (branch branch_length (leaf ())))) in
Louis Duchemin's avatar
Louis Duchemin committed
228
  let param seq i _branch = Evolution_model.flat_param ~rate_CpG seq i in
229 230
  let sim = M.sequence_gillespie_direct rng tree ~root:root_sequence ~param in
  let simulated_sequence = Tree.leaves sim |> List.hd_exn in
Louis Duchemin's avatar
Louis Duchemin committed
231
  let counts = Evolution_model.dinucleotide_ratios simulated_sequence in
232
  print_rate_matrix counts