mutsel_simulator.ml 5.65 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1 2 3
open Core_kernel
open Phylogenetics

4
module Branch_info = Convergence_tree.Branch_info
Philippe Veber's avatar
Philippe Veber committed
5 6 7 8 9 10

let tree_rescale_branch_length t ~scale =
  Tree.map t ~node:Fn.id ~leaf:Fn.id ~branch:(fun (bi : Branch_info.t) ->
      { bi with length = bi.length *. scale }
    )

Louis Duchemin's avatar
Louis Duchemin committed
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
let rescale_fitness beta = Amino_acid.Vector.map ~f:(( *. ) beta)

let most_probable_aa (pref : Amino_acid.vector) =
  let pref = Amino_acid.Vector.to_array pref in
  let arr = Array.mapi pref ~f:(fun i x -> x, i) in
  match Array.max_elt ~compare:Poly.compare arr with
  | None -> assert false
  | Some (_, i) -> i

let make_profiles n_h0 n_ha fitness_profiles = 
  let random_profile () = Random.int (Array.length fitness_profiles)
  and most_probable_aa = Array.map fitness_profiles ~f:most_probable_aa in
  let h0_profiles = Array.init n_h0 ~f:(fun _ -> random_profile ())
  and ha_profiles = Array.init n_ha ~f:(fun _ ->
      let rec loop () =
        let p = random_profile ()
        and q = random_profile () in
        if p = q || most_probable_aa.(p) = most_probable_aa.(q) then loop ()
        else (p, q)
      in
      loop ()
    ) in
  h0_profiles, ha_profiles

let make_params ~gBGC:(gBGC0, gBGC1) ~ne_s:(ne_s0, ne_s1) ~fitness_profiles ~h0_profiles ~ha_profiles = 
  let base_param = Mutsel.random_param ~alpha_nucleotide:10. ~alpha_fitness:0.1 in
  let make_param ~gBGC ~ne_s p = 
    {
      base_param with 
      gBGC; scaled_fitness = rescale_fitness ne_s fitness_profiles.(p)
    }
  in
  let make_param_pair p q =
    make_param ~gBGC:gBGC0 ~ne_s:ne_s0 p,
    make_param ~gBGC:gBGC1 ~ne_s:ne_s1 q
  in
  let h0_params = Array.map h0_profiles ~f:(fun profile -> 
      make_param_pair profile profile
    )
  and ha_params = Array.map ha_profiles ~f:(fun (p,q) -> 
      make_param_pair p q 
    )
  in
  (base_param, h0_params, ha_params)

let params_of_cond h0_params ha_params =
  let n_h0 = Array.length h0_params in
  fun i cond -> 
    let p, q =
      if i < n_h0 then h0_params.(i)
      else ha_params.(i - n_h0)
    in
    match cond with
    | `Ancestral -> p
    | `Convergent -> q


Philippe Veber's avatar
Philippe Veber committed
68
module Site_independent = struct
Louis Duchemin's avatar
Louis Duchemin committed
69 70 71 72 73 74 75 76 77 78 79 80 81 82
  module Sim = Simulator.Make(Mutsel.NSCodon)(Mutsel)(Branch_info)

  type t = {
    sequences : Dna.t list ;
    fitness_profiles : Amino_acid.vector array ;
    tree : Convergence_tree.t ;
    h0_profiles : int array ;
    ha_profiles : (int * int) array ;
    h0_params : (Mutsel.param * Mutsel.param) array ;
    ha_params : (Mutsel.param * Mutsel.param) array ;
    base_param : Mutsel.param ;
    gBGC : float * float ;
    ne_s : float * float ;
  }
Philippe Veber's avatar
Philippe Veber committed
83 84 85

  let alignment tree ~root param =
    List.init (Array.length root) ~f:(fun i ->
Louis Duchemin's avatar
Louis Duchemin committed
86
        Sim.site_gillespie_direct tree ~root:root.(i) ~param:(param i)
Philippe Veber's avatar
Philippe Veber committed
87 88 89 90 91 92 93
        |> Tree.leaves
        |> List.map ~f:Codon.Universal_genetic_code.NS.to_string
      )
    |> List.transpose_exn
    |> List.map ~f:String.concat
    |> List.map ~f:Dna.of_string_unsafe

Louis Duchemin's avatar
Louis Duchemin committed
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
  let make ?seed ~n_h0 ~n_ha ~ne_s ~gBGC ~tree ~fitness_profiles () =
    let () = Option.iter ~f:Random.init seed in
    let h0_profiles, ha_profiles = make_profiles n_h0 n_ha fitness_profiles in
    let base_param, h0_params, ha_params = 
      make_params ~gBGC ~ne_s ~fitness_profiles ~h0_profiles ~ha_profiles  in
    let params_of_cond = params_of_cond h0_params ha_params in
    let params i bi = params_of_cond i (Branch_info.condition bi) in
    let root_condition = `Ancestral in
    let root_dists = Array.init (n_h0 + n_ha) ~f:(fun i ->
        Mutsel.stationary_distribution (params_of_cond i root_condition)
        |> Mutsel.NSCodon.Vector.to_array
        |> Mutsel.NSCodon.Table.of_array_exn
      )
    in
    let root = Sim.hmm0 ~len:(n_h0 + n_ha) ~dist:(Array.get root_dists) in
    let sequences = alignment tree ~root params in
    { sequences ; fitness_profiles ; h0_profiles ; ha_profiles ;
      gBGC ; ne_s ; h0_params ; ha_params ; base_param ; tree}
end

module With_CpG_hypermutability = struct 

  module Sim = Mutsel_simulator_cpg.Make(Branch_info)

  type t = {
Philippe Veber's avatar
Philippe Veber committed
119 120 121 122 123 124 125
    sequences : Dna.t list ;
    fitness_profiles : Amino_acid.vector array ;
    tree : Convergence_tree.t ;
    h0_profiles : int array ;
    ha_profiles : (int * int) array ;
    gBGC : float * float ;
    ne_s : float * float ;
Louis Duchemin's avatar
Louis Duchemin committed
126
    rate_CpG : float ;
Philippe Veber's avatar
Philippe Veber committed
127 128
  }

Louis Duchemin's avatar
Louis Duchemin committed
129 130 131 132 133 134 135 136 137 138 139
  let make ~rng ~n_h0 ~n_ha ~ne_s ~gBGC ~rate_CpG ~(tree:Convergence_tree.t) ~fitness_profiles () =
    let h0_profiles, ha_profiles = make_profiles n_h0 n_ha fitness_profiles in
    let _, h0_params, ha_params = 
      make_params ~gBGC ~ne_s ~fitness_profiles ~h0_profiles ~ha_profiles in
    let params_of_cond = params_of_cond h0_params ha_params in
    let param_factory state pos branch =
      let module EM = Mutsel_simulator_cpg.Evolution_model in
      let left_codon, right_codon = EM.flanking_codons state pos in
      let context_CpG = EM.context_CpG_of_codons ?left_codon ?right_codon () in
      let mutsel = params_of_cond pos (Branch_info.condition branch) in
      EM.{ mutsel ; rate_CpG ; context_CpG }
Philippe Veber's avatar
Philippe Veber committed
140
    in
Louis Duchemin's avatar
Louis Duchemin committed
141
    (* FIXME : this is stationary dist for Mutsel without CpG *)
Philippe Veber's avatar
Philippe Veber committed
142
    let root_dists = Array.init (n_h0 + n_ha) ~f:(fun i ->
Louis Duchemin's avatar
Louis Duchemin committed
143
        Mutsel.stationary_distribution (params_of_cond i `Ancestral)
Philippe Veber's avatar
Philippe Veber committed
144 145 146 147
        |> Mutsel.NSCodon.Vector.to_array
        |> Mutsel.NSCodon.Table.of_array_exn
      )
    in
Louis Duchemin's avatar
Louis Duchemin committed
148 149 150 151 152 153 154 155 156 157
    let root = Array.map root_dists ~f:(fun codon_dist -> 
        let dist = Gsl.Randist.discrete_preproc (codon_dist :> float array) in
        Gsl.Randist.discrete rng dist 
        |> Mutsel.NSCodon.of_int_exn
      )
    in
    let sequences = 
      Sim.sequence_gillespie_direct rng tree ~root ~param:param_factory
      |> Tree.leaves 
      |> List.map ~f:Dna.of_codons 
Philippe Veber's avatar
Philippe Veber committed
158 159
    in
    { sequences ; fitness_profiles ; h0_profiles ; ha_profiles ;
Louis Duchemin's avatar
Louis Duchemin committed
160 161 162
      gBGC ; ne_s ; tree ; rate_CpG}

end