mutsel_simulator.ml 3.67 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1 2 3
open Core_kernel
open Phylogenetics

4
module Branch_info = Convergence_tree.Branch_info
Philippe Veber's avatar
Philippe Veber committed
5 6 7 8 9 10

let tree_rescale_branch_length t ~scale =
  Tree.map t ~node:Fn.id ~leaf:Fn.id ~branch:(fun (bi : Branch_info.t) ->
      { bi with length = bi.length *. scale }
    )

Philippe Veber's avatar
Philippe Veber committed
11
module Site_independent = struct
Philippe Veber's avatar
Philippe Veber committed
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
  include Simulator.Make(Mutsel.NSCodon)(Mutsel)(Branch_info)

  let alignment tree ~root param =
    List.init (Array.length root) ~f:(fun i ->
        site_gillespie_direct tree ~root:root.(i) ~param:(param i)
        |> Tree.leaves
        |> List.map ~f:Codon.Universal_genetic_code.NS.to_string
      )
    |> List.transpose_exn
    |> List.map ~f:String.concat
    |> List.map ~f:Dna.of_string_unsafe

  type simulation = {
    sequences : Dna.t list ;
    fitness_profiles : Amino_acid.vector array ;
    tree : Convergence_tree.t ;
    h0_profiles : int array ;
    ha_profiles : (int * int) array ;
    h0_params : (Mutsel.param * Mutsel.param) array ;
    ha_params : (Mutsel.param * Mutsel.param) array ;
    base_param : Mutsel.param ;
    gBGC : float * float ;
    ne_s : float * float ;
  }

  let most_probable_aa (pref : Amino_acid.vector) =
    let pref = Amino_acid.Vector.to_array pref in
    let arr = Array.mapi pref ~f:(fun i x -> x, i) in
    match Array.max_elt ~compare:Poly.compare arr with
    | None -> assert false
    | Some (_, i) -> i

  let simulation ?branch_scale ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~gBGC:(gBGC0, gBGC1) ~tree ~fitness_profiles () =
    let () = Option.iter ~f:Random.init seed in
    let rescale_fitness beta = Amino_acid.Vector.map ~f:(( *. ) beta) in
    let base_param =
      let p = Mutsel.random_param ~alpha_nucleotide:10. ~alpha_fitness:0.1 in
      { p with omega = 1. }
    in
    let random_profile () = Random.int (Array.length fitness_profiles) in
    let most_probable_aa = Array.map fitness_profiles ~f:most_probable_aa in
    let h0_profiles = Array.init n_h0 ~f:(fun _ -> random_profile ()) in
    let ha_profiles = Array.init n_ha ~f:(fun _ ->
        let rec loop () =
          let p = random_profile () in
          let q = random_profile () in
          if p = q || most_probable_aa.(p) = most_probable_aa.(q) then loop ()
          else (p, q)
        in
        loop ()
      )
    in
    let make_param ~gBGC ~ne_s p =
      { base_param with gBGC ; scaled_fitness = rescale_fitness ne_s fitness_profiles.(p) }
    in
    let make_param_pair p q =
        make_param ~gBGC:gBGC0 ~ne_s:ne_s0 p,
        make_param ~gBGC:gBGC1 ~ne_s:ne_s1 q
    in
    let h0_params = Array.map h0_profiles ~f:(fun profile ->
        make_param_pair profile profile
      )
    in
    let ha_params = Array.map ha_profiles ~f:(fun (p, q) ->
        make_param_pair p q
      )
    in
    let params_of_cond i cond =
      let p, q =
        if i < n_h0 then h0_params.(i)
        else ha_params.(i - n_h0)
      in
      match cond with
      | `Ancestral -> p
      | `Convergent -> q
    in
    let params i bi =
      params_of_cond i (Branch_info.condition bi)
    in
    let root_condition = `Ancestral in
    let root_dists = Array.init (n_h0 + n_ha) ~f:(fun i ->
        Mutsel.stationary_distribution (params_of_cond i root_condition)
        |> Mutsel.NSCodon.Vector.to_array
        |> Mutsel.NSCodon.Table.of_array_exn
      )
    in
    let root = hmm0 ~len:(n_h0 + n_ha) ~dist:(Array.get root_dists) in
    let rescaled_tree =
      Option.value_map branch_scale ~default:tree ~f:(fun scale ->
          tree_rescale_branch_length tree ~scale
        )
    in
    let sequences = alignment rescaled_tree ~root params in
    { sequences ; fitness_profiles ; h0_profiles ; ha_profiles ;
      gBGC = gBGC0, gBGC1 ; ne_s = ne_s0, ne_s1 ;
      h0_params ; ha_params ; base_param ; tree}
end