Commit d33cb5a3 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

Mutsel CpG simulator with flat parameters

parent 7b9b98b8
open Core_kernel
open Phylogenetics
module NSCodon = Codon.Universal_genetic_code.NS
module Mutsel_cpg = struct
type param = {
nucleotide_rates : Rate_matrix.Nucleotide.t ;
nucleotide_stat_dist : Nucleotide.vector ;
omega : float ; (* dN/dS *)
scaled_fitness : Amino_acid.vector ;
gBGC : float ;
rate_CpG: float;
}
module NSCodon = Codon.Universal_genetic_code.NS
type context_CpG = {
lh: Nucleotide.repr option;
rh: Nucleotide.repr option;
codon: (Nucleotide.repr * Nucleotide.repr * Nucleotide.repr)
}
type context_CpG = {
lh: Nucleotide.repr;
rh: Nucleotide.repr;
codon: (Nucleotide.repr * Nucleotide.repr * Nucleotide.repr)
}
let inspect_codon codon =
let x, y, z = NSCodon.nucleotides codon in
Nucleotide.(inspect x, inspect y, inspect z)
let make_CpG_context p lh rh =
{
lh = trd3 (inspect_codon lh);
rh = fst3 (inspect_codon rh);
codon = inspect_codon p;
type param = {
nucleotide_rates : Rate_matrix.Nucleotide.t ;
nucleotide_stat_dist : Nucleotide.vector ;
omega : float ; (* dN/dS *)
scaled_fitness : Amino_acid.vector ;
gBGC : float ;
rate_CpG: float;
context_CpG : context_CpG;
}
let rate_increase_CpG (pos, x_a, x_b) context =
match Nucleotide.(pos, inspect x_a, inspect x_b), context with
| (0, C, T), { codon=(_,G, _) ; _ }
| (1, C, T), { codon=(_,_, G) ; _ }
| (2, C, T), { rh=G ; _ }
| (0, G, A), { lh=C ; _ }
| (1, G, A), { codon=(C, _, _) ; _ }
| (2, G, A), { codon=(_,C,_) ; _ } -> true
| _ -> false
let inspect_codon codon =
let x, y, z = NSCodon.nucleotides codon in
Nucleotide.(inspect x, inspect y, inspect z)
let fixation_probability delta =
let open Float in
if abs delta < 1e-30 then 1. + delta / 2.
else if delta > 50. then delta
else if delta < - 50. then 0.
else delta / (1. - exp (- delta))
let context_CpG_of_codons p ?lh ?rh () =
let open Option.Monad_infix in
{
lh = lh >>| (fun lh -> inspect_codon lh |> trd3);
rh = rh >>| (fun rh -> inspect_codon rh |> fst3);
codon = inspect_codon p;
}
let rate_matrix { nucleotide_rates ; omega ; scaled_fitness = _F_ ; gBGC ; rate_CpG ; _ } ~lh ~rh=
let nuc_rates = (nucleotide_rates :> Nucleotide.matrix) in
Mutsel.NSCodon_rate_matrix.make (fun p q ->
match NSCodon.neighbours p q with
| Some (pos, x_a, x_b) ->
let context = make_CpG_context p lh rh in
let rate_CpG = if rate_increase_CpG (pos, x_a, x_b) context then rate_CpG else 1. in
let _B_ = match Nucleotide.(inspect x_a, inspect x_b) with
| (A | T), (C | G) -> gBGC
| (C | G), (A | T) -> -. gBGC
| _ -> 0.
in
let selection_coefficient =
_B_ +.
if NSCodon.synonym p q then 0.
else
let aa_p = NSCodon.aa_of_codon p in
let aa_q = NSCodon.aa_of_codon q in
_F_.Amino_acid.%(aa_q) -. _F_.Amino_acid.%(aa_p)
let make_param state pos _branch =
let fitness_of_profile ?(beta = 1.) =
Amino_acid.Vector.map ~f:(fun x -> beta *. Float.log x)
in
let flat_fitness () =
Amino_acid.Vector.init (fun _ -> 1. /. float Amino_acid.card)
|> fitness_of_profile
in
let context_CpG =
if pos = 0
then context_CpG_of_codons state.(pos) ~rh:state.(pos+1) ()
else if pos = (Array.length state - 1 )
then context_CpG_of_codons state.(pos) ~lh:state.(pos-1) ()
else context_CpG_of_codons state.(pos) ~lh:state.(pos -1) ~rh:state.(pos + 1) ()
in
let pi = Nucleotide.flat_profile () in
let rho = Linear_algebra.Lacaml.Vector.init 6 ~f:(fun _ -> 1. /. 6.) in
let nucleotide_rates = Rate_matrix.Nucleotide.gtr ~equilibrium_frequencies:pi ~transition_rates:rho in
{
nucleotide_rates;
nucleotide_stat_dist=pi;
omega=1.;
scaled_fitness = flat_fitness ();
gBGC = 0.;
rate_CpG = 0.;
context_CpG;
}
let rate_increase_CpG (pos, x_a, x_b) context =
match Nucleotide.(pos, inspect x_a, inspect x_b), context with
| (0, C, T), { codon=(_,G, _) ; _ }
| (1, C, T), { codon=(_,_, G) ; _ }
| (2, C, T), { rh=Some G ; _ }
| (0, G, A), { lh=Some C ; _ }
| (1, G, A), { codon=(C, _, _) ; _ }
| (2, G, A), { codon=(_,C,_) ; _ } -> true
| _ -> false
let fixation_probability delta =
let open Float in
if abs delta < 1e-30 then 1. + delta / 2.
else if delta > 50. then delta
else if delta < - 50. then 0.
else delta / (1. - exp (- delta))
let rate_matrix { nucleotide_rates ; omega ; scaled_fitness = _F_ ; gBGC ; rate_CpG ; context_CpG; _ } =
let nuc_rates = (nucleotide_rates :> Nucleotide.matrix) in
Mutsel.NSCodon_rate_matrix.make (fun p q ->
match NSCodon.neighbours p q with
| Some (pos, x_a, x_b) ->
let rate_CpG = if rate_increase_CpG (pos, x_a, x_b) context_CpG then rate_CpG else 1. in
let _B_ = match Nucleotide.(inspect x_a, inspect x_b) with
| (A | T), (C | G) -> gBGC
| (C | G), (A | T) -> -. gBGC
| _ -> 0.
in
let selection_coefficient =
_B_ +.
if NSCodon.synonym p q then 0.
else
let aa_p = NSCodon.aa_of_codon p in
let aa_q = NSCodon.aa_of_codon q in
_F_.Amino_acid.%(aa_q) -. _F_.Amino_acid.%(aa_p)
in
let p_fix = fixation_probability selection_coefficient in
let q_ab = nuc_rates.Nucleotide.%{x_a, x_b} in
q_ab *. omega *. p_fix *. rate_CpG
| None -> 0.
)
let stationary_distribution p =
let pi = p.nucleotide_stat_dist in
NSCodon.Vector.init (fun codon ->
let n1, n2, n3 = NSCodon.nucleotides codon in
let aa = NSCodon.aa_of_codon codon in
let b n = match Nucleotide.inspect n with
| A | T -> -. p.gBGC
| C | G -> +. p.gBGC
in
let p_fix = fixation_probability selection_coefficient in
let q_ab = nuc_rates.Nucleotide.%{x_a, x_b} in
q_ab *. omega *. p_fix *. rate_CpG
| None -> 0.
)
\ No newline at end of file
Nucleotide.(pi.%(n1) *. pi.%(n2) *. pi.%(n3))
*. exp (p.scaled_fitness.Amino_acid.%(aa) +. (b n1 +. b n2 +. b n3) /. 2.)
)
|> NSCodon.Vector.normalize
end
module M(BI: Simulator.Branch_info) = struct
include Simulator.Make(Mutsel_cpg.NSCodon)(Mutsel_cpg)(BI)
end
\ No newline at end of file
open Phylogenetics
module NSCodon = Codon.Universal_genetic_code.NS
type param = {
nucleotide_rates : Rate_matrix.Nucleotide.t ;
nucleotide_stat_dist : Nucleotide.vector ;
omega : float ; (* dN/dS *)
scaled_fitness : Amino_acid.vector ;
gBGC : float ;
rate_CpG: float;
}
val rate_matrix : param -> lh:NSCodon.t -> rh:NSCodon.t -> NSCodon.matrix
\ No newline at end of file
module Mutsel_cpg : sig
module NSCodon = Codon.Universal_genetic_code.NS
type context_CpG = {
lh: Nucleotide.repr option;
rh: Nucleotide.repr option;
codon: (Nucleotide.repr * Nucleotide.repr * Nucleotide.repr)
}
type param = {
nucleotide_rates : Rate_matrix.Nucleotide.t ;
nucleotide_stat_dist : Nucleotide.vector ;
omega : float ; (* dN/dS *)
scaled_fitness : Amino_acid.vector ;
gBGC : float ;
rate_CpG: float;
context_CpG: context_CpG;
}
val rate_matrix : param -> NSCodon.matrix
val stationary_distribution: param -> NSCodon.vector
val make_param: NSCodon.t array -> int -> 'a -> param
end
module M(BI: Simulator.Branch_info) : sig
include module type of Simulator.Make(Mutsel_cpg.NSCodon)(Mutsel_cpg)(BI)
end
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment