Commit bcf35ad0 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

Mutsel CpG : more efficient implementation for dinucleotide ratios

parent 424c4047
......@@ -127,20 +127,33 @@ module Evolution_model = struct
Array.count seq ~f:(Nucleotide.equal nuc)
)
let dinucleotide_counts seq =
let observed = Array.init Nucleotide.card ~f:(fun _ ->
Array.init Nucleotide.card ~f:(fun _ -> 0)
) in
Array.iteri seq ~f:(fun pos nuc ->
if (pos + 1) < Array.length seq then
let next = seq.(pos+1) in
Nucleotide.(
observed.(to_int nuc).(to_int next) <-
observed.(to_int nuc).(to_int next) + 1
));
observed
let dinucleotide_ratios codon_sequence =
let nucleotide_sequence = nucleotides_of_codons codon_sequence in
let single_counts = nucleotide_counts nucleotide_sequence in
Nucleotide.Table.init (fun nuc1 ->
Nucleotide.Table.init (fun nuc2 ->
let expected = float_of_int Nucleotide.Table.(get single_counts nuc1 * get single_counts nuc2) /.
float_of_int (Array.length nucleotide_sequence) in
let observed = Array.counti nucleotide_sequence ~f:(fun pos nuc ->
if (pos + 1) = Array.length nucleotide_sequence then false
else Nucleotide.(equal nuc nuc1 && equal nucleotide_sequence.(pos+1) nuc2)
)
let dinuc_counts = dinucleotide_counts nucleotide_sequence in
Array.mapi dinuc_counts ~f:(fun fst_nuc counts ->
Array.mapi counts ~f:(fun snd_nuc observed ->
let expected =
float_of_int Nucleotide.Table.(
get single_counts (Nucleotide.of_int_exn fst_nuc) *
get single_counts (Nucleotide.of_int_exn snd_nuc)
) /.
float_of_int (Array.length nucleotide_sequence)
in
float_of_int observed /. expected
)
float_of_int observed /. expected)
)
end
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment