Commit 742207bf authored by Philippe Veber
Browse files

Alistat.nucleotide_fasta_gc: compute inter sequence variance

parent 58668233
......@@ -16,6 +16,11 @@ let strings_from_fasta fa =
| Error `Fasta_parser_error _ -> failwith "parsing error"
| Error `Msg msg -> failwith msg
(** GC-content summary built by [nucleotide_fasta_gc] from the sequences of a
    FASTA file. *)
type gc_stat = {
gc_mean : float ; (** Pooled proportion over all sequences: the per-sequence
                      counts summed and divided by the summed sequence
                      lengths, so longer sequences weigh more. This is not
                      the mean of the per-sequence frequencies.
                      NOTE(review): the counting code is not visible here;
                      presumably the counts are G/C occurrences, optionally
                      restricted by [?thirdpos] -- confirm. *)
gc_variance_among_sequences : float ; (** Variance, as computed by
                      [Owl.Stats.var], of the per-sequence frequencies
                      (count divided by length for each sequence). *)
}
let nucleotide_fasta_gc ?thirdpos:(tp=false) fa =
let seqs = strings_from_fasta fa in
let gc_counts = Array.map seqs ~f:(fun seq ->
......@@ -24,8 +29,11 @@ let nucleotide_fasta_gc ?thirdpos:(tp=false) fa =
float len, float sum
)
in
let gc_freqs = Array.map gc_counts ~f:(fun (len, k) -> k /. len) in
let gc_variance_among_sequences = Owl.Stats.var gc_freqs in
let total_len = Array.fold gc_counts ~init:0. ~f:(fun acc (n,_) -> acc +. n) in
Array.fold gc_counts ~init:0. ~f:(fun acc (_, k) -> acc +. k /. total_len)
let gc_mean = Array.fold gc_counts ~init:0. ~f:(fun acc (_, k) -> acc +. k /. total_len) in
{ gc_mean ; gc_variance_among_sequences }
let main ~alignment () =
match Alignment.from_fasta alignment with
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment