Commit 7df238ca authored by Philippe Veber
Browse files

Alistat.nucleotide_fasta_gc: compute GC stats at any pos in codon

parent bb8a7d46
......@@ -5,9 +5,9 @@ let ok_exn err = function
| Ok x -> x
| Error e -> failwith (err e)
let is_gc ?thirdpos:(tp=false) i c =
match tp with
| true when not ((i mod 3) = 2) -> false
(* [is_gc ?pos i c] tells whether the character [c], found at index [i],
   counts as a G or C nucleotide (either case).

   When [pos] is supplied, only indices whose remainder modulo 3 equals
   [pos] are eligible; any other index yields [false] whatever [c] is.
   Without [pos], every index is eligible. *)
let is_gc ?pos i c =
  let eligible =
    match pos with
    | None -> true
    | Some p -> i mod 3 = p
  in
  eligible
  && (match c with
      | 'g' | 'G' | 'c' | 'C' -> true
      | _ -> false)
let strings_from_fasta fa =
......@@ -21,11 +21,17 @@ type gc_stat = {
gc_variance_among_sequences : float ;
}
let nucleotide_fasta_gc ?thirdpos:(tp=false) fa =
let nucleotide_fasta_gc ?pos fa =
let pos = Option.map pos ~f:(function
| `first -> 0
| `second -> 1
| `third -> 2
)
in
let seqs = strings_from_fasta fa in
let gc_counts = Array.map seqs ~f:(fun seq ->
let len = if tp then String.length seq / 3 else String.length seq in
let sum = String.foldi ~init:0 ~f:(fun i a c -> a + if is_gc ~thirdpos:tp i c then 1 else 0) seq in
let len = if pos <> None then String.length seq / 3 else String.length seq in
let sum = String.foldi ~init:0 ~f:(fun i a c -> a + if is_gc ?pos i c then 1 else 0) seq in
float len, float sum
)
in
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment