Commit 176b48d5 authored by Philippe Veber's avatar Philippe Veber
Browse files

moved Alignment to convdet

parent e5577818
open Core
open CFStream
type t = {
nsites : int ;
nseqs : int ;
labels : string array ;
seqs : string array ;
}
let fmt = Biocaml_ez.Fasta.{ default_fmt with allow_empty_lines = true }
let from_fasta fn =
match
Biocaml_ez.Fasta.with_file ~fmt fn ~f:(fun _ items ->
Stream.map items ~f:(fun i -> i.description, i.sequence)
|> Stream.to_array
|> Array.unzip
)
with
| [||], _ -> Error (`Msg "Empty FASTA file")
| labels, seqs ->
let nseqs = Array.length seqs in
let nsites = String.length seqs.(0) in
if Array.for_all seqs ~f:(fun s -> String.length s = nsites) then
Ok { nsites ; nseqs ; seqs ; labels }
else
Error (`Msg "Not an alignment: sequences with different lengths")
let residues al ~column:j =
if j < 0 || j > al.nsites then raise (Invalid_argument "incorrect column index") ;
Array.fold al.seqs ~init:Char.Set.empty ~f:(fun acc s -> Char.Set.add acc s.[j])
let number_of_residues_per_column_stats al =
let x =
Array.init al.nsites ~f:(fun column ->
residues al ~column
|> Char.Set.length
|> float
)
in
Biocaml_unix.Math.(mean x, variance x)
open Core
type t
val from_fasta : string -> (t, [> `Msg of string]) result
val residues : t -> column:int -> Char.Set.t
val number_of_residues_per_column_stats : t -> float * float
open Core
open Convdet
let main ~alignment () =
match Alignment.from_fasta alignment with
| Ok al ->
let mu, sigma = Alignment.number_of_residues_per_column_stats al in
printf "%f %f\n" mu sigma
| Error (`Msg msg) -> failwith msg
let xs = Alignment.number_of_residues_per_column_stats al in
List.iter xs ~f:(fun (n, k) ->
printf "%d %d\n" n k
)
| Error (`Msg msg | `Fasta_parser_error (_, msg)) -> failwith msg
let command =
let open Command.Let_syntax in
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment