Commit c1e496c1 authored by Philippe Veber
Browse files

tk: added simple Dataframe implementation

parent a3605253
open Core_kernel
open Rresult
(** A single typed column of a dataframe, stored as an array with one
    element per row.

    The plain variants hold a value for every row. The [Maybe_*] variants
    hold optional values; the loader in this file picks them for columns in
    which at least one field is the literal string ["NA"]. *)
type column =
| Int of int array
| Maybe_int of int option array
| Float of float array
| Maybe_float of float option array
| String of string array
| Maybe_string of string option array
(** A dataframe: a list of labelled columns together with its dimensions. *)
type t = {
nrows : int ; (* number of data rows *)
ncols : int ; (* number of columns *)
cols : (string * column) list ; (* columns paired with their label *)
}
(** [nrows df] is the number of data rows stored in [df]. *)
let nrows { nrows ; _ } = nrows
(** [ncols df] is the number of columns stored in [df]. *)
let ncols { ncols ; _ } = ncols
(** [col df label] looks [label] up among the columns of [df] and returns
    the associated column, or [None] when no column carries that label. *)
let col t label =
  List.Assoc.find t.cols ~equal:String.equal label
(** [parse_header h] splits the header line [h] on tab characters and
    returns the resulting labels together with how many there are. *)
let parse_header h =
  let labels = String.split h ~on:'\t' in
  let count = List.length labels in
  labels, count
(** [fold_lines xs ~init ~f] folds [f] over [xs] from left to right.

    [f] receives the 0-based position of the element, the current
    accumulator and the element itself. The fold stops at the first [Error]
    returned by [f] and returns it unchanged; otherwise it returns [Ok] of
    the final accumulator ([Ok init] for an empty list). *)
let fold_lines xs ~init ~f =
  let rec go index state remaining =
    match remaining with
    | [] -> Ok state
    | item :: rest ->
      (match f index state item with
       | Error _ as failure -> failure
       | Ok state' -> go (index + 1) state' rest)
  in
  go 0 init xs
(** [optionally f s] is [None] when [s] is exactly the string ["NA"], and
    [Some (f s)] for any other string. *)
let optionally f s =
  match s with
  | "NA" -> None
  | _ -> Some (f s)
(** [rev_convert_col col] turns a column given as a list of raw string
    fields in {e reverse} row order (last row first) into a typed
    {!column} whose array is in row order.

    The most specific representation that fits every field is chosen:
    integers first, then floats, then plain strings. If at least one field
    is the literal ["NA"], the corresponding [Maybe_*] variant is used and
    every ["NA"] field becomes [None].

    Every branch goes through [conv], so the reversal of the input list is
    undone uniformly, including for the string fallbacks. *)
let rev_convert_col col =
  (* [List.rev_map] converts and restores row order in a single pass. *)
  let conv f = List.rev_map col ~f |> Array.of_list in
  let conv_opt f = conv (optionally f) in
  (* The catch-alls below are deliberate: [Int.of_string] and
     [Float.of_string] signal a malformed field with different exceptions,
     and any failure simply means "try the next, more general type". *)
  if List.mem col "NA" ~equal:String.equal then
    try Maybe_int (conv_opt Int.of_string) with _ ->
    try Maybe_float (conv_opt Float.of_string) with _ ->
      (* Strings need no parsing; "NA" still maps to [None]. *)
      Maybe_string (conv_opt Fn.id)
  else
    try Int (conv Int.of_string) with _ ->
    try Float (conv Float.of_string) with _ ->
      String (conv Fn.id)
(** [parse_lines ncols lines] splits every element of [lines] on tab
    characters and distributes the fields over [ncols] columns.

    On success it returns the number of lines read and the typed columns,
    in the order of the fields on a line. If a line does not have exactly
    [ncols] fields, parsing stops with an error message naming that line's
    1-based position within [lines] and the expected field count. *)
let parse_lines ncols lines =
  let empty_columns = List.init ncols ~f:(fun _ -> []) in
  (* Push each field of [line] onto the front of its column, so columns
     are accumulated in reverse row order. *)
  let add_line i (count, columns) line =
    let fields = String.split line ~on:'\t' in
    match List.map2 fields columns ~f:(fun field column -> field :: column) with
    | Ok columns -> Ok (count + 1, columns)
    | Unequal_lengths ->
      Rresult.R.error_msgf "Line %d doesn't have the expected %d fields" (i + 1) ncols
  in
  Result.map
    (fold_lines lines ~init:(0, empty_columns) ~f:add_line)
    ~f:(fun (nrows, columns) -> nrows, List.map columns ~f:rev_convert_col)
(** [from_file path] loads a tab-separated file into a dataframe.

    The first line is the header and provides the column labels; all
    remaining lines are data rows and are parsed with [parse_lines].
    Returns [Error (`Msg "empty file")] when the file has no line at all,
    and the error of [parse_lines] when a row has the wrong number of
    fields.

    NOTE(review): failures of [In_channel.read_lines] itself (e.g. a missing
    file) are not caught here and presumably surface as an exception —
    confirm against callers. *)
let from_file path =
  match In_channel.read_lines path with
  | [] -> Error (`Msg "empty file")
  | header :: rows ->
    let labels, ncols = parse_header header in
    Result.map (parse_lines ncols rows) ~f:(fun (nrows, columns) ->
        { nrows ; ncols ; cols = List.zip_exn labels columns })
(** A dataframe: a table of labelled, typed columns. *)
type t
(** The contents of one column, as an array with one element per row.

    The [Maybe_*] variants hold optional values; {!from_file} uses them for
    columns in which at least one field is the literal string ["NA"]. *)
type column =
| Int of int array
| Maybe_int of int option array
| Float of float array
| Maybe_float of float option array
| String of string array
| Maybe_string of string option array
(** [nrows df] is the number of data rows of [df] (the header line of the
    source file is not counted). *)
val nrows : t -> int
(** [ncols df] is the number of columns of [df]. *)
val ncols : t -> int
(** [col df label] is the column of [df] labelled [label], or [None] if
    [df] has no column with that label. *)
val col : t -> string -> column option
(** [from_file path] reads the tab-separated file at [path] into a
    dataframe. The first line gives the column labels; every following line
    is a row and must have as many tab-separated fields as the header.

    Each column is typed as integers if every field parses as an integer,
    otherwise as floats if every field parses as a float, otherwise as
    strings.

    Returns [Error (`Msg _)] if the file contains no line or if a row has an
    unexpected number of fields. *)
val from_file : string -> (t, [> `Msg of string]) result
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment