Commit 5c3aaabf authored by Philippe Veber
Browse files

tk/Dataframe: header specification for parsing

parent 4ebc5b3a
......@@ -90,12 +90,29 @@ let%expect_test "Dataframe.parse_line ex1" =
(Dataframe.Floats [|1.2; 1.2; -1.2|]);
(Dataframe.Int_opts [|None; (Some 2); None|])])) |}]
(** [from_file path] reads every line of the file at [path]; the first line
    is handed to [parse_header] to obtain the column labels and the column
    count, and the remaining lines are handed to [parse_lines].

    Returns [Error (`Msg "empty file")] when the file contains no line at
    all, and forwards any error produced by [parse_lines]. Exceptions raised
    by [In_channel.read_lines] or [List.zip_exn] are not caught. *)
let from_file path =
  let open Result.Monad_infix in
  match In_channel.read_lines path with
  | [] -> Error (`Msg "empty file")
  | first_line :: rows ->
    let labels, ncols = parse_header first_line in
    parse_lines ncols rows >>| fun (nrows, columns) ->
    { nrows; ncols; cols = List.zip_exn labels columns }
(** [check_header ~colnames header] compares the expected column names
    [colnames] with the labels [header], pairwise, using [String.equal].

    Returns [Ok ()] when both lists have the same length and every pair is
    equal, and [Error (`Msg _)] otherwise, with a distinct message for a
    length mismatch and for a content mismatch. *)
let check_header ~colnames header =
  let fail msg = Error (`Msg msg) in
  match List.for_all2 colnames header ~f:String.equal with
  | Unequal_lengths -> fail "incorrect number of columns"
  | Ok false -> fail "header is different from expected value"
  | Ok true -> Ok ()
(** [from_file ?header path] reads the file at [path] and parses its lines
    into a dataframe.

    [header] selects where the column labels come from (default
    [`Read_in_file]):
    - [`Read_in_file]: the first line is parsed with [parse_header] and
      provides the labels; the remaining lines are data.
    - [`Expect colnames]: same as [`Read_in_file], but the parsed labels must
      additionally pass [check_header ~colnames].
    - [`Use colnames]: no line is treated as a header; every line is data and
      [colnames] provides the labels.

    Returns [Error (`Msg _)] when a header is required but the file has no
    line, when [check_header] rejects the header, when [parse_lines] fails,
    or when the number of parsed columns differs from the number of labels.
    Exceptions raised by [In_channel.read_lines] are not caught. *)
let from_file ?(header = `Read_in_file) path =
  let open Let_syntax.Result in
  let lines = In_channel.read_lines path in
  let* labels, ncols, data_lines =
    match header, lines with
    | (`Read_in_file | `Expect _), [] ->
      Error (`Msg "empty file but expected header")
    | `Read_in_file, header_line :: data_lines ->
      let labels, ncols = parse_header header_line in
      Ok (labels, ncols, data_lines)
    | `Expect colnames, header_line :: data_lines ->
      let labels, ncols = parse_header header_line in
      let+ () = check_header ~colnames labels in
      labels, ncols, data_lines
    | `Use colnames, data_lines ->
      Ok (colnames, List.length colnames, data_lines)
  in
  (* Use [let*] here as well, so the function relies only on the binding
     operators of [Let_syntax.Result] and not on [>>=] being in scope. *)
  let* nrows, cols = parse_lines ncols data_lines in
  (* [List.zip] instead of [List.zip_exn]: a label/column count mismatch is
     reported through the result type rather than as an exception. *)
  match List.zip labels cols with
  | Ok cols -> Ok { nrows; ncols; cols }
  | Unequal_lengths ->
    Error (`Msg "number of parsed columns differs from number of labels")
......@@ -15,4 +15,7 @@ val get_col : t -> int -> column option
(** NOTE(review): presumably [get_col_by_name t name] looks up a column of
    [t] by its label and yields [None] when no column carries that label;
    the implementation is not visible here — confirm. *)
val get_col_by_name : t -> string -> column option
(** [from_file path] parses the file at [path] into a dataframe, taking the
    column labels from its first line. Returns [Error (`Msg _)] when the
    file contains no line or when parsing the data lines fails. *)
val from_file : string -> (t, [> `Msg of string]) result
(** [from_file ?header path] parses the file at [path] into a dataframe.

    [header] tells where column labels come from (default [`Read_in_file]):
    - [`Read_in_file]: labels are read from the first line of the file;
    - [`Expect colnames]: labels are read from the first line, which must
      match [colnames] (same length, same names), otherwise an error is
      returned;
    - [`Use colnames]: the file has no header line; all lines are data and
      [colnames] are used as labels.

    Returns [Error (`Msg _)] when a header is required but the file is
    empty, when the header does not match the expected one, or when parsing
    the data lines fails. *)
val from_file :
?header:[`Read_in_file | `Expect of string list | `Use of string list] ->
string ->
(t, [> `Msg of string]) result
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment