Commit 8bbf97e0 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

Gemma.Result_file module for processing Gemma raw output

parent bedde902
......@@ -5,41 +5,112 @@ open Phylogenetics
let write_phenotypes ~newick ~output =
Out_channel.with_file output ~f:(fun oc ->
Newick.from_file newick
|> Newick.with_inner_tree
~f:(fun tree ->
Tree.leaves tree
|> List.iter ~f:(fun leaf ->
match leaf.name with
| None -> failwith "Found unnamed node"
| Some name -> fprintf oc "%s\n" name)
)
)
|> Newick.with_inner_tree ~f:(fun tree ->
Tree.leaves tree
|> List.iter ~f:(fun leaf ->
match leaf.name with
| None -> failwith "Found unnamed node"
| Some name -> fprintf oc "%s\n" name)))
let column_of_alignment al col =
List.init (Alignment.nrows al) ~f:(fun i -> al.sequences.(i).[col])
let aa_of_column col =
List.dedup_and_sort ~compare:Char.compare col
let aa_of_column col = List.dedup_and_sort ~compare:Char.compare col
let write_genotype_line oc locus_id column aa =
fprintf oc "%s_%c, 0, 1" locus_id aa ;
List.iter column ~f:(fun indiv_aa ->
fprintf oc ", %d" (if Char.(indiv_aa = aa) then 1 else 0)
);
fprintf oc "%s_%c, 0, 1" locus_id aa;
List.iter column ~f:(fun indiv_aa ->
fprintf oc ", %d" (if Char.(indiv_aa = aa) then 1 else 0));
Out_channel.newline oc
let write_column_alignment_genotype oc al col =
let locus_id = sprintf "locus_%d" col in
let column = column_of_alignment al col in
let aminoacids = aa_of_column column in
let column = column_of_alignment al col in
let aminoacids = aa_of_column column in
List.iter aminoacids ~f:(write_genotype_line oc locus_id column)
let write_genotypes ~alignment ~output =
let write_genotypes ~alignment ~output =
Out_channel.with_file output ~f:(fun _oc ->
match Alignment.from_fasta alignment with
| Ok al ->
let ncols = Alignment.ncols al in
range_iter 0 ncols ~f:(write_column_alignment_genotype _oc al)
| Error _ ->
failwithf "could not open %s" alignment ()
)
let ncols = Alignment.ncols al in
range_iter 0 ncols ~f:(write_column_alignment_genotype _oc al)
| Error _ -> failwithf "could not open %s" alignment ())
module Result_file = struct
type item = {
index : int;
residue : char;
pval_lrt : float option;
pval_score : float option;
pval_wald : float option;
}
type t = item list
type pval_decoder = {
lrt : int option;
wald : int option;
score : int option;
}
let index_of_column header colname =
List.findi header ~f:(fun _ -> String.equal colname) |> Option.map ~f:fst
let pval_decoder_of_header header =
{
lrt = index_of_column header "p_lrt";
wald = index_of_column header "p_wald";
score = index_of_column header "p_score";
}
let pval_parser_max_field pvp =
let f x y =
match (x, y) with
| None, None -> None
| Some _, None -> x
| None, _ -> y
| Some x, Some y -> Some (Int.max x y)
in
None |> f pvp.lrt |> f pvp.wald |> f pvp.score
let parse_locus_id locus_id =
match String.split ~on:'_' locus_id with
| [ _;+ index; residue ] -> (Int.of_string index, Char.of_string residue)
| _ -> failwith "Locus ID parsing failed"
let parse_line pval_parser fields =
let fields = Array.of_list fields in
let fields_upper_bound b1 maybe_b2 =
Option.value_map maybe_b2 ~default:b1 ~f:(Int.max b1)
in
if
Array.length fields
< fields_upper_bound 2 (pval_parser_max_field pval_parser)
then failwith "parse_line: not enough fields";
let index, residue = parse_locus_id fields.(1) in
let get_pval = Option.map ~f:(fun i -> fields.(i) |> Float.of_string) in
let pval_wald = get_pval pval_parser.wald in
let pval_score = get_pval pval_parser.score in
let pval_lrt = get_pval pval_parser.lrt in
{ index; residue; pval_lrt; pval_score; pval_wald }
let of_file filename =
match
In_channel.read_lines filename |> List.map ~f:(String.split ~on:'\t')
with
| [] -> failwith "Result file is empty"
| header :: lines ->
let pvp = pval_decoder_of_header header in
List.map lines ~f:(parse_line pvp)
let to_result_table results ~site_aggregator =
let scores =
List.sort results ~compare:(fun it1 it2 ->
Int.compare it1.index it2.index)
|> List.group ~break:(fun l1 l2 -> l1.index = l2.index)
|> List.map ~f:site_aggregator
|> Array.of_list
in
{ Result_table.oracle = None; scores_per_meth = [ ("Gemma", scores) ] }
end
val write_phenotypes: newick:string -> output:string -> unit
val write_phenotypes : newick:string -> output:string -> unit
val write_genotypes: alignment:string -> output:string -> unit
\ No newline at end of file
val write_genotypes : alignment:string -> output:string -> unit
module Result_file : sig
type item = {
index : int;
residue : char;
pval_lrt : float option;
pval_score : float option;
pval_wald : float option;
}
type t = item list
val of_file : string -> t
val to_result_table :
t -> site_aggregator:(item list -> float option) -> Result_table.t
end
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment