Commit 272e5d76 authored by Louis Duchemin's avatar Louis Duchemin
Browse files

Fixed missing sites in Gemma workflow result table

parent 473078f7
......@@ -22,14 +22,17 @@ let%pworkflow phenotype_of_tree nhx =
| Tree.Node n -> List1.fold_right n.branches ~init:acc ~f:branch
| Leaf _ -> condition :: acc
and branch (Tree.Branch b) acc =
node (Phylogenetics_convergence.Simulator.Branch_info.condition b.data) b.tip acc
node
(Phylogenetics_convergence.Simulator.Branch_info.condition b.data)
b.tip acc
in
node `Ancestral t []
in
let leaves = U.tree_from_file [%path nhx] |> collect_leaves in
let write_phenotypes leaves oc = List.iter leaves ~f:(fun cond ->
fprintf oc "%d\n" (match cond with `Ancestral -> 0 | `Convergent -> 1)
)
let write_phenotypes leaves oc =
List.iter leaves ~f:(fun cond ->
fprintf oc "%d\n"
(match cond with `Ancestral -> 0 | `Convergent -> 1))
in
Out_channel.with_file [%dest] ~f:(write_phenotypes leaves)
......@@ -57,7 +60,8 @@ let univariate_lmm ~lmm ~genotype ~phenotype ~relatedness_matrix =
cmd "cp" [ tmp // "result.assoc.txt"; dest ];
]
let template_of_mode m = int (match m with `Centered -> 1 | `Standardized -> 2)
let template_of_mode m =
int (match m with `Centered -> 1 | `Standardized -> 2)
let relatedness_filename = function
| `Centered -> "result.cXX.txt"
......@@ -76,10 +80,13 @@ let calculate_relatedness_matrix ~mode ~genotype ~phenotype =
cmd "cp" [ tmp // relatedness_filename mode; dest ];
]
let%pworkflow[@version 3] result_table_of_output gemma_output =
let%pworkflow[@version 3] result_table_of_output alignment gemma_output =
let module R = Reviewphiltrans_toolbox.Gemma.Result_file in
match R.of_file [%path gemma_output] with
| Error msg -> failwith msg
| Ok result_file ->
R.to_result_table result_file ~site_aggregator:R.min_pvalue_aggregator
R.to_result_table result_file ~alignment:[%path alignment]
~site_aggregator:R.min_pvalue_aggregator
|> Result.map_error ~f:Phylogenetics.Alignment.show_parsing_error
|> Result.ok_or_failwith
|> Reviewphiltrans_toolbox.Result_table.to_file ~output:[%dest]
......@@ -32,6 +32,5 @@ val univariate_lmm :
relatedness_matrix:relatedness_matrix file ->
univariate_lmm_output file
val result_table_of_output :
univariate_lmm_output file ->
text file
\ No newline at end of file
val result_table_of_output :
aminoacid_fasta file -> univariate_lmm_output file -> text file
(library
(name reviewphiltrans_toolbox)
(libraries biotk biocaml.ez ocaml-r.graphics ocaml-r.grDevices phylogenetics phylogenetics.convergence)
(libraries biotk biocaml.ez ocaml-r.graphics ocaml-r.grDevices phylogenetics
phylogenetics.convergence)
(inline_tests
(deps ../../tests/data/gemma_output.tsv))
(deps ../../tests/data/gemma_output.tsv ../../tests/data/gemma_alignment.fa))
(preprocess
(pps ppx_jane ppx_inline_test)))
......@@ -142,16 +142,23 @@ module Result_file = struct
Pval_decoder.of_header header >>| fun pvp ->
List.map lines ~f:(parse_line pvp)
let to_result_table results ~site_aggregator =
let scores =
let alignment_length path =
let open Result.Monad_infix in
Alignment.from_fasta path >>| Alignment.ncols
let to_result_table results ~alignment ~site_aggregator =
let open Result in
alignment_length alignment >>| fun nsites ->
let scores_by_index =
List.sort results ~compare:(fun it1 it2 ->
Int.compare it1.index it2.index)
|> List.group ~break:(fun l1 l2 -> l1.index <> l2.index)
|> List.map ~f:(function
| [] -> assert false
| h :: t -> site_aggregator (List1.cons h t) |> Option.some)
|> Array.of_list
| h :: t -> (h.index, site_aggregator (List1.cons h t)))
|> Int.Table.of_alist_exn
in
let scores = Array.init nsites ~f:(Int.Table.find scores_by_index) in
{ Result_table.oracle = None; scores_per_meth = [ ("Gemma", scores) ] }
let min_pvalue_aggregator xs =
......@@ -162,21 +169,15 @@ module Result_file = struct
let%test "gemma output parsing" =
Result.is_ok @@ of_file "../../tests/data/gemma_output.tsv"
let%test "gemma output parsing preserves indices" =
match of_file "../../tests/data/gemma_output.tsv" with
| Error _ -> false
| Ok table -> (
range_find 0 103 ~f:(fun i ->
not @@ List.exists table ~f:(fun item -> item.index = i))
|> function
| None -> true
| Some i -> failwithf "%d is missing" i () )
let%test "gemma output aggregation" =
match of_file "../../tests/data/gemma_output.tsv" with
| Error _ -> false
| Ok table ->
Result_table.nrows
(to_result_table table ~site_aggregator:min_pvalue_aggregator)
= 103
let open Result.Monad_infix in
Poly.equal
( to_result_table table
~alignment:"../../tests/data/gemma_alignment.fa"
~site_aggregator:min_pvalue_aggregator
>>| Result_table.nrows )
(Ok 103)
end
......@@ -20,7 +20,10 @@ module Result_file : sig
val of_file : string -> (t, string) result
val to_result_table :
t -> site_aggregator:(item List1.t -> float) -> Result_table.t
t ->
alignment:string ->
site_aggregator:(item List1.t -> float) ->
(Result_table.t, [> Alignment.parsing_error ]) result
val min_pvalue_aggregator : item List1.t -> float
end
>A0
YCIGIHYILDINIFSDITDLIYVFLSTHCSLFMQSLIIYSSYDFSAYKIDTLIAIILKESTLICVHGLMMASDITLNGSPDSLAGLSHCVSFSYLNVFSCSKQ
>C0
YPAIIHVIMLSNIMFSAFIYYCVFPRPHSLTGMMTLNPKHIKDDVVGNSNNLFISMLISFLVTYCGLLGCEYINLANAYPLNYFVLSHLVASDDLNPPVMCVI
>A1
FTKDIHGIMSINIYLSIRLFCFNSIRHHKSSPYVLIARSESIDIVGNHAKGLFISYQINAYGKVTGYLEYWVIIQLRFSYFDTVFLSRANFLSSLIMGFMSYV
>C1
LALIIHMILSKPIYLIDVFLCFLFLSLPYNIFRLPSVRCHPVDPSFLSCSLLINSMMYKSYLDCTGTRAIGSIDYSRMQIMMLNIDTRAKLCSDLPMAMMAVN
>A2
TNFSIHFGMPLLIMIIDRYAAKLFLRFGKNSFMSPHIFCIASEYKYLVSNDYICALLDKDYDDILIYLYYCLIKKKNFYVMNLTVLALLVRVSGLGPHAPSNY
>C2
CHALIHVVLTINILADKRILHSVFLSAKIIIVLILTVRIGITDQIFNLLIYLILSLLSISYFTYTWYYLSAQINTSNNSFLTYWPLTRSQKEHDVCVVHMNVF
>A3
TCLSIHNITDINIPEIKRSLNMDSPSHLQTRPYCVTFLCHESSFLLNFSQLYANIMLNHSQLHQVLIYYYEQIKYYDSLFSYLYVLLRLVLHLHLALNMVSKF
>C3
GWAHIHVILDNTILLSLRVYFSENLSIYKIVKFSTRSRGHSTVTSMPQTNYYFPAMLIMQYLDQLVYLGFEQLWQLRFSISLLIQLYISNSTVELILFCFNKI
>A4
TGAFIHVNSWINIVATIRIYFMVDYRWHANQFYTLHSRSHFVSILSHNCELLINPMLILRLLDTAFLLFYGRISPLNFSQLKTKVTVGATWIHELSMSFFSLQ
>C4
TAFPIHLILLINTSQNCRIYYSVFLRVFYSAAFRLCARSHSFDCSYDHIPLFINAMLDAQLLYCLNYFYDGATFYVDIIMLTATVFVASLKFTHLMTNMFNVG
>A5
TRLLSHMNLDILIPWHGRTFYSVCLSTNYTAQYKLRARYHFLDSVVGKIYLTINTELIKYFVLPTVFLLAEYAPYYLFVFICLPRFKLRQYLHGLLYMKMSCK
>C5
SCASIHMILIILPFYLIRILSFVPLSIWMTPPLTMGIRHHAEDPVFILPSYSILIMLIRQELYYFWIDLLFSSWFPSQAVFYLIWFARDQENHDLIVITLDLF
>A6
SRKYIHVSLECNPGCSIRVSYLVFLSHVCICQMMLGARYHSVDFQQLGAHYYRNDMLIIIYRLEVCYAYCCKDKMLNVVILSLHIISLAQAVGELVMFYMSAT
>C6
TYFYIHMILSCNIPFFCRILYYVRLSTLMCIGYQMDVRYPGAVFSLLQSKYLVNIMLIQYCLKECHFLYGSTTGYDCFSFEILSVLYIILRTIISLLFMFNFH
>A7
TNAIIHMILGINTYLTYRYYSFELLRKAYTTIIVSISIGHILDIVFLMSSYLINTGLIRSSDKYLWYLYQGYISLLYMTWNIYNVLVLLTFEVDLPLWFGSRQ
>C7
TSSIIHLILIINILEIIRIFIYFSLSVFWNKFVSLTVRYHIVKFVSCFSVLFINSFLIHIQVDCKWYYYRGQIVFKRWCYFHLNSPWMYVFTSELTMFFFLLG
>A8
ITAMIHMILDINIPLSFRNLYGRFPSVLMNVSPVLTSRSHISAFGFHWLSYLFSALLILSQFLKTLCLCCATICTYSIVWMIYHHLTKANYSSELTLISFSNY
>C8
TSFIIHYALMFNTVLHIRILFVFFLRSAIFNCLCSGSRSHFSDAVLLSSLLYLRAALILSQDFSYLYFTFTLICYAGFVFLRLKFYYRRHIVADLNMFFAFKH
>A9
TCSIIHLILSWLITRSNRCFITCFLSGLNTSFLCLGSMAHIPRDEIHNSMLYCNSILISSALKMSQYSTYSLIITDCFSMECFAVTYRFAKAWILFPPFWEWP
>C9
SSLIIHVSNSCNIFASARVLCCPFLSHFYLVLGSLRKRCHGIVYVFVLFNLFYHAMKIASLVLYAWLFRGLLIIFGCFTILYFSVLHSWVYMSDLILYFFRVF
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment