From 09f3403761ec0c0b6009a6ea4a5c9fffa02b1315 Mon Sep 17 00:00:00 2001
From: Philippe Veber
Date: Sun, 20 Dec 2020 12:35:27 +0100
Subject: [PATCH] Run: output candidate sites for each site

---
Review note: removed the leftover debug statement in site_page that
marshalled site #0 to ./delme.bin (a write outside the workflow's dest
directory), and added doc comments. Hunk counts and the diffstat are
updated to match; the blob ids on the index line are those of the
original commit.

 lib/run.ml | 114 +++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 86 insertions(+), 28 deletions(-)

diff --git a/lib/run.ml b/lib/run.ml
index 4a4ac49..dd0df7d 100644
--- a/lib/run.ml
+++ b/lib/run.ml
@@ -1,33 +1,81 @@
 open Core
 open Bistro
 open Bistro_utils
+open File_formats
 
-let candidate_site_report sites =
+module Dataset = Dataset.New_API
+module Pipeline = Detection_pipeline.Make(Dataset.Query)
+
+(** A detection method. [id] names its output files, [name] appears in
+    report titles, [cpt_column_label] is passed to [Pipeline.ranking] as
+    [~column_label], and [f] maps a query to its [cpt] file. *)
+type detection_method = Detection_method of {
+    id : string ;
+    name : string ;
+    cpt_column_label : string ;
+    f : Dataset.Query.t -> cpt file ;
+}
+
+let multinomial_method = Detection_method {
+    id = "multinomial" ;
+    name = "Multinomial" ;
+    cpt_column_label = "Multinomial_1mp" ;
+    f = Pipeline.multinomial_asymptotic_lrt ;
+  }
+
+(** [candidate_site_report dm sites] builds a report directory holding an
+    [index.html] table of [sites] and one detail page per site, linked
+    from the table's "Infos" column. Errors from page or dataframe
+    construction are raised via [Rresult.R.failwith_error_msg]. *)
+let candidate_site_report (Detection_method dm) sites =
   let f = fun%workflow dest ->
     let module N = Codepitk.Note in
     let module DF = Codepitk.Dataframe in
     let module CS = Codepitk.Candidate_site in
     let sites = Array.of_list [%eval sites] in
 
-    let df =
-      DF.make [
-        "Alignment ID", DF.String_opts (Array.map sites ~f:(fun s -> s.CS.alignment_id)) ;
-        "Position", DF.Int_opts (Array.map sites ~f:(fun s -> s.CS.pos)) ;
-        "Score", DF.Float_opts (Array.map sites ~f:(fun s -> s.CS.score)) ;
-      ]
+    let note_to_html note path =
+      N.to_html note path |> Rresult.R.failwith_error_msg
     in
-    let index = N.make ~title:"Candidate sites" N.[
-        dataframe df ;
-      ]
+    (* File name of the detail page for the site at index [i]; also the
+       link target used in the index table's "Infos" column. *)
+    let site_page_path i = sprintf "%s_%04d.html" dm.id i in
+    let site_page i s =
+      let path = Filename.concat dest (site_page_path i) in
+      let title = sprintf "%s candidate site #%d" dm.name i in
+      let contents = N.make ~title N.[
+          text (sprintf "Alignment: %s" (Option.value ~default:"NA" s.CS.alignment_id)) ;
+          text (sprintf "Position: %s" (Option.value_map ~f:Int.to_string ~default:"NA" s.CS.pos)) ;
+          croquis (CS.draw s) ;
+        ]
+      in
+      note_to_html contents path
+    in
+    let index =
+      let df =
+        DF.make [
+          "Alignment ID", DF.String_opts (Array.map sites ~f:(fun s -> s.CS.alignment_id)) ;
+          "Position", DF.Int_opts (Array.map sites ~f:(fun s -> s.CS.pos)) ;
+          "Score", DF.Float_opts (Array.map sites ~f:(fun s -> s.CS.score)) ;
+          "Infos", DF.Strings (Array.(create ~len:(length sites) "")) ;
+        ]
+        |> Rresult.R.failwith_error_msg
+      in
+      let title = sprintf "Candidate sites for %s method" dm.name in
+      let formatters = Tyxml.Html.[
+          "Infos", fun i _ -> a ~a:[a_href (site_page_path i)] [txt "Details"]
+        ]
+      in
+      N.make ~title N.[
+        dataframe ~formatters df ;
+      ]
     in
     let path fn = Filename.concat dest fn in
     Unix.mkdir_p dest ;
-    N.to_html index (path "index.html")
-    |> Rresult.R.failwith_error_msg
+    note_to_html index (path "index.html") ;
+    Array.iteri sites ~f:site_page
   in
-  Workflow.path_plugin ~descr:"codepi.run.candidate_site_report" f
-
-module Dataset = Dataset.New_API
+  Workflow.path_plugin ~descr:"codepi.run.candidate_site_report" ~version:2 f
 
 type t = {
   tree_file : string ;
@@ -35,6 +83,12 @@ type t = {
   convergent_species_file : string ;
 }
 
+(** Detection methods to run. The argument is currently ignored: every
+    run gets the same list. *)
+let detection_methods _run = [
+  multinomial_method ;
+]
+
 let convergent_species_workflow run =
   [%workflow
     In_channel.read_lines
@@ -53,23 +107,27 @@ let dataset run =
   let convergent_species = convergent_species_workflow run in
   Dataset.make ~tree ~nucleotide_alignments ~convergent_species
 
-module Pipeline = Detection_pipeline.Make(Dataset.Query)
-
 let repo run =
+  let detection_methods = detection_methods run in
   let d = dataset run in
   let q = List.hd_exn (Dataset.queries d) in
-  let multinomial_ranking =
-    Pipeline.ranking
-      ~query_descr:(fun q -> Some q.Dataset.Query.alignment_descr)
-      ~meth:Pipeline.multinomial_asymptotic_lrt
-      ~column_label:"Multinomial_1mp"
-      ~convergent_species:d.convergent_species
-      (Dataset.queries d)
+  (* Each method contributes its [.cpt] file for the first query [q] and
+     a report directory built from the ranking over all queries. *)
+  let foreach_detection_method (Detection_method meth as dm) =
+    let ranking =
+      Pipeline.ranking
+        ~query_descr:(fun q -> Some q.Dataset.Query.alignment_descr)
+        ~meth:meth.f
+        ~column_label:meth.cpt_column_label
+        ~convergent_species:d.convergent_species
+        (Dataset.queries d)
+    in
+    Repo.[
+      item [meth.id ^ ".cpt"] (meth.f q) ;
+      item [meth.id ^ "_report"] (candidate_site_report dm ranking) ;
+    ]
   in
-  Repo.[
-    item ["multinomial.cpt"] (Pipeline.multinomial_asymptotic_lrt q) ;
-    item ["multinomial_report"] (candidate_site_report multinomial_ranking) ;
-  ]
+  List.concat_map detection_methods ~f:foreach_detection_method
 
 let main
     ~tree_file ~alignment_dir ~convergent_species_file
-- 
GitLab