open Core_kernel open Bistro open Codepi open Codepi.File_formats type dataset = { label : string ; tree : nhx file ; rooted : bool ; branch_scale : float ; ne_s : float * float ; } let besnard2009 = { label = "besnard2009" ; tree = Bistro.Workflow.input "data/besnard2009/besnard2009.nhx" ; rooted = true ; branch_scale = 10. ; ne_s = 4., 4. ; } let oneline_rodent = { label = "online_rodent" ; tree = Bistro.Workflow.input "data/online_rodent/online_rodent.nhx" ; rooted = true ; branch_scale = 10. ; ne_s = 4., 4. ; } let rubisco = { label = "rubisco" ; tree = Rubisco_dataset.(Path "data/rubisco" |> Query.tree ~branch_length_unit:`Amino_acid) ; rooted = false ; branch_scale = 10. ; ne_s = 4., 4. ; } let orthomam_echolocation = { label = "orthomam_echolocation" ; tree = ( Orthomam.tree_of_db ~branch_length_unit:`Amino_acid ~convergent_species:Orthomam.species_with_echolocation (Codepitk.Orthomam_db.make "omm") ) ; rooted = false ; branch_scale = 10. ; ne_s = 4., 4. ; } type detection_method = { result : Simulation_dataset.t -> text file ; col : int ; label : string ; requires_rooted_tree : bool ; } let meth ?(col = 1) ?(requires_rooted_tree = false) result label = { result ; col ; label ; requires_rooted_tree } let methods = Simulation_dataset.[ meth tdg09 "tdg09" ; meth pcoc ~col:3 "pcoc" ; (* meth pcoc_v2 ~col:3 "pcoc v2" ; *) meth (gemma ~lmm_test:`Score ~relatedness_mode:`Standardized) "gemma" ; meth inhouse_lmm "LMM" ; meth multinomial_asymptotic_lrt "multinomial" ; meth topological "topological" ~requires_rooted_tree:true ; ] let benchmark { tree = t ; rooted ; ne_s ; branch_scale ; _ } = let open Simulation_dataset in let sim = convdet_simulation ~seed:42 ~tree:(NHX t) ~branch_scale ~ne_s ~profiles:"example/aa_fitness/263SelectedProfiles.tsv" ~n_h0:900 ~n_ha:100 () in let results, labels = List.filter_map methods ~f:(fun m -> if not m.requires_rooted_tree || rooted then Some ((m.result sim, m.col), m.label) else None ) |> List.unzip in Utils.average_precision_plot ~oracle:(oracle sim) ~labels ~results let benchmark_rds ?(seed = 42) { tree = t ; rooted ; ne_s ; branch_scale ; _ } = let open Simulation_dataset in let param = Convdet_simulation_param.make ~seed ~tree:(NHX t) ~branch_scale ~ne_s ~profiles:"example/aa_fitness/263SelectedProfiles.tsv" ~n_h0:900 ~n_ha:100 () in let sim = convdet_simulation_of_param param in let simulation = Convdet_simulation_param.simulation param in let results, labels = List.filter_map methods ~f:(fun m -> if not m.requires_rooted_tree || rooted then Some ((m.result sim, m.col), m.label) else None ) |> List.unzip in benchmark_statistics simulation ~labels ~results let report = let module H = Bistro_utils.Html_report in H.make ~title:"Codepi benchmark" [ H.section "Besnard 2009 dataset" ; H.pdf (benchmark besnard2009) ; H.section "Rubisco dataset" ; H.pdf (benchmark rubisco) ; H.section "Rodent dataset" ; H.pdf (benchmark oneline_rodent) ; H.section "Orthomam/echolocation dataset" ; H.pdf (benchmark orthomam_echolocation) ; ] |> H.render let () = let open Bistro_utils.Repo in let datasets = [ besnard2009 ; rubisco ; oneline_rodent ; orthomam_echolocation ] in let repo = item ["report.html"] report :: List.map datasets ~f:(fun d -> item [d.label ^ ".rds"] (benchmark_rds d) ) in let loggers = [ Bistro_utils.Console_logger.create () ] in build_main ~loggers ~np:4 ~mem:(`GB 4) repo ~outdir:"res"