open Core_kernel
open Bistro
open Codepi
open Codepi.File_formats

module Pipeline = Simulation_pipeline.Mutsel

(** A tree on which simulations are run, plus the simulation settings
    that go with it. [branch_scale] and [ne_s] are forwarded unchanged to
    [Pipeline.query]; [rooted] decides whether detection methods that
    require a rooted tree are run on this dataset. *)
type dataset = {
  label : string ;
  tree : nhx file ;
  rooted : bool ;
  branch_scale : float ;
  ne_s : float * float ;
}

let besnard2009 = {
  label = "besnard2009" ;
  tree = Bistro.Workflow.input "data/besnard2009/besnard2009.nhx" ;
  rooted = true ;
  branch_scale = 1. ;
  ne_s = (4., 4.) ;
}

(* NOTE(review): the identifier reads "oneline" while the label and the
   path read "online" -- confirm which spelling is intended. *)
let oneline_rodent = {
  label = "online_rodent" ;
  tree = Bistro.Workflow.input "data/online_rodent/online_rodent.nhx" ;
  rooted = true ;
  branch_scale = 1. ;
  ne_s = (4., 4.) ;
}

let rubisco = {
  label = "rubisco" ;
  tree =
    Rubisco_dataset.(
      Path "data/rubisco"
      |> Query.tree ~branch_length_unit:`Amino_acid
    ) ;
  rooted = false ;
  branch_scale = 1. ;
  ne_s = (4., 4.) ;
}

let orthomam_echolocation = {
  label = "orthomam_echolocation" ;
  tree =
    Orthomam.tree_of_db
      ~branch_length_unit:`Amino_acid
      ~convergent_species:Orthomam.species_with_echolocation
      (Codepitk.Orthomam_db.make "omm") ;
  rooted = false ;
  branch_scale = 1. ;
  ne_s = (4., 4.) ;
}

(** A detection method: a function turning a pipeline query into a result
    file, a display label, and a flag telling whether the method can only
    be run on rooted trees. *)
type detection_method = {
  result : Pipeline.query -> cpt file ;
  label : string ;
  requires_rooted_tree : bool ;
}

(** [meth ?requires_rooted_tree result label] builds a
    {!detection_method}; [requires_rooted_tree] defaults to [false]. *)
let meth ?(requires_rooted_tree = false) result label =
  { result ; label ; requires_rooted_tree }

(** Methods included in the benchmark, in the order their results are
    handed to [Pipeline.benchmark_statistics]. *)
let methods =
  Pipeline.[
    meth tdg09 "tdg09" ;
    meth pcoc "pcoc" ;
    (* meth pcoc_v2 ~col:3 "pcoc v2" ; *)
    meth (gemma ~lmm_test:`Score ~relatedness_mode:`Standardized) "gemma" ;
    meth inhouse_lmm "LMM" ;
    meth multinomial_asymptotic_lrt "multinomial" ;
    meth topological "topological" ~requires_rooted_tree:true ;
  ]

(** [benchmark_rds ?seed dataset] builds a pipeline query from [dataset],
    runs the simulation and every applicable method of {!methods} on it,
    and returns what [Pipeline.benchmark_statistics] produces from them.
    Methods flagged [requires_rooted_tree] are left out when the dataset
    is not rooted. [seed] defaults to [42]. *)
let benchmark_rds ?(seed = 42) { tree ; rooted ; ne_s ; branch_scale ; _ } =
  let query =
    Pipeline.query
      ~seed
      ~tree:(NHX tree)
      ~branch_scale
      ~ne_s
      ~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
      (* presumably the number of simulated sites under H0 / Ha -- confirm
         against Pipeline.query *)
      ~n_h0:900
      ~n_ha:100
      ()
  in
  let simulation = Pipeline.simulation query in
  let results =
    methods
    |> List.filter ~f:(fun m -> rooted || not m.requires_rooted_tree)
    |> List.map ~f:(fun m -> m.result query)
  in
  Pipeline.benchmark_statistics simulation ~results

(* Entry point: one "<label>.rds" repo item per dataset, built under
   "res". *)
let () =
  let open Bistro_utils.Repo in
  let datasets = [
    besnard2009 ;
    rubisco ;
    oneline_rodent ;
    orthomam_echolocation ;
  ]
  in
  let repo =
    List.map datasets ~f:(fun d ->
        item [ d.label ^ ".rds" ] (benchmark_rds d)
      )
  in
  let loggers = [ Bistro_utils.Console_logger.create () ] in
  build_main ~loggers ~np:4 ~mem:(`GB 4) repo ~outdir:"res"