lmm_benchmark.ml 2.64 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1 2
open Core_kernel
open Bistro
3
open Codepi
Philippe Veber's avatar
Philippe Veber committed
4
open Codepi.File_formats
5

Philippe Veber's avatar
Philippe Veber committed
6 7 8 9 10 11
type dataset = {
  tree : nhx file ;
  rooted : bool ;
  branch_scale : float ;
  ne_s : float * float ;
}
12

Philippe Veber's avatar
Philippe Veber committed
13 14 15 16
let besnard2009 = {
  tree = Bistro.Workflow.input "data/besnard2009/besnard2009.nhx" ;
  rooted = true ;
  branch_scale = 3. ;
17
  ne_s = 4., 4. ;
Philippe Veber's avatar
Philippe Veber committed
18 19 20 21 22 23
}

let oneline_rodent = {
  tree = Bistro.Workflow.input "data/online_rodent/online_rodent.nhx"  ;
  rooted = true ;
  branch_scale = 3. ;
24
  ne_s = 4., 4. ;
Philippe Veber's avatar
Philippe Veber committed
25 26 27 28 29 30
}

let rubisco = {
  tree = Rubisco_dataset.(Path "data/rubisco" |> Query.tree ~branch_length_unit:`Amino_acid) ;
  rooted = false ;
  branch_scale = 3. ;
31
  ne_s = 4., 4. ;
Philippe Veber's avatar
Philippe Veber committed
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
}

let orthomam_echolocation = {
  tree = (
    Orthomam.tree_of_db
      ~branch_length_unit:`Amino_acid
      ~convergent_species:Orthomam.species_with_echolocation
      (Codepitk.Orthomam_db.make "omm")
  ) ;
  rooted = false ;
  branch_scale = 3. ;
  ne_s = 4., 4. ;
}

type detection_method = {
  result : Simulation_dataset.t -> text file ;
  col : int ;
  label : string ;
  requires_rooted_tree : bool ;
}

let meth ?(col = 1) ?(requires_rooted_tree = false) result label =
  { result ; col ; label ; requires_rooted_tree }

let methods = Simulation_dataset.[
  meth tdg09 "tdg09" ;
  meth pcoc ~col:3 "pcoc" ;
59
  (* meth pcoc_v2 ~col:3 "pcoc v2" ; *)
Philippe Veber's avatar
Philippe Veber committed
60 61 62 63 64 65 66 67
  meth (gemma ~lmm_test:`Score ~relatedness_mode:`Standardized) "gemma" ;
  meth inhouse_lmm "LMM" ;
  meth multinomial_asymptotic_lrt "multinomial" ;
  meth topological "topological" ~requires_rooted_tree:true ;
]

let benchmark { tree = t ; rooted ; ne_s ; branch_scale } =
  let open Simulation_dataset in
68
  let sim =
69
    convdet_simulation ~seed:3 ~tree:(NHX t) ~branch_scale ~ne_s
70
      ~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
Philippe Veber's avatar
Philippe Veber committed
71 72 73 74 75 76 77 78 79
      ~n_h0:900 ~n_ha:100 ()
  in
  let results, labels =
    List.filter_map methods ~f:(fun m ->
        if not m.requires_rooted_tree || rooted then
          Some ((m.result sim, m.col), m.label)
        else None
      )
    |> List.unzip
80
  in
81
  Utils.average_precision_plot ~oracle:(oracle sim) ~labels ~results
Philippe Veber's avatar
Philippe Veber committed
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101

let report =
  let module H = Bistro_utils.Html_report in
  H.make ~title:"Codepi benchmark" [
    H.section "Besnard 2009 dataset" ;
    H.pdf (benchmark besnard2009) ;

    H.section "Rubisco dataset" ;
    H.pdf (benchmark rubisco) ;

    H.section "Rodent dataset" ;
    H.pdf (benchmark oneline_rodent) ;

    H.section "Orthomam/echolocation dataset" ;
    H.pdf (benchmark orthomam_echolocation) ;
  ]
  |> H.render

let () =
  let w = report in
102 103 104
  let loggers = [ Bistro_utils.Console_logger.create () ] in
  Bistro_engine.Scheduler.simple_eval_exn ~loggers ~np:4 ~mem:(`GB 4) (Bistro.Workflow.path w)
  |> print_endline