lmm_benchmark.ml 3.62 KB
Newer Older
Philippe Veber's avatar
Philippe Veber committed
1 2
open Core_kernel
open Bistro
3
open Codepi
Philippe Veber's avatar
Philippe Veber committed
4
open Codepi.File_formats
5

Philippe Veber's avatar
Philippe Veber committed
6
type dataset = {
7
  label : string ;
Philippe Veber's avatar
Philippe Veber committed
8 9 10 11 12
  tree : nhx file ;
  rooted : bool ;
  branch_scale : float ;
  ne_s : float * float ;
}
13

Philippe Veber's avatar
Philippe Veber committed
14
let besnard2009 = {
15
  label = "besnard2009" ;
Philippe Veber's avatar
Philippe Veber committed
16 17
  tree = Bistro.Workflow.input "data/besnard2009/besnard2009.nhx" ;
  rooted = true ;
18
  branch_scale = 1. ;
19
  ne_s = 4., 4. ;
Philippe Veber's avatar
Philippe Veber committed
20 21 22
}

let oneline_rodent = {
23
  label = "online_rodent" ;
Philippe Veber's avatar
Philippe Veber committed
24 25
  tree = Bistro.Workflow.input "data/online_rodent/online_rodent.nhx"  ;
  rooted = true ;
26
  branch_scale = 1. ;
27
  ne_s = 4., 4. ;
Philippe Veber's avatar
Philippe Veber committed
28 29 30
}

let rubisco = {
31
  label = "rubisco" ;
Philippe Veber's avatar
Philippe Veber committed
32 33
  tree = Rubisco_dataset.(Path "data/rubisco" |> Query.tree ~branch_length_unit:`Amino_acid) ;
  rooted = false ;
34
  branch_scale = 1. ;
35
  ne_s = 4., 4. ;
Philippe Veber's avatar
Philippe Veber committed
36 37 38
}

let orthomam_echolocation = {
39
  label = "orthomam_echolocation" ;
Philippe Veber's avatar
Philippe Veber committed
40 41 42 43 44 45 46
  tree = (
    Orthomam.tree_of_db
      ~branch_length_unit:`Amino_acid
      ~convergent_species:Orthomam.species_with_echolocation
      (Codepitk.Orthomam_db.make "omm")
  ) ;
  rooted = false ;
47
  branch_scale = 1. ;
Philippe Veber's avatar
Philippe Veber committed
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
  ne_s = 4., 4. ;
}

type detection_method = {
  result : Simulation_dataset.t -> text file ;
  col : int ;
  label : string ;
  requires_rooted_tree : bool ;
}

let meth ?(col = 1) ?(requires_rooted_tree = false) result label =
  { result ; col ; label ; requires_rooted_tree }

let methods = Simulation_dataset.[
  meth tdg09 "tdg09" ;
  meth pcoc ~col:3 "pcoc" ;
64
  (* meth pcoc_v2 ~col:3 "pcoc v2" ; *)
Philippe Veber's avatar
Philippe Veber committed
65 66 67 68 69 70
  meth (gemma ~lmm_test:`Score ~relatedness_mode:`Standardized) "gemma" ;
  meth inhouse_lmm "LMM" ;
  meth multinomial_asymptotic_lrt "multinomial" ;
  meth topological "topological" ~requires_rooted_tree:true ;
]

71
let benchmark { tree = t ; rooted ; ne_s ; branch_scale ; _ } =
Philippe Veber's avatar
Philippe Veber committed
72
  let open Simulation_dataset in
73
  let sim =
74
    convdet_simulation ~seed:42 ~tree:(NHX t) ~branch_scale ~ne_s
75
      ~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
Philippe Veber's avatar
Philippe Veber committed
76 77 78 79 80 81 82 83 84
      ~n_h0:900 ~n_ha:100 ()
  in
  let results, labels =
    List.filter_map methods ~f:(fun m ->
        if not m.requires_rooted_tree || rooted then
          Some ((m.result sim, m.col), m.label)
        else None
      )
    |> List.unzip
85
  in
86
  Utils.average_precision_plot ~oracle:(oracle sim) ~labels ~results
Philippe Veber's avatar
Philippe Veber committed
87

88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
let benchmark_rds ?(seed = 42) { tree = t ; rooted ; ne_s ; branch_scale ; _ } =
  let open Simulation_dataset in
  let param =
    Convdet_simulation_param.make ~seed ~tree:(NHX t) ~branch_scale ~ne_s
      ~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
      ~n_h0:900 ~n_ha:100 ()
  in
  let sim = convdet_simulation_of_param param in
  let simulation = Convdet_simulation_param.simulation param in
  let results, labels =
    List.filter_map methods ~f:(fun m ->
        if not m.requires_rooted_tree || rooted then
          Some ((m.result sim, m.col), m.label)
        else None
      )
    |> List.unzip
  in
  benchmark_statistics simulation ~labels ~results

Philippe Veber's avatar
Philippe Veber committed
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
let report =
  let module H = Bistro_utils.Html_report in
  H.make ~title:"Codepi benchmark" [
    H.section "Besnard 2009 dataset" ;
    H.pdf (benchmark besnard2009) ;

    H.section "Rubisco dataset" ;
    H.pdf (benchmark rubisco) ;

    H.section "Rodent dataset" ;
    H.pdf (benchmark oneline_rodent) ;

    H.section "Orthomam/echolocation dataset" ;
    H.pdf (benchmark orthomam_echolocation) ;
  ]
  |> H.render

let () =
125 126 127 128 129 130 131 132
  let open Bistro_utils.Repo in
  let datasets = [ besnard2009 ; rubisco ; oneline_rodent ; orthomam_echolocation ] in
  let repo =
    item ["report.html"] report
    :: List.map datasets ~f:(fun d ->
        item [d.label ^ ".rds"] (benchmark_rds d)
      )
  in
133
  let loggers = [ Bistro_utils.Console_logger.create () ] in
134
  build_main ~loggers ~np:4 ~mem:(`GB 4) repo ~outdir:"res"