convergence_detection.ml 7.51 KB
Newer Older
Carine Rey's avatar
Carine Rey committed
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2 3
open Bistro.Shell_dsl
open Bistro
4
open File_formats
Carine Rey's avatar
Carine Rey committed
5

Philippe Veber's avatar
Philippe Veber committed
6
(** Output of a single convergence-detection tool, tagged by the tool (and,
    where relevant, the model variant) that produced it. *)
type result = [
  (* PCOC runs; the three variants share the same directory format. *)
  | `Pcoc of [ `pcoc ] directory
  | `Pcoc_gamma of [ `pcoc ] directory
  | `Pcoc_C60 of [ `pcoc ] directory
  (* Diffsel run. *)
  | `Diffsel of [ `diffsel ] directory
  (* "Identical" method, LG and WAG variants. *)
  | `Identical_LG of [ `identical ] directory
  | `Identical_WAG of [ `identical ] directory
  (* "Topological" method, LG and WAG variants. *)
  | `Topological_LG of [ `topological ] directory
  | `Topological_WAG of [ `topological ] directory
  (* Tdg09 run. *)
  | `Tdg09 of [ `tdg09 ] directory
  (* Multinomial results are already a table rather than a directory. *)
  | `Multinomial of cpt file
  (* Msd run, paired with a float that is forwarded alongside the results
     (see the "--msd" option and "msd_%f" identifier built from it). *)
  | `Msd of [ `msd ] directory * float
]
Carine Rey's avatar
Carine Rey committed
19

Philippe Veber's avatar
Philippe Veber committed
20 21 22
(** [meth_string_of_result res] is a short identifier naming the detection
    method that produced [res]; the payload of [res] is ignored, except for
    [`Msd (_, e)] where the float [e] is appended using the ["%f"] format.

    Each constructor maps to a distinct identifier. *)
let meth_string_of_result = function
  | `Pcoc _ -> "pcoc"
  | `Pcoc_gamma _ -> "pcoc_gamma"
  (* Previously returned "pcoc_gamma" (copy-paste of the line above), which
     made the C60 variant indistinguishable from the gamma one. *)
  | `Pcoc_C60 _ -> "pcoc_C60"
  | `Diffsel _ -> "diffsel"
  | `Identical_LG _ -> "identical_LG"
  | `Identical_WAG _ -> "identical_WAG"
  | `Topological_LG _ -> "topological_LG"
  | `Topological_WAG _ -> "topological_WAG"
  | `Tdg09 _ -> "tdg09"
  | `Multinomial _ -> "multinomial"
  | `Msd (_, e) -> sprintf "msd_%f" e
Carine Rey's avatar
Carine Rey committed
32 33 34 35

(** Detection results gathered for one dataset. *)
type dataset_res = {
  (* String prefixes; presumably used to label the model and the tree this
     dataset was built from -- TODO confirm against callers. *)
  model_prefix : string ;
  tree_prefix : string ;
  (* The dataset the tools were run on. *)
  dataset : Dataset.t ;
  (* One entry per tool run. *)
  res_by_tools : result list ;
  (* Single table merging the per-tool results. *)
  merged_results : cpt file ;
  (* SVG plot of the merged table. *)
  plot_merged_results : svg file ;
}
Carine Rey's avatar
Carine Rey committed
41

42
(** [merge_results ?fna_infos ~res_by_tools ()] runs the
    [Scripts.merge_det_results] Python script in the [Env.env_py] image to
    merge the per-tool results into a single table written to the workflow
    destination ([-o]).

    Each element of [res_by_tools] contributes one ["--<tool> <results>"]
    argument, in list order. For [`Msd (_, e)] the float [e] is inserted
    between the switch and the results path (["--msd %f"]). When given,
    [fna_infos] is passed through [--fna_infos]. *)
let merge_results ?fna_infos ~(res_by_tools : result list) () : cpt file =
  (* Command-line fragment for one tool: its switch followed by its table. *)
  let tool_argument (res : result) =
    let switch, table = match res with
      | `Pcoc d -> "--pcoc", Pcoc.results d
      | `Pcoc_gamma d -> "--pcoc_gamma", Pcoc.results d
      | `Pcoc_C60 d -> "--pcoc_C60", Pcoc.results d
      | `Diffsel d -> "--diffsel", Diffsel.selector d
      | `Identical_LG d -> "--identical_LG", Identical.results d
      | `Identical_WAG d -> "--identical_WAG", Identical.results d
      | `Topological_LG d -> "--topological_LG", Topological.results d
      | `Topological_WAG d -> "--topological_WAG", Topological.results d
      | `Tdg09 d -> "--tdg09", Tdg09.results d
      (* Multinomial results are already a table. *)
      | `Multinomial d -> "--multinomial", d
      | `Msd (d, e) -> sprintf "--msd %f" e, Msd.results d
    in
    seq ~sep:" " [ string switch ; dep table ]
  in
  let tool_arguments = List.map res_by_tools ~f:tool_argument in
  Workflow.shell ~descr:"convergence_detection.merge_results" ~img:Env.env_py [
    cmd "python" [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      seq ~sep:" " tool_arguments ;
      option (opt "--fna_infos" dep) fna_infos ;
    ] ;
  ]

82
(** [merge_result_tables ?fna_infos ?oracle ... ()] runs the
    [Scripts.merge_det_results] Python script in the [Env.env_py] image on
    whichever result tables are supplied, writing the merged table to the
    workflow destination ([-o]).

    Every optional argument that is present is forwarded as an option of the
    same name, with two exceptions: [identical] is passed as
    [--identical_LG] and [topological] as [--topological_LG]. Absent
    arguments produce no option at all. *)
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?pcoc_v2 ?pcoc_pcp ?diffsel ?diffseldsparse () : cpt file =
  (* [table_opt name t] expands to "<name> <path of t>" when [t] is given. *)
  let table_opt name table = option (opt name dep) table in
  let merge_cmd =
    cmd "python" [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      table_opt "--multinomial" multinomial ;
      table_opt "--tdg09" tdg09 ;
      table_opt "--identical_LG" identical ;
      table_opt "--topological_LG" topological ;
      table_opt "--pcoc_v2" pcoc_v2 ;
      table_opt "--pcoc_pcp" pcoc_pcp ;
      table_opt "--pcoc" pcoc ;
      table_opt "--diffsel" diffsel ;
      table_opt "--diffseldsparse" diffseldsparse ;
      table_opt "--oracle" oracle ;
      table_opt "--fna_infos" fna_infos ;
    ]
  in
  Workflow.shell ~descr:"convergence_detection.merge_results" ~img:Env.env_py [
    merge_cmd ;
  ]

Philippe Veber's avatar
Philippe Veber committed
101
(** [plot_merge_results ?t_choices ~plot_all_sites ~res_by_tools ~tree ~faa ~tsv ()]
    runs the plotting script (assembled from [Scripts.diffsel_script_utils],
    [Scripts.plot_data] and [Scripts.plot_convergent_sites]) on the merged
    table [tsv], the alignment [faa] and the tree [tree], and returns the
    ["results.svg"] file selected from the output directory.

    - [-meth] receives the comma-separated column names associated with each
      element of [res_by_tools], in list order.
    - If [t_choices] is given it is passed as [--t_tsv] and no [-t] option is
      emitted; otherwise [-t] receives a ["<column>:0"] entry for each
      column.
    - [plot_all_sites] toggles the [--all_sites] flag. *)
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg file =
  (* The pcoc environment is used because it provides a working X server,
     which is needed to draw plots with ete3. *)
  let img = Env.env_pcoc in
  (* Column names to plot, per tool. Entries must not end with a comma: they
     are joined with "," below, so a trailing comma would yield an empty
     method name. (The gamma and C60 entries used to carry one.)
     NOTE(review): "Mutinomial" is kept as is, presumably matching the
     column name written by the merge script -- confirm.
     NOTE(review): the Msd column hard-codes 0.05 and ignores the float
     carried by [`Msd] -- confirm this is intended. *)
  let meths = List.map res_by_tools ~f:(fun res ->
      let opt = match res with
        | `Pcoc _ -> "PCOC,PC,OC"
        | `Pcoc_gamma _ -> "PCOC_gamma,PC_gamma,OC_gamma"
        | `Pcoc_C60 _ -> "PCOC_C60,PC_C60,OC_C60"
        | `Diffsel _ -> "Diffsel_mean,Diffsel_max"
        | `Identical_LG _ -> "Identical_LG08"
        | `Identical_WAG _ -> "Identical_WAG01"
        | `Topological_LG _ -> "Topological_LG08"
        | `Topological_WAG _ -> "Topological_WAG01"
        | `Tdg09 _ -> "Tdg09_1MinusFDR,Tdg09_1MinusLRT,Tdg09_prob_post"
        | `Multinomial _ -> "Mutinomial_1MinusLRT"
        | `Msd _ -> "Msd_0.05_1MinusP"
      in
      string opt
    ) |> seq ~sep:","
  in
  (* Default "<column>:0" entries, used only when no [t_choices] file is
     supplied. *)
  let default_t = List.map res_by_tools ~f:(fun res ->
      let opt = match res with
        | `Pcoc _ -> "PCOC:0,PC:0,OC:0"
        | `Pcoc_gamma _ -> "PCOC_gamma:0,PC_gamma:0,OC_gamma:0"
        | `Pcoc_C60 _ -> "PCOC_C60:0,PC_C60:0,OC_C60:0"
        | `Diffsel _ -> "Diffsel_mean:0,Diffsel_max:0"
        | `Identical_LG _ -> "Identical_LG08:0"
        | `Identical_WAG _ -> "Identical_WAG01:0"
        | `Topological_LG _ -> "Topological_LG08:0"
        | `Topological_WAG _ -> "Topological_WAG01:0"
        | `Tdg09 _ -> "Tdg09_1MinusFDR:0,Tdg09_prob_post:0,Tdg09_1MinusLRT:0"
        | `Multinomial _ -> "Mutinomial_1MinusLRT:0"
        | `Msd _ -> "Msd_0.05_1MinusP:0"
      in
      string opt
    ) |> seq ~sep:","
  in
  (* [-t] and [--t_tsv] are mutually exclusive: the defaults are dropped as
     soon as a [t_choices] file is provided. *)
  let meths_t = match t_choices with
    | Some _ -> None
    | None -> Some default_t
  in
  let out = dest // "results.svg" in
  let inner =
    Workflow.shell ~descr:"convergence_detection.plot_results" ~img [
      and_list [
        mkdir_p dest ;
        cmd "python" [
          Utils.script_dump Scripts.[ diffsel_script_utils ; plot_data ; plot_convergent_sites ] ;
          opt "-msa" dep faa ;
          opt "-tsv" dep tsv ;
          opt "-tree" dep tree ;
          opt "-out" ident out ;
          opt "-meth" ident meths ;
          option (opt "-t" ident) meths_t ;
          option (opt "--t_tsv" dep) t_choices ;
          flag string "--all_sites" plot_all_sites ;
        ]
      ]
    ]
  in
  Workflow.select inner ["results.svg"]

(** [plot_convergent_sites ?plot_all_sites ~alignment ~detection_results ~tree ()]
    runs the plotting script (assembled from [Scripts.diffsel_script_utils],
    [Scripts.plot_data] and [Scripts.plot_convergent_sites]) in the
    [Env.env_pcoc] image and returns the ["plot.svg"] file selected from the
    output directory.

    [detection_results] is passed as [-tsv], [alignment] as [-msa] and
    [tree] as [-tree]. [plot_all_sites] (default [true]) toggles the
    [--all_sites] flag. *)
let plot_convergent_sites ?(plot_all_sites = true) ~alignment ~detection_results ~tree () =
  let svg_name = "plot.svg" in
  let script =
    Utils.script_dump Scripts.[ diffsel_script_utils ; plot_data ; plot_convergent_sites ]
  in
  let plot_cmd =
    cmd "python" [
      script ;
      opt "-tsv" dep detection_results ;
      opt "-msa" dep alignment ;
      opt "-tree" dep tree ;
      opt "-out" ident (dest // svg_name) ;
      flag string "--all_sites" plot_all_sites ;
    ]
  in
  (* The output is a directory; it is created before the script runs. *)
  let plot_dir =
    Workflow.shell ~descr:"plot_convergent_sites.py" ~img:Env.env_pcoc [
      and_list [ mkdir_p dest ; plot_cmd ]
    ]
  in
  Workflow.select plot_dir [ svg_name ]
Philippe Veber's avatar
Philippe Veber committed
180 181

(** [recall_precision_curve table] runs the [Scripts.recall_precision_curve]
    R script with [Rscript], passing it the path of [table] followed by the
    workflow destination. The command runs in the [pveber/r_basics:20190710]
    docker image. *)
let recall_precision_curve table =
  let r_image =
    docker_image ~account:"pveber" ~name:"r_basics" ~tag:"20190710" ()
  in
  let rscript_cmd =
    cmd "Rscript" [
      file_dump (string Scripts.recall_precision_curve) ;
      dep table ;
      dest ;
    ]
  in
  Workflow.shell ~descr:"recall_precision_curve" ~img:[ r_image ] [
    rscript_cmd ;
  ]

Philippe Veber's avatar
Philippe Veber committed
191 192 193 194 195 196 197 198 199 200 201 202
(** [recall_precision_auc_table table] is a plugin workflow that loads
    [table] with [Codepitk.Result_table.of_file] and returns, for every
    method in the table, the pair [(method, auc)] where [auc] is the second
    component returned by [Biocaml_unix.Bin_pred.recall_precision_curve].

    Raises (via [Option.value_exn]) when the table has no oracle. *)
let recall_precision_auc_table table =
  let compute_aucs = fun%workflow () ->
    let module RT = Codepitk.Result_table in
    let result_table = RT.of_file [%path table] in
    (* The oracle provides the reference labels shared by all methods. *)
    let labels = Option.value_exn result_table.RT.oracle in
    let auc_of_method (meth, scores) =
      (* NOTE(review): missing scores are dropped here but [labels] is not
         filtered accordingly -- confirm both stay aligned. *)
      let scores = Array.filter_opt scores in
      let _, auc = Biocaml_unix.Bin_pred.recall_precision_curve ~labels ~scores in
      meth, auc
    in
    List.map result_table.RT.scores_per_meth ~f:auc_of_method
  in
  Workflow.plugin ~descr:"convergence_detection.recall_precision_curve" compute_aucs
203

Philippe Veber's avatar
Philippe Veber committed
204 205 206 207 208 209 210 211 212 213
(** [oracle ~n_h0 ~n_ha] is a workflow writing a tab-separated table with a
    ["Sites\tOracle"] header followed by [n_h0] rows labelled [0] (sites
    [1 .. n_h0]) and then [n_ha] rows labelled [1] (sites
    [n_h0 + 1 .. n_h0 + n_ha]). *)
let oracle ~n_h0 ~n_ha =
  let write_table = fun%workflow dest ->
    let n_h0 = [%param n_h0] in
    let n_ha = [%param n_ha] in
    let header = "Sites\tOracle" in
    (* Site numbers are 1-based; the H0 sites come first. *)
    let h0_rows = List.init n_h0 ~f:(fun i -> sprintf "%d\t0" (i + 1)) in
    let ha_rows = List.init n_ha ~f:(fun i -> sprintf "%d\t1" (n_h0 + i + 1)) in
    Out_channel.write_lines dest (header :: (h0_rows @ ha_rows))
  in
  Workflow.path_plugin ~descr:"convergence_detection.oracle" write_table