convergence_detection.ml 7.82 KB
Newer Older
Carine Rey's avatar
Carine Rey committed
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2 3
open Bistro.Shell_dsl
open Bistro
Carine Rey's avatar
Carine Rey committed
4

Philippe Veber's avatar
Philippe Veber committed
5
type result = [
Philippe Veber's avatar
Philippe Veber committed
6 7 8 9 10 11 12 13 14
  | `Pcoc of [`pcoc] dworkflow
  | `Pcoc_gamma of [`pcoc] dworkflow
  | `Pcoc_C60 of [`pcoc] dworkflow
  | `Diffsel of [`diffsel] dworkflow
  | `Identical_LG of [`identical] dworkflow
  | `Identical_WAG of [`identical] dworkflow
  | `Topological_LG of [`topological] dworkflow
  | `Topological_WAG of [`topological] dworkflow
  | `Tdg09 of [`tdg09] dworkflow
15
  | `Multinomial of text_file pworkflow
Philippe Veber's avatar
Philippe Veber committed
16
  | `Msd of [`msd] dworkflow * float
Philippe Veber's avatar
Philippe Veber committed
17
]
Carine Rey's avatar
Carine Rey committed
18

Philippe Veber's avatar
Philippe Veber committed
19 20 21
let meth_string_of_result = function
  | `Pcoc _ -> "pcoc"
  | `Pcoc_gamma _ -> "pcoc_gamma"
Carine Rey's avatar
Carine Rey committed
22
  | `Pcoc_C60 _ -> "pcoc_gamma"
Philippe Veber's avatar
Philippe Veber committed
23
  | `Diffsel _ -> "diffsel"
Carine Rey's avatar
Carine Rey committed
24 25
  | `Identical_LG _ -> "identical_LG"
  | `Identical_WAG _ -> "identical_WAG"
26 27
  | `Topological_LG _ -> "topological_LG"
  | `Topological_WAG _ -> "topological_WAG"
Carine Rey's avatar
typo  
Carine Rey committed
28
  | `Tdg09 _ -> "tdg09"
29
  | `Multinomial _ -> "multinomial"
30
  | `Msd (_, e) -> sprintf "msd_%f" e
Carine Rey's avatar
Carine Rey committed
31 32 33 34

type dataset_res = {
  model_prefix : string ;
  tree_prefix : string ;
Carine Rey's avatar
Carine Rey committed
35
  dataset : Dataset.t ;
Philippe Veber's avatar
Philippe Veber committed
36
  res_by_tools: result list ;
Philippe Veber's avatar
Philippe Veber committed
37 38
  merged_results : text_file pworkflow ;
  plot_merged_results : svg pworkflow ;
Philippe Veber's avatar
Philippe Veber committed
39
}
Carine Rey's avatar
Carine Rey committed
40

41
let merge_results ?fna_infos ~(res_by_tools : result list) () : text_file pworkflow =
Carine Rey's avatar
Carine Rey committed
42
  let command = List.map res_by_tools ~f:(fun res ->
Philippe Veber's avatar
Philippe Veber committed
43
      let w = match res with
Carine Rey's avatar
Carine Rey committed
44
        | `Pcoc d -> Pcoc.results d
Philippe Veber's avatar
Philippe Veber committed
45
        | `Pcoc_gamma d -> Pcoc.results d
Carine Rey's avatar
Carine Rey committed
46
        | `Pcoc_C60 d -> Pcoc.results d
Philippe Veber's avatar
Philippe Veber committed
47
        | `Diffsel d -> Diffsel.selector d
Carine Rey's avatar
Carine Rey committed
48 49
        | `Identical_LG d -> Identical.results d
        | `Identical_WAG d -> Identical.results d
50 51 52
        | `Topological_LG d -> Topological.results d
        | `Topological_WAG d -> Topological.results d
        | `Tdg09 d -> Tamuri.results d
53
        | `Multinomial d -> d
Philippe Veber's avatar
Philippe Veber committed
54
        | `Msd (d, _) -> Msd.results d
Philippe Veber's avatar
Philippe Veber committed
55 56 57 58
      in
      let opt = match res with
        | `Pcoc _ -> string "--pcoc"
        | `Pcoc_gamma _ -> string "--pcoc_gamma"
Carine Rey's avatar
Carine Rey committed
59
        | `Pcoc_C60 _ -> string "--pcoc_C60"
Philippe Veber's avatar
Philippe Veber committed
60
        | `Diffsel _ -> string "--diffsel"
Carine Rey's avatar
Carine Rey committed
61 62
        | `Identical_LG _ -> string "--identical_LG"
        | `Identical_WAG _ -> string "--identical_WAG"
63 64 65
        | `Topological_LG _ -> string "--topological_LG"
        | `Topological_WAG _ -> string "--topological_WAG"
        | `Tdg09 _ -> string "--tdg09"
66
        | `Multinomial _ -> string "--multinomial"
Philippe Veber's avatar
Philippe Veber committed
67
        | `Msd (_, e) -> string (sprintf "--msd %f" e)
Philippe Veber's avatar
Philippe Veber committed
68 69 70
      in
      seq ~sep:" " [opt; dep w]
    )
Carine Rey's avatar
Carine Rey committed
71
  in
Philippe Veber's avatar
Philippe Veber committed
72 73
  Workflow.shell ~descr:"convergence_detection.merge_results" [
    cmd "python" ~img:Env.env_py [
74 75 76
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      seq ~sep:" " command ;
77 78 79 80
      option (opt "--fna_infos" dep) fna_infos;
    ] ;
  ]

Philippe Veber's avatar
Philippe Veber committed
81
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?diffsel ?diffseldsparse () : text_file pworkflow =
82 83 84 85 86
  Workflow.shell ~descr:"convergence_detection.merge_results" [
    cmd "python" ~img:Env.env_py [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      option (opt "--multinomial" dep) multinomial ;
87
      option (opt "--tdg09" dep) tdg09 ;
88 89
      option (opt "--identical_LG" dep) identical ;
      option (opt "--topological_LG" dep) topological ;
Philippe Veber's avatar
Philippe Veber committed
90
      option (opt "--pcoc" dep) pcoc ;
91
      option (opt "--diffsel" dep) diffsel ;
92
      option (opt "--diffseldsparse" dep) diffseldsparse ;
93 94
      option (opt "--oracle" dep) oracle ;
      option (opt "--fna_infos" dep) fna_infos;
95
    ] ;
Carine Rey's avatar
Carine Rey committed
96 97
  ]

Philippe Veber's avatar
Philippe Veber committed
98 99
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg pworkflow =
  let img = Pcoc.img in
Carine Rey's avatar
Carine Rey committed
100 101
  (* use of pcoc env due to its working X server for dra plot with ete3 *)
  let meths = List.map res_by_tools ~f:(fun res ->
102 103 104 105 106 107 108 109 110
      let opt = match res with
        | `Pcoc _ -> "PCOC,PC,OC"
        | `Pcoc_gamma _ -> "PCOC_gamma,PC_gamma,OC_gamma,"
        | `Pcoc_C60 _ -> "PCOC_C60,PC_C60,OC_C60,"
        | `Diffsel _ -> "Diffsel_mean,Diffsel_max"
        | `Identical_LG _ -> "Identical_LG08"
        | `Identical_WAG _ -> "Identical_WAG01"
        | `Topological_LG _ -> "Topological_LG08"
        | `Topological_WAG _ -> "Topological_WAG01"
Carine Rey's avatar
Carine Rey committed
111
        | `Tdg09 _ -> "Tdg09_1MinusFDR,Tdg09_1MinusLRT,Tdg09_prob_post"
Carine Rey's avatar
Carine Rey committed
112
        | `Multinomial _ -> "Mutinomial_1MinusLRT"
Carine Rey's avatar
Carine Rey committed
113
        | `Msd _ -> "Msd_0.05_1MinusP"
114 115 116
      in
      string opt
    ) |> seq ~sep:","
Carine Rey's avatar
Carine Rey committed
117
  in
118
  let default_t = List.map res_by_tools ~f:(fun res ->
119
      let opt = match res with
LANORE Vincent's avatar
LANORE Vincent committed
120 121 122 123 124 125 126 127 128 129 130
        | `Pcoc _ -> "PCOC:0,PC:0,OC:0"
        | `Pcoc_gamma _ -> "PCOC_gamma:0,PC_gamma:0,OC_gamma:0"
        | `Pcoc_C60 _ -> "PCOC_C60:0,PC_C60:0,OC_C60:0"
        | `Diffsel _ -> "Diffsel_mean:0,Diffsel_max:0"
        | `Identical_LG _ -> "Identical_LG08:0"
        | `Identical_WAG _ -> "Identical_WAG01:0"
        | `Topological_LG _ -> "Topological_LG08:0"
        | `Topological_WAG _ -> "Topological_WAG01:0"
        | `Tdg09 _ -> "Tdg09_1MinusFDR:0,Tdg09_prob_post:0,Tdg09_1MinusLRT:0"
        | `Multinomial _ -> "Mutinomial_1MinusLRT:0"
        | `Msd _ -> "Msd_0.05_1MinusP:0"
Carine Rey's avatar
Carine Rey committed
131

132 133 134
      in
      string opt
    ) |> seq ~sep:","
135
  in
136 137 138 139
  let meths_t = match t_choices with
    | Some _ -> None
    | None -> Some default_t
  in
Carine Rey's avatar
Carine Rey committed
140
  let out = dest // "results.svg" in
Philippe Veber's avatar
Philippe Veber committed
141 142 143 144
  let inner =
    Workflow.shell ~descr:"convergence_detection.plot_results" [
      within_container img (
        and_list [
145
          mkdir_p dest ;
Carine Rey's avatar
Carine Rey committed
146

Philippe Veber's avatar
Philippe Veber committed
147
          cmd "python" [
148
            Utils.script_dump Scripts.[ diffsel_script_utils ; plot_data ; plot_convergent_sites ] ;
Philippe Veber's avatar
Philippe Veber committed
149 150 151 152 153 154 155 156 157
            opt "-msa" dep faa ;
            opt "-tsv" dep tsv ;
            opt "-tree" dep tree ;
            opt "-out" ident out ;
            opt "-meth" ident meths ;
            option (opt "-t" ident) meths_t ;
            option (opt "--t_tsv" dep) t_choices ;
            flag string "--all_sites" plot_all_sites ;
          ]
Carine Rey's avatar
Carine Rey committed
158
        ]
Philippe Veber's avatar
Philippe Veber committed
159 160 161 162
      )
    ]
  in
  Workflow.select inner ["results.svg"]
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180

let plot_convergent_sites ?(plot_all_sites = true) ~alignment ~detection_results ~tree () =
  Workflow.shell ~descr:"plot_convergent_sites.py" [
    within_container Pcoc.img (
      and_list [
        mkdir_p dest ;
        cmd "python" [
          Utils.script_dump Scripts.[ diffsel_script_utils ; plot_data ; plot_convergent_sites ] ;
          opt "-tsv" dep detection_results ;
          opt "-msa" dep alignment ;
          opt "-tree" dep tree ;
          opt "-out" ident (dest // "plot.svg") ;
          flag string "--all_sites" plot_all_sites ;
        ]
      ]
    )
  ]
  |> Fn.flip Workflow.select ["plot.svg"]
Philippe Veber's avatar
Philippe Veber committed
181 182 183

let recall_precision_curve table =
  Workflow.shell ~descr:"recall_precision_curve" [
184
    cmd "Rscript" ~img:Env.env_r [
Philippe Veber's avatar
Philippe Veber committed
185 186 187 188 189 190
      file_dump (string Scripts.recall_precision_curve) ;
      dep table ;
      dest ;
    ] ;
  ]

191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
let result_table_parser fn =
  let module D = Owl.Dataframe in
  let df = D.of_csv fn in
  let labels =
    D.get_col_by_name df "Oracle"
    |> D.unpack_int_series
    |> Array.map ~f:(function
        | 0 -> false
        | 1 -> true
        | _ -> assert false
      )
  in
  let scores meth =
    match D.get_col_by_name df meth with
    | Float_Series xs -> Some xs
    | Int_Series xs -> Some (Array.map xs ~f:Float.of_int)
    | String_Series _ -> None (* has NA *)
    | _ -> assert false
  in
  let methods =
    D.get_heads df
    |> Array.filter ~f:(fun m -> m <> "Oracle" && m <> "Sites")
  in
  labels,
  Array.filter_map methods ~f:(fun m ->
      Option.map (scores m) ~f:(fun c -> m, c)
    )

let%workflow recall_precision_auc_table table =
  let labels, scores_per_meth = result_table_parser [%path table] in
  Array.map scores_per_meth ~f:(fun (meth, scores) ->
      let _, auc = Biocaml_unix.Bin_pred.recall_precision_curve ~labels ~scores in
      meth, auc
    )

Philippe Veber's avatar
Philippe Veber committed
226
let%pworkflow oracle ~n_h0 ~n_ha =
Philippe Veber's avatar
Philippe Veber committed
227 228
  let n_h0 = [%param n_h0] in
  let n_ha = [%param n_ha] in
Philippe Veber's avatar
Philippe Veber committed
229 230 231 232
  "Sites\tOracle"
  :: (List.init n_h0 ~f:(fun i -> sprintf "%d\t0" (i + 1)))
  @ (List.init n_ha ~f:(fun i -> sprintf "%d\t1" (n_h0 + i + 1)))
  |> Out_channel.write_lines [%dest]