convergence_detection.ml 7.78 KB
Newer Older
Carine Rey's avatar
Carine Rey committed
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2 3
open Bistro.Shell_dsl
open Bistro
Carine Rey's avatar
Carine Rey committed
4

Philippe Veber's avatar
Philippe Veber committed
5
type result = [
Philippe Veber's avatar
Philippe Veber committed
6 7 8 9 10 11 12 13 14
  | `Pcoc of [`pcoc] dworkflow
  | `Pcoc_gamma of [`pcoc] dworkflow
  | `Pcoc_C60 of [`pcoc] dworkflow
  | `Diffsel of [`diffsel] dworkflow
  | `Identical_LG of [`identical] dworkflow
  | `Identical_WAG of [`identical] dworkflow
  | `Topological_LG of [`topological] dworkflow
  | `Topological_WAG of [`topological] dworkflow
  | `Tdg09 of [`tdg09] dworkflow
15
  | `Multinomial of text_file pworkflow
Philippe Veber's avatar
Philippe Veber committed
16
  | `Msd of [`msd] dworkflow * float
Philippe Veber's avatar
Philippe Veber committed
17
]
Carine Rey's avatar
Carine Rey committed
18

Philippe Veber's avatar
Philippe Veber committed
19 20 21
let meth_string_of_result = function
  | `Pcoc _ -> "pcoc"
  | `Pcoc_gamma _ -> "pcoc_gamma"
Carine Rey's avatar
Carine Rey committed
22
  | `Pcoc_C60 _ -> "pcoc_gamma"
Philippe Veber's avatar
Philippe Veber committed
23
  | `Diffsel _ -> "diffsel"
Carine Rey's avatar
Carine Rey committed
24 25
  | `Identical_LG _ -> "identical_LG"
  | `Identical_WAG _ -> "identical_WAG"
26 27
  | `Topological_LG _ -> "topological_LG"
  | `Topological_WAG _ -> "topological_WAG"
Carine Rey's avatar
typo  
Carine Rey committed
28
  | `Tdg09 _ -> "tdg09"
29
  | `Multinomial _ -> "multinomial"
30
  | `Msd (_, e) -> sprintf "msd_%f" e
Carine Rey's avatar
Carine Rey committed
31 32 33 34

type dataset_res = {
  model_prefix : string ;
  tree_prefix : string ;
Carine Rey's avatar
Carine Rey committed
35
  dataset : Dataset.t ;
Philippe Veber's avatar
Philippe Veber committed
36
  res_by_tools: result list ;
Philippe Veber's avatar
Philippe Veber committed
37 38
  merged_results : text_file pworkflow ;
  plot_merged_results : svg pworkflow ;
Philippe Veber's avatar
Philippe Veber committed
39
}
Carine Rey's avatar
Carine Rey committed
40

41
let merge_results ?fna_infos ~(res_by_tools : result list) () : text_file pworkflow =
Carine Rey's avatar
Carine Rey committed
42
  let command = List.map res_by_tools ~f:(fun res ->
Philippe Veber's avatar
Philippe Veber committed
43
      let w = match res with
Carine Rey's avatar
Carine Rey committed
44
        | `Pcoc d -> Pcoc.results d
Philippe Veber's avatar
Philippe Veber committed
45
        | `Pcoc_gamma d -> Pcoc.results d
Carine Rey's avatar
Carine Rey committed
46
        | `Pcoc_C60 d -> Pcoc.results d
Philippe Veber's avatar
Philippe Veber committed
47
        | `Diffsel d -> Diffsel.selector d
Carine Rey's avatar
Carine Rey committed
48 49
        | `Identical_LG d -> Identical.results d
        | `Identical_WAG d -> Identical.results d
50 51 52
        | `Topological_LG d -> Topological.results d
        | `Topological_WAG d -> Topological.results d
        | `Tdg09 d -> Tamuri.results d
53
        | `Multinomial d -> d
Philippe Veber's avatar
Philippe Veber committed
54
        | `Msd (d, _) -> Msd.results d
Philippe Veber's avatar
Philippe Veber committed
55 56 57 58
      in
      let opt = match res with
        | `Pcoc _ -> string "--pcoc"
        | `Pcoc_gamma _ -> string "--pcoc_gamma"
Carine Rey's avatar
Carine Rey committed
59
        | `Pcoc_C60 _ -> string "--pcoc_C60"
Philippe Veber's avatar
Philippe Veber committed
60
        | `Diffsel _ -> string "--diffsel"
Carine Rey's avatar
Carine Rey committed
61 62
        | `Identical_LG _ -> string "--identical_LG"
        | `Identical_WAG _ -> string "--identical_WAG"
63 64 65
        | `Topological_LG _ -> string "--topological_LG"
        | `Topological_WAG _ -> string "--topological_WAG"
        | `Tdg09 _ -> string "--tdg09"
66
        | `Multinomial _ -> string "--multinomial"
Philippe Veber's avatar
Philippe Veber committed
67
        | `Msd (_, e) -> string (sprintf "--msd %f" e)
Philippe Veber's avatar
Philippe Veber committed
68 69 70
      in
      seq ~sep:" " [opt; dep w]
    )
Carine Rey's avatar
Carine Rey committed
71
  in
Philippe Veber's avatar
Philippe Veber committed
72 73
  Workflow.shell ~descr:"convergence_detection.merge_results" [
    cmd "python" ~img:Env.env_py [
74 75 76
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      seq ~sep:" " command ;
77 78 79 80
      option (opt "--fna_infos" dep) fna_infos;
    ] ;
  ]

81
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?diffsel ?diffseldsparse () : text_file pworkflow =
82 83 84 85 86
  Workflow.shell ~descr:"convergence_detection.merge_results" [
    cmd "python" ~img:Env.env_py [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      option (opt "--multinomial" dep) multinomial ;
87
      option (opt "--tdg09" dep) tdg09 ;
88 89
      option (opt "--identical_LG" dep) identical ;
      option (opt "--topological_LG" dep) topological ;
90
      option (opt "--diffsel" dep) diffsel ;
91
      option (opt "--diffseldsparse" dep) diffseldsparse ;
92 93
      option (opt "--oracle" dep) oracle ;
      option (opt "--fna_infos" dep) fna_infos;
94
    ] ;
Carine Rey's avatar
Carine Rey committed
95 96
  ]

Philippe Veber's avatar
Philippe Veber committed
97 98
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg pworkflow =
  let img = Pcoc.img in
Carine Rey's avatar
Carine Rey committed
99 100
  (* use of pcoc env due to its working X server for dra plot with ete3 *)
  let meths = List.map res_by_tools ~f:(fun res ->
101 102 103 104 105 106 107 108 109
      let opt = match res with
        | `Pcoc _ -> "PCOC,PC,OC"
        | `Pcoc_gamma _ -> "PCOC_gamma,PC_gamma,OC_gamma,"
        | `Pcoc_C60 _ -> "PCOC_C60,PC_C60,OC_C60,"
        | `Diffsel _ -> "Diffsel_mean,Diffsel_max"
        | `Identical_LG _ -> "Identical_LG08"
        | `Identical_WAG _ -> "Identical_WAG01"
        | `Topological_LG _ -> "Topological_LG08"
        | `Topological_WAG _ -> "Topological_WAG01"
Carine Rey's avatar
Carine Rey committed
110
        | `Tdg09 _ -> "Tdg09_1MinusFDR,Tdg09_1MinusLRT,Tdg09_prob_post"
Carine Rey's avatar
Carine Rey committed
111
        | `Multinomial _ -> "Mutinomial_1MinusLRT"
Carine Rey's avatar
Carine Rey committed
112
        | `Msd _ -> "Msd_0.05_1MinusP"
113 114 115
      in
      string opt
    ) |> seq ~sep:","
Carine Rey's avatar
Carine Rey committed
116
  in
117
  let default_t = List.map res_by_tools ~f:(fun res ->
118
      let opt = match res with
LANORE Vincent's avatar
LANORE Vincent committed
119 120 121 122 123 124 125 126 127 128 129
        | `Pcoc _ -> "PCOC:0,PC:0,OC:0"
        | `Pcoc_gamma _ -> "PCOC_gamma:0,PC_gamma:0,OC_gamma:0"
        | `Pcoc_C60 _ -> "PCOC_C60:0,PC_C60:0,OC_C60:0"
        | `Diffsel _ -> "Diffsel_mean:0,Diffsel_max:0"
        | `Identical_LG _ -> "Identical_LG08:0"
        | `Identical_WAG _ -> "Identical_WAG01:0"
        | `Topological_LG _ -> "Topological_LG08:0"
        | `Topological_WAG _ -> "Topological_WAG01:0"
        | `Tdg09 _ -> "Tdg09_1MinusFDR:0,Tdg09_prob_post:0,Tdg09_1MinusLRT:0"
        | `Multinomial _ -> "Mutinomial_1MinusLRT:0"
        | `Msd _ -> "Msd_0.05_1MinusP:0"
Carine Rey's avatar
Carine Rey committed
130

131 132 133
      in
      string opt
    ) |> seq ~sep:","
134
  in
135 136 137 138
  let meths_t = match t_choices with
    | Some _ -> None
    | None -> Some default_t
  in
Carine Rey's avatar
Carine Rey committed
139
  let out = dest // "results.svg" in
Philippe Veber's avatar
Philippe Veber committed
140 141 142 143
  let inner =
    Workflow.shell ~descr:"convergence_detection.plot_results" [
      within_container img (
        and_list [
144
          mkdir_p dest ;
Carine Rey's avatar
Carine Rey committed
145

Philippe Veber's avatar
Philippe Veber committed
146
          cmd "python" [
147
            Utils.script_dump Scripts.[ diffsel_script_utils ; plot_data ; plot_convergent_sites ] ;
Philippe Veber's avatar
Philippe Veber committed
148 149 150 151 152 153 154 155 156
            opt "-msa" dep faa ;
            opt "-tsv" dep tsv ;
            opt "-tree" dep tree ;
            opt "-out" ident out ;
            opt "-meth" ident meths ;
            option (opt "-t" ident) meths_t ;
            option (opt "--t_tsv" dep) t_choices ;
            flag string "--all_sites" plot_all_sites ;
          ]
Carine Rey's avatar
Carine Rey committed
157
        ]
Philippe Veber's avatar
Philippe Veber committed
158 159 160 161
      )
    ]
  in
  Workflow.select inner ["results.svg"]
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179

let plot_convergent_sites ?(plot_all_sites = true) ~alignment ~detection_results ~tree () =
  Workflow.shell ~descr:"plot_convergent_sites.py" [
    within_container Pcoc.img (
      and_list [
        mkdir_p dest ;
        cmd "python" [
          Utils.script_dump Scripts.[ diffsel_script_utils ; plot_data ; plot_convergent_sites ] ;
          opt "-tsv" dep detection_results ;
          opt "-msa" dep alignment ;
          opt "-tree" dep tree ;
          opt "-out" ident (dest // "plot.svg") ;
          flag string "--all_sites" plot_all_sites ;
        ]
      ]
    )
  ]
  |> Fn.flip Workflow.select ["plot.svg"]
Philippe Veber's avatar
Philippe Veber committed
180 181 182

let recall_precision_curve table =
  Workflow.shell ~descr:"recall_precision_curve" [
183
    cmd "Rscript" ~img:Env.env_r [
Philippe Veber's avatar
Philippe Veber committed
184 185 186 187 188 189
      file_dump (string Scripts.recall_precision_curve) ;
      dep table ;
      dest ;
    ] ;
  ]

190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
let result_table_parser fn =
  let module D = Owl.Dataframe in
  let df = D.of_csv fn in
  let labels =
    D.get_col_by_name df "Oracle"
    |> D.unpack_int_series
    |> Array.map ~f:(function
        | 0 -> false
        | 1 -> true
        | _ -> assert false
      )
  in
  let scores meth =
    match D.get_col_by_name df meth with
    | Float_Series xs -> Some xs
    | Int_Series xs -> Some (Array.map xs ~f:Float.of_int)
    | String_Series _ -> None (* has NA *)
    | _ -> assert false
  in
  let methods =
    D.get_heads df
    |> Array.filter ~f:(fun m -> m <> "Oracle" && m <> "Sites")
  in
  labels,
  Array.filter_map methods ~f:(fun m ->
      Option.map (scores m) ~f:(fun c -> m, c)
    )

let%workflow recall_precision_auc_table table =
  let labels, scores_per_meth = result_table_parser [%path table] in
  Array.map scores_per_meth ~f:(fun (meth, scores) ->
      let _, auc = Biocaml_unix.Bin_pred.recall_precision_curve ~labels ~scores in
      meth, auc
    )

Philippe Veber's avatar
Philippe Veber committed
225
let%pworkflow oracle ~n_h0 ~n_ha =
Philippe Veber's avatar
Philippe Veber committed
226 227
  let n_h0 = [%param n_h0] in
  let n_ha = [%param n_ha] in
Philippe Veber's avatar
Philippe Veber committed
228 229 230 231
  "Sites\tOracle"
  :: (List.init n_h0 ~f:(fun i -> sprintf "%d\t0" (i + 1)))
  @ (List.init n_ha ~f:(fun i -> sprintf "%d\t1" (n_h0 + i + 1)))
  |> Out_channel.write_lines [%dest]