convergence_detection.ml 7.26 KB
Newer Older
Carine Rey's avatar
Carine Rey committed
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2 3
open Bistro.Shell_dsl
open Bistro
Carine Rey's avatar
Carine Rey committed
4

Philippe Veber's avatar
Philippe Veber committed
5
type result = [
Philippe Veber's avatar
Philippe Veber committed
6 7 8 9 10 11 12 13 14
  | `Pcoc of [`pcoc] dworkflow
  | `Pcoc_gamma of [`pcoc] dworkflow
  | `Pcoc_C60 of [`pcoc] dworkflow
  | `Diffsel of [`diffsel] dworkflow
  | `Identical_LG of [`identical] dworkflow
  | `Identical_WAG of [`identical] dworkflow
  | `Topological_LG of [`topological] dworkflow
  | `Topological_WAG of [`topological] dworkflow
  | `Tdg09 of [`tdg09] dworkflow
15
  | `Multinomial of text_file pworkflow
Philippe Veber's avatar
Philippe Veber committed
16
  | `Msd of [`msd] dworkflow * float
Philippe Veber's avatar
Philippe Veber committed
17
]
Carine Rey's avatar
Carine Rey committed
18

Philippe Veber's avatar
Philippe Veber committed
19 20 21
let meth_string_of_result = function
  | `Pcoc _ -> "pcoc"
  | `Pcoc_gamma _ -> "pcoc_gamma"
Carine Rey's avatar
Carine Rey committed
22
  | `Pcoc_C60 _ -> "pcoc_gamma"
Philippe Veber's avatar
Philippe Veber committed
23
  | `Diffsel _ -> "diffsel"
Carine Rey's avatar
Carine Rey committed
24 25
  | `Identical_LG _ -> "identical_LG"
  | `Identical_WAG _ -> "identical_WAG"
26 27
  | `Topological_LG _ -> "topological_LG"
  | `Topological_WAG _ -> "topological_WAG"
Carine Rey's avatar
typo  
Carine Rey committed
28
  | `Tdg09 _ -> "tdg09"
29
  | `Multinomial _ -> "multinomial"
30
  | `Msd (_, e) -> sprintf "msd_%f" e
Carine Rey's avatar
Carine Rey committed
31 32 33 34

type dataset_res = {
  model_prefix : string ;
  tree_prefix : string ;
Carine Rey's avatar
Carine Rey committed
35
  dataset : Dataset.t ;
Philippe Veber's avatar
Philippe Veber committed
36
  res_by_tools: result list ;
Philippe Veber's avatar
Philippe Veber committed
37 38
  merged_results : text_file pworkflow ;
  plot_merged_results : svg pworkflow ;
Philippe Veber's avatar
Philippe Veber committed
39
}
Carine Rey's avatar
Carine Rey committed
40

41
let merge_results ?fna_infos ~(res_by_tools : result list) () : text_file pworkflow =
Carine Rey's avatar
Carine Rey committed
42
  let command = List.map res_by_tools ~f:(fun res ->
Philippe Veber's avatar
Philippe Veber committed
43
      let w = match res with
Carine Rey's avatar
Carine Rey committed
44
        | `Pcoc d -> Pcoc.results d
Philippe Veber's avatar
Philippe Veber committed
45
        | `Pcoc_gamma d -> Pcoc.results d
Carine Rey's avatar
Carine Rey committed
46
        | `Pcoc_C60 d -> Pcoc.results d
Philippe Veber's avatar
Philippe Veber committed
47
        | `Diffsel d -> Diffsel.selector d
Carine Rey's avatar
Carine Rey committed
48 49
        | `Identical_LG d -> Identical.results d
        | `Identical_WAG d -> Identical.results d
50 51 52
        | `Topological_LG d -> Topological.results d
        | `Topological_WAG d -> Topological.results d
        | `Tdg09 d -> Tamuri.results d
53
        | `Multinomial d -> d
Philippe Veber's avatar
Philippe Veber committed
54
        | `Msd (d, _) -> Msd.results d
Philippe Veber's avatar
Philippe Veber committed
55 56 57 58
      in
      let opt = match res with
        | `Pcoc _ -> string "--pcoc"
        | `Pcoc_gamma _ -> string "--pcoc_gamma"
Carine Rey's avatar
Carine Rey committed
59
        | `Pcoc_C60 _ -> string "--pcoc_C60"
Philippe Veber's avatar
Philippe Veber committed
60
        | `Diffsel _ -> string "--diffsel"
Carine Rey's avatar
Carine Rey committed
61 62
        | `Identical_LG _ -> string "--identical_LG"
        | `Identical_WAG _ -> string "--identical_WAG"
63 64 65
        | `Topological_LG _ -> string "--topological_LG"
        | `Topological_WAG _ -> string "--topological_WAG"
        | `Tdg09 _ -> string "--tdg09"
66
        | `Multinomial _ -> string "--multinomial"
Philippe Veber's avatar
Philippe Veber committed
67
        | `Msd (_, e) -> string (sprintf "--msd %f" e)
Philippe Veber's avatar
Philippe Veber committed
68 69 70
      in
      seq ~sep:" " [opt; dep w]
    )
Carine Rey's avatar
Carine Rey committed
71
  in
Philippe Veber's avatar
Philippe Veber committed
72 73
  Workflow.shell ~descr:"convergence_detection.merge_results" [
    cmd "python" ~img:Env.env_py [
74 75 76
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      seq ~sep:" " command ;
77 78 79 80
      option (opt "--fna_infos" dep) fna_infos;
    ] ;
  ]

Philippe Veber's avatar
Philippe Veber committed
81
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?diffsel ?diffseldsparse () : text_file pworkflow =
82 83 84 85 86
  Workflow.shell ~descr:"convergence_detection.merge_results" [
    cmd "python" ~img:Env.env_py [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      option (opt "--multinomial" dep) multinomial ;
87
      option (opt "--tdg09" dep) tdg09 ;
88 89
      option (opt "--identical_LG" dep) identical ;
      option (opt "--topological_LG" dep) topological ;
Philippe Veber's avatar
Philippe Veber committed
90
      option (opt "--pcoc" dep) pcoc ;
91
      option (opt "--diffsel" dep) diffsel ;
92
      option (opt "--diffseldsparse" dep) diffseldsparse ;
93 94
      option (opt "--oracle" dep) oracle ;
      option (opt "--fna_infos" dep) fna_infos;
95
    ] ;
Carine Rey's avatar
Carine Rey committed
96 97
  ]

Philippe Veber's avatar
Philippe Veber committed
98 99
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg pworkflow =
  let img = Pcoc.img in
Carine Rey's avatar
Carine Rey committed
100 101
  (* use of pcoc env due to its working X server for dra plot with ete3 *)
  let meths = List.map res_by_tools ~f:(fun res ->
102 103 104 105 106 107 108 109 110
      let opt = match res with
        | `Pcoc _ -> "PCOC,PC,OC"
        | `Pcoc_gamma _ -> "PCOC_gamma,PC_gamma,OC_gamma,"
        | `Pcoc_C60 _ -> "PCOC_C60,PC_C60,OC_C60,"
        | `Diffsel _ -> "Diffsel_mean,Diffsel_max"
        | `Identical_LG _ -> "Identical_LG08"
        | `Identical_WAG _ -> "Identical_WAG01"
        | `Topological_LG _ -> "Topological_LG08"
        | `Topological_WAG _ -> "Topological_WAG01"
Carine Rey's avatar
Carine Rey committed
111
        | `Tdg09 _ -> "Tdg09_1MinusFDR,Tdg09_1MinusLRT,Tdg09_prob_post"
Carine Rey's avatar
Carine Rey committed
112
        | `Multinomial _ -> "Mutinomial_1MinusLRT"
Carine Rey's avatar
Carine Rey committed
113
        | `Msd _ -> "Msd_0.05_1MinusP"
114 115 116
      in
      string opt
    ) |> seq ~sep:","
Carine Rey's avatar
Carine Rey committed
117
  in
118
  let default_t = List.map res_by_tools ~f:(fun res ->
119
      let opt = match res with
LANORE Vincent's avatar
LANORE Vincent committed
120 121 122 123 124 125 126 127 128 129 130
        | `Pcoc _ -> "PCOC:0,PC:0,OC:0"
        | `Pcoc_gamma _ -> "PCOC_gamma:0,PC_gamma:0,OC_gamma:0"
        | `Pcoc_C60 _ -> "PCOC_C60:0,PC_C60:0,OC_C60:0"
        | `Diffsel _ -> "Diffsel_mean:0,Diffsel_max:0"
        | `Identical_LG _ -> "Identical_LG08:0"
        | `Identical_WAG _ -> "Identical_WAG01:0"
        | `Topological_LG _ -> "Topological_LG08:0"
        | `Topological_WAG _ -> "Topological_WAG01:0"
        | `Tdg09 _ -> "Tdg09_1MinusFDR:0,Tdg09_prob_post:0,Tdg09_1MinusLRT:0"
        | `Multinomial _ -> "Mutinomial_1MinusLRT:0"
        | `Msd _ -> "Msd_0.05_1MinusP:0"
Carine Rey's avatar
Carine Rey committed
131

132 133 134
      in
      string opt
    ) |> seq ~sep:","
135
  in
136 137 138 139
  let meths_t = match t_choices with
    | Some _ -> None
    | None -> Some default_t
  in
Carine Rey's avatar
Carine Rey committed
140
  let out = dest // "results.svg" in
Philippe Veber's avatar
Philippe Veber committed
141 142 143 144
  let inner =
    Workflow.shell ~descr:"convergence_detection.plot_results" [
      within_container img (
        and_list [
145
          mkdir_p dest ;
Carine Rey's avatar
Carine Rey committed
146

Philippe Veber's avatar
Philippe Veber committed
147
          cmd "python" [
148
            Utils.script_dump Scripts.[ diffsel_script_utils ; plot_data ; plot_convergent_sites ] ;
Philippe Veber's avatar
Philippe Veber committed
149 150 151 152 153 154 155 156 157
            opt "-msa" dep faa ;
            opt "-tsv" dep tsv ;
            opt "-tree" dep tree ;
            opt "-out" ident out ;
            opt "-meth" ident meths ;
            option (opt "-t" ident) meths_t ;
            option (opt "--t_tsv" dep) t_choices ;
            flag string "--all_sites" plot_all_sites ;
          ]
Carine Rey's avatar
Carine Rey committed
158
        ]
Philippe Veber's avatar
Philippe Veber committed
159 160 161 162
      )
    ]
  in
  Workflow.select inner ["results.svg"]
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180

let plot_convergent_sites ?(plot_all_sites = true) ~alignment ~detection_results ~tree () =
  Workflow.shell ~descr:"plot_convergent_sites.py" [
    within_container Pcoc.img (
      and_list [
        mkdir_p dest ;
        cmd "python" [
          Utils.script_dump Scripts.[ diffsel_script_utils ; plot_data ; plot_convergent_sites ] ;
          opt "-tsv" dep detection_results ;
          opt "-msa" dep alignment ;
          opt "-tree" dep tree ;
          opt "-out" ident (dest // "plot.svg") ;
          flag string "--all_sites" plot_all_sites ;
        ]
      ]
    )
  ]
  |> Fn.flip Workflow.select ["plot.svg"]
Philippe Veber's avatar
Philippe Veber committed
181 182

let recall_precision_curve table =
183
  let img = [ docker_image ~account:"pveber" ~name:"r_basics" ~tag:"20190710" () ] in
Philippe Veber's avatar
Philippe Veber committed
184
  Workflow.shell ~descr:"recall_precision_curve" [
185
    cmd "Rscript" ~img [
Philippe Veber's avatar
Philippe Veber committed
186 187 188 189 190 191
      file_dump (string Scripts.recall_precision_curve) ;
      dep table ;
      dest ;
    ] ;
  ]

192
let%workflow recall_precision_auc_table table =
193 194
  let module RT = Reviewphiltrans_toolbox.Result_table in
  let { RT.labels ; scores_per_meth } = RT.of_file [%path table] in
195 196 197 198 199
  Array.map scores_per_meth ~f:(fun (meth, scores) ->
      let _, auc = Biocaml_unix.Bin_pred.recall_precision_curve ~labels ~scores in
      meth, auc
    )

Philippe Veber's avatar
Philippe Veber committed
200
let%pworkflow oracle ~n_h0 ~n_ha =
Philippe Veber's avatar
Philippe Veber committed
201 202
  let n_h0 = [%param n_h0] in
  let n_ha = [%param n_ha] in
Philippe Veber's avatar
Philippe Veber committed
203 204 205 206
  "Sites\tOracle"
  :: (List.init n_h0 ~f:(fun i -> sprintf "%d\t0" (i + 1)))
  @ (List.init n_ha ~f:(fun i -> sprintf "%d\t1" (n_h0 + i + 1)))
  |> Out_channel.write_lines [%dest]