post_analyses.ml 8.84 KB
Newer Older
Carine Rey's avatar
Carine Rey committed
1 2 3 4 5 6 7 8 9
open Core
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
open Bistro_utils
open File_formats
open Convergence_detection

(** Abstract tag type with no definition; used as the type parameter of the
    directory workflow returned by [make_t_choices]. *)
type post_analyses_dir
Carine Rey's avatar
Carine Rey committed
10
(** Abstract tag type with no definition; used as the type parameter of the
    directory workflow returned by [plot_sens_spe_t_choices]. *)
type sens_spe_t_choices_plot
Carine Rey's avatar
Carine Rey committed
11

Carine Rey's avatar
Carine Rey committed
12
(** Files selected by [get_t_choices] from the directory built by
    [make_t_choices], together with the tree they relate to. *)
type t_choices = {
  t_choices_complete : text_file workflow;
  (** Selected as [out.complete.tsv]. *)
  t_choices_max : text_file workflow;
  (** Selected as [out.max_per_meth.tsv]. *)
  t_choices_plot : text_file workflow;
  (** Selected as [out.pdf]. *)
  tree_prefix : string;
  (** Tree prefix copied from the ["H0"] dataset result. *)
}

19 20 21 22 23 24 25 26 27 28
(** One plot built by [plot_det_meth_res_auto_t] for a single dataset result,
    labelled with the tree and model prefixes of that result. *)
type auto_t_plot = {
  tree_prefix:string ;
  model_prefix:string ;
  auto_t_plot: svg workflow;
}


(** Post-analysis results for one list of dataset results, as assembled by
    [post_analyses_res_of_dataset_results_l]. *)
type post_analyses_res = {
  t_choices : t_choices option;
  (** [None] when no ["H0"] or no ["HaPCOC"] dataset result was found. *)
  auto_t_plot_l : auto_t_plot list option;
  (** [None] exactly when [t_choices] is [None]. *)
  dataset_results_l : dataset_res list;
  (** The dataset results the two fields above were computed from. *)
}

Carine Rey's avatar
Carine Rey committed
32 33 34
(** Optional simulation-information file of one dataset, labelled with the
    model and tree prefixes of that dataset. *)
type simu_infos = {
  simu_infos : text_file workflow option;
  (** Result of [get_simu_infos]; [None] when the dataset has no
      [fna_infos]. *)
  model_prefix : string;
  tree_prefix : string;
}
Carine Rey's avatar
Carine Rey committed
37

Carine Rey's avatar
Carine Rey committed
38
(** Simulation post-analyses, as assembled by
    [post_analyses_simu_of_simu_dataset_l]. *)
type post_analyses_simu = {
  simu_infos_l : simu_infos list;
  (** One entry per simulated dataset. *)
  simu_infos_plot : text_file workflow;
  (** [out.pdf] selected from the directory built by [group_simu_infos]. *)
}
Carine Rey's avatar
Carine Rey committed
42

43
(** Docker image [carinerey/r_basics], tag [07232018]; the environment in
    which the [Rscript]-based workflows of this file are run. *)
let r_env = docker_image ~account:"carinerey" ~name:"r_basics" ~tag:"07232018" ()
Carine Rey's avatar
Carine Rey committed
44 45

(** [is_hyp ~hyp dataset_results] is [true] if and only if the model prefix of
    [dataset_results] is exactly the string [hyp]. *)
let is_hyp ~hyp (dataset_results : dataset_res) =
  (* String-specific equality rather than polymorphic [=]: same result on
     strings, and it keeps compiling when [=] is specialised to [int]. *)
  String.equal dataset_results.model_prefix hyp

(** Workflow running the [Scripts.calc_t_per_meth] R script inside [r_env].

    @param h0_merged_results workflow passed to the script as [--H0]
    @param ha_merged_results workflow passed to the script as [--Ha]
    @return a directory workflow; the script receives [dest/out] as its
    [--out] argument. *)
let make_t_choices ~h0_merged_results ~ha_merged_results : post_analyses_dir directory workflow =
  let out_prefix = dest // "out" in
  let rscript =
    cmd "Rscript" [
      file_dump (string Scripts.calc_t_per_meth) ;
      opt "--H0" dep h0_merged_results ;
      opt "--Ha" dep ha_merged_results ;
      opt "--out " ident out_prefix ;
    ]
  in
  workflow ~descr:"post_analyses.t_choices" [
    (* The destination directory must exist before the script writes to it. *)
    docker r_env (and_list [ mkdir_p dest ; rscript ]) ;
  ]

Carine Rey's avatar
Carine Rey committed
65 66 67 68
(** Workflow running the [Scripts.calc_simu_infos] Python script in the
    [carinerey/pcoc] docker image (tag [07022018]).

    @param descr suffix appended to the workflow description (default [""])
    @param fna_infos when given, passed to the script as [--fna_infos]
    @param faa workflow passed as [--faa]
    @param tree_sc workflow passed as [--tree]
    @return a text file workflow; the script receives [dest] as its
    [--output] argument. *)
let make_simu_infos ?(descr="") ?(fna_infos) ~faa ~tree_sc : text_file workflow =
  let pcoc_env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"07022018" () in
  let script_args = [
    file_dump (string Scripts.calc_simu_infos) ;
    opt "--faa" dep faa ;
    opt "--tree" dep tree_sc ;
    option (opt "--fna_infos" dep) fna_infos ;
    opt "--output " ident dest ;
  ]
  in
  workflow ~descr:("post_analyses.simu_infos." ^ descr) [
    cmd "python" ~env:pcoc_env script_args ;
  ]

77
(** Workflow that copies every available simulation-information file into
    [tmp] under the name [<tree_prefix>@<model_prefix>.tsv], then runs the
    [Scripts.plot_hyp_simu_validation] R script on that directory inside
    [r_env].

    Entries of [simu_infos_l] whose [simu_infos] field is [None] are skipped.
    The script receives [tmp] as [--input_dir] and [dest/out] as [--out]. *)
let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
  let copy_cmds =
    List.filter_map simu_infos_l ~f:(fun (s : simu_infos) ->
        Option.map s.simu_infos ~f:(fun w ->
            cmd "cp" [dep w ; tmp // (s.tree_prefix ^ "@" ^ s.model_prefix ^ ".tsv")]))
  in
  let plot_cmd =
    cmd "Rscript" [
      file_dump (string Scripts.plot_hyp_simu_validation) ;
      opt "--input_dir" ident tmp ;
      opt "--out " ident (dest // "out") ;
    ]
  in
  workflow ~descr:"post_analyses.plot_simu_infos" [
    (* Order matters: create both directories, copy the inputs, then plot. *)
    docker r_env (and_list (mkdir_p dest :: mkdir_p tmp :: (copy_cmds @ [plot_cmd]))) ;
  ]

Carine Rey's avatar
Carine Rey committed
102
(** Builds the [t_choices] record from the first dataset result whose model
    prefix is ["H0"] and the first whose model prefix is ["HaPCOC"].

    @return [None] when either of the two dataset results is missing;
    otherwise files selected from the directory built by [make_t_choices],
    with [tree_prefix] taken from the ["H0"] result. *)
let get_t_choices ~(dataset_results_l: dataset_res list) : t_choices option =
  (* The predicate is passed with its [~f] label; the unlabelled form only
     type-checks through the "labels omitted" escape hatch (warning 6). *)
  let find_hyp hyp = List.find dataset_results_l ~f:(is_hyp ~hyp) in
  match find_hyp "H0", find_hyp "HaPCOC" with
  | Some h0, Some ha ->
    let t_choices_dir =
      make_t_choices
        ~h0_merged_results:h0.merged_results
        ~ha_merged_results:ha.merged_results
    in
    let select file = t_choices_dir / selector [file] in
    Some {
      t_choices_max = select "out.max_per_meth.tsv" ;
      t_choices_complete = select "out.complete.tsv" ;
      t_choices_plot = select "out.pdf" ;
      tree_prefix = h0.tree_prefix ;
    }
  | None, _ | _, None -> None
Carine Rey's avatar
Carine Rey committed
116

117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137


(** For every dataset result, builds a plot with [plot_merge_results], passing
    the [t_choices_max] file of [t_choices] and [~plot_all_sites:true].

    @return [None] when [t_choices] is [None]; otherwise one [auto_t_plot]
    record per element of [dataset_results_l], in the same order. *)
let plot_det_meth_res_auto_t ~t_choices ~dataset_results_l =
  Option.map t_choices ~f:(fun (choices : t_choices) ->
      List.map dataset_results_l ~f:(fun (res : dataset_res) ->
          let ready_dataset = res.dataset.dataset in
          let auto_t_plot =
            plot_merge_results
              ~t_choices:choices.t_choices_max
              ~plot_all_sites:true
              ~res_by_tools:res.res_by_tools
              ~tree:(Tree_dataset.tree ready_dataset.tree_dataset `Detection)
              ~faa:ready_dataset.faa
              ~tsv:res.merged_results
              ()
          in
          { tree_prefix = res.tree_prefix ;
            model_prefix = res.model_prefix ;
            auto_t_plot }))

Carine Rey's avatar
Carine Rey committed
138 139 140
(** Simulation-information workflow for [dataset], built with
    [make_simu_infos] and described by the dataset's model prefix.

    @return [None] when the dataset carries no [fna_infos]; no workflow is
    built in that case. *)
let get_simu_infos ~(dataset:Dataset.t) =
  let ready_dataset = dataset.dataset in
  let tree_sc = Tree_dataset.tree ready_dataset.tree_dataset `Detection in
  Option.map ready_dataset.fna_infos ~f:(fun fna_infos ->
      make_simu_infos
        ~descr:dataset.model_prefix
        ~faa:ready_dataset.faa
        ~tree_sc
        ~fna_infos)
Carine Rey's avatar
Carine Rey committed
147 148


Carine Rey's avatar
Carine Rey committed
149
(** Assembles a [post_analyses_res] from [dataset_results_l]: the result of
    [get_t_choices], the plots of [plot_det_meth_res_auto_t] computed from it,
    and the input list itself. *)
let post_analyses_res_of_dataset_results_l ~dataset_results_l =
  let t_choices = get_t_choices ~dataset_results_l in
  { t_choices ;
    auto_t_plot_l = plot_det_meth_res_auto_t ~t_choices ~dataset_results_l ;
    dataset_results_l }
Carine Rey's avatar
Carine Rey committed
153 154 155 156 157 158

(** Assembles a [post_analyses_simu] from a list of datasets: one
    [simu_infos] record per dataset (see [get_simu_infos]), plus the
    [out.pdf] file selected from the directory built by [group_simu_infos]. *)
let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l =
  let simu_infos_of_dataset (dataset : Dataset.t) = {
    simu_infos = get_simu_infos ~dataset ;
    tree_prefix = dataset.tree_prefix ;
    model_prefix = dataset.model_prefix ;
  }
  in
  let simu_infos_l = List.map simu_dataset_l ~f:simu_infos_of_dataset in
  let plot_dir = group_simu_infos ~simu_infos_l in
  { simu_infos_l ; simu_infos_plot = plot_dir / selector ["out.pdf"] }
Carine Rey's avatar
Carine Rey committed
163

Carine Rey's avatar
Carine Rey committed
164 165


166
(** Workflow that gathers, under [tmp], the [t_choices_max] file of every
    element of [t_choices_l] (as [t_choices_dir/<tree_prefix>.tsv]) and the
    merged results of every element of [dataset_results_l] (as
    [merged_results_dir/<tree_prefix>@<model_prefix>.tsv]), then runs the
    [Scripts.plot_sens_spe_all_trees] R script on both directories inside
    [r_env].

    @param profile_prefix string passed to the script as [--profil]
    @return a directory workflow; the script receives [dest/out] as its
    [--out] argument. *)
let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix : sens_spe_t_choices_plot directory workflow =
  let t_choices_dir = tmp // "t_choices_dir" in
  let merged_results_dir = tmp // "merged_results_dir" in
  let copy_t_choices (tc : t_choices) =
    cmd "cp" [dep tc.t_choices_max ; t_choices_dir // (tc.tree_prefix ^ ".tsv")]
  in
  let copy_merged_results (res : dataset_res) =
    let file_name = res.tree_prefix ^ "@" ^ res.model_prefix ^ ".tsv" in
    cmd "cp" [dep res.merged_results ; merged_results_dir // file_name]
  in
  let plot_cmd =
    cmd "Rscript" [
      file_dump (string Scripts.plot_sens_spe_all_trees) ;
      opt "--input_dir" ident t_choices_dir ;
      opt "--input_dir2" ident merged_results_dir ;
      opt "--profil" string profile_prefix ;
      opt "--out " ident (dest // "out") ;
    ]
  in
  (* Order matters: create the directories, copy the inputs, then plot. *)
  let commands =
    List.concat [
      [ mkdir_p dest ; mkdir_p t_choices_dir ; mkdir_p merged_results_dir ] ;
      List.map t_choices_l ~f:copy_t_choices ;
      List.map dataset_results_l ~f:copy_merged_results ;
      [ plot_cmd ] ;
    ]
  in
  workflow ~descr:"post_analyses.plot_sens_spe_all_trees" [
    docker r_env (and_list commands) ;
  ]

199
(** Repository items for the plot computed over all trees.

    Collects every available [t_choices] and all dataset results from
    [all_post_analyses_per_tree], feeds them to [plot_sens_spe_t_choices], and
    exposes four files of the resulting directory under fixed names. *)
let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile_prefix ~all_post_analyses_per_tree =
  (* Per-tree results without [t_choices] contribute nothing here. *)
  let t_choices_l =
    List.filter_map all_post_analyses_per_tree
      ~f:(fun (res : post_analyses_res) -> res.t_choices)
  in
  let dataset_results_l =
    List.concat_map all_post_analyses_per_tree
      ~f:(fun (res : post_analyses_res) -> res.dataset_results_l)
  in
  let plot_dir = plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix in
  let entry name file = Repo.item [name] (plot_dir / selector [file]) in
  [
    entry "sens_spe.pdf" "out.sens_spe_auto_t.pdf" ;
    entry "all_t_choices.pdf" "out.max_t_per_tree.pdf" ;
    entry "sens_spe.tsv" "out.sens_spe_auto_t.tsv" ;
    entry "all_t_choices.tsv" "out.max_t_per_tree.tsv" ;
  ]


Carine Rey's avatar
Carine Rey committed
218
(** Repository items for the simulation post-analyses, all shifted into the
    [simu_infos] directory: the validation plot as
    [hypothesis_validation.pdf], followed by one
    [<tree_prefix>@<model_prefix>.tsv] item for each entry whose [simu_infos]
    file is available. *)
let repo_of_post_analyses_simu ~post_analyses_simu =
  let plot_items =
    Repo.shift "simu_infos" [
      Repo.item ["hypothesis_validation.pdf"] post_analyses_simu.simu_infos_plot ;
    ]
  in
  let tsv_items =
    List.concat_map post_analyses_simu.simu_infos_l ~f:(fun (s : simu_infos) ->
        match s.simu_infos with
        | None -> []
        | Some w ->
          Repo.shift "simu_infos" [
            Repo.item [s.tree_prefix ^ "@" ^ s.model_prefix ^ ".tsv"] w ;
          ])
  in
  plot_items @ tsv_items
Carine Rey's avatar
Carine Rey committed
234 235

(** Repository items for one [post_analyses_res].

    When [t_choices] is available, its three files are placed in the
    [t_choices] directory with names starting with [prefix]. When
    [auto_t_plot_l] is available, each plot is placed in the [auto_t_pdf]
    directory as [<tree_prefix>@<model_prefix>.auto_t.svg]. A missing field
    contributes no item. *)
let repo_of_post_analyses_res ~prefix ~post_analyses_res =
  let t_choices_items =
    match post_analyses_res.t_choices with
    | None -> []
    | Some tc ->
      Repo.shift "t_choices" [
        Repo.item [prefix ^ ".t_choices.max_mcc_per_meth.tsv"] tc.t_choices_max ;
        Repo.item [prefix ^ ".t_choices.complete.tsv"] tc.t_choices_complete ;
        Repo.item [prefix ^ ".t_choices.pdf"] tc.t_choices_plot ;
      ]
  in
  let auto_t_items =
    match post_analyses_res.auto_t_plot_l with
    | None -> []
    | Some plots ->
      (* The directory is named [auto_t_pdf] although the items are SVG. *)
      List.map plots ~f:(fun (p : auto_t_plot) ->
          Repo.item [p.tree_prefix ^ "@" ^ p.model_prefix ^ ".auto_t.svg"] p.auto_t_plot)
      |> Repo.shift "auto_t_pdf"
  in
  t_choices_items @ auto_t_items
Carine Rey's avatar
Carine Rey committed
259