(* post_analyses.ml *)
open Core
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
open Bistro_utils
open File_formats
open Convergence_detection

(* Phantom tags used to type the output directories of the workflows
   defined below; they carry no data. *)

(* Tags the directory returned by the [plot_trees] function. *)
type plot_trees

(* Tags the directory returned by [make_t_choices]. *)
type post_analyses_dir

(* Tags the directory returned by [plot_sens_spe_t_choices]. *)
type sens_spe_t_choices_plot
(* Files selected out of the directory built by [make_t_choices]
   (see [get_t_choices]), plus the tree prefix they belong to. *)
type t_choices = {
  t_choices_complete: text_file workflow ;       (* "out.complete.tsv" *)
  t_choices_max: text_file workflow ;            (* "out.max_MCC_per_meth.tsv" *)
  t_choices_recall09: text_file workflow ;       (* "out.recall09_per_meth.tsv" *)
  t_choices_plot: text_file workflow ;           (* "out.pdf" *)
  t_choices_condensed_plot: text_file workflow ; (* "out_condensed.pdf" *)
  tree_prefix: string;                           (* tree prefix of the "H0" result *)
}

(* One plot built by [plot_det_meth_res_auto_t] for a single dataset
   result, together with the prefixes identifying that dataset. *)
type auto_t_plot = {
  tree_prefix:string ;  (* copied from the dataset result *)
  model_prefix:string ;  (* copied from the dataset result *)
  auto_t_plot: svg workflow;  (* output of [plot_merge_results] *)
}


(* Post-analysis outputs for one list of dataset results
   (built by [post_analyses_res_of_dataset_results_l]). *)
type post_analyses_res = {
  (* [None] when [get_t_choices] finds no "H0" or no "HaPCOC" result. *)
  t_choices : t_choices option;
  (* [None] exactly when [t_choices] is [None]. *)
  auto_t_plot_l : auto_t_plot list option;
  (* The dataset results the two fields above were derived from. *)
  dataset_results_l : dataset_res list;
}

(* Optional simulation summary for one simulated dataset
   (see [get_simu_infos]), with the prefixes identifying it. *)
type simu_infos = {
  simu_infos: text_file workflow option ; (* [None] when the dataset has no [fna_infos] *)
  model_prefix: string ;
  tree_prefix: string ;
}
(* A tree re-inferred with PhyML for one simulated dataset, paired with
   the input tree it was started from
   (see [post_analyses_simu_of_simu_dataset_l]). *)
type reinfered_tree = {
  reinfered_tree : nw workflow ;  (* result of [Phyml.phyml_tree] *)
  input_tree : nhx workflow ;  (* the dataset's [input_tree] *)
  tree_prefix : string ;
  model_prefix : string ;
}

(* Simulation-level diagnostics built by
   [post_analyses_simu_of_simu_dataset_l]. *)
type post_analyses_simu = {
  simu_infos_l : simu_infos list;       (* one entry per simulated dataset *)
  simu_infos_plot : text_file workflow ; (* "out.pdf" of [group_simu_infos] *)
  trees_plot : text_file workflow ;      (* "out.pdf" of [plot_trees] *)
}
(* Docker image (carinerey/r_basics, tag 08012018) in which the
   Rscript-based workflows of this file are run. *)
let r_env = docker_image ~account:"carinerey" ~name:"r_basics" ~tag:"08012018" ()

(* [is_hyp ~hyp dataset_results] is [true] when the model prefix of
   [dataset_results] is exactly [hyp].

   [String.equal] is used instead of [=]: with [open Core], recent
   versions restrict [=] to [int], while [String.equal] type-checks on
   every version and avoids polymorphic comparison. *)
let is_hyp ~hyp (dataset_results : dataset_res) =
  String.equal dataset_results.model_prefix hyp

(* [build_cmd_t_choices opt_name mr_option] builds the command-line
   fragment for one optional input: [opt_name] followed by the
   dependency when [mr_option] is [Some _], and no argument at all when
   it is [None]. *)
let build_cmd_t_choices (opt_name : string) mr_option =
  Option.value_map mr_option ~default:[] ~f:(fun x -> [opt opt_name dep x])
(* [make_t_choices ~h0_mr ... ()] runs the [Scripts.calc_t_per_meth] R
   script inside [r_env]. Each labelled argument is an optional merged
   result; only those that are [Some _] are passed to the script, under
   the matching "--H..." option. Outputs are written with the prefix
   [dest // "out"]. *)
let make_t_choices ~h0_mr ~h0_NeBig_mr ~h0_NeSmall_mr ~haPCOC_mr ~haPC_mr ~haPC_NeBig_mr ~haPC_NeSmall_mr ~h0_NeBigInSmall_mr
    ~h0_NeSmallInBig_mr ~haPC_NeBigInSmall_mr ~haPC_NeSmallInBig_mr () : post_analyses_dir directory workflow =
  let out = dest // "out" in
  (* One (option name, optional merged result) pair per hypothesis;
     absent results contribute no argument. *)
  let hyp_args =
    List.concat_map [
      ("--H0"                , h0_mr                ) ;
      ("--H0NeBig"           , h0_NeBig_mr          ) ;
      ("--H0NeSmall"         , h0_NeSmall_mr        ) ;
      ("--H0NeBigInSmall"    , h0_NeBigInSmall_mr   ) ;
      ("--H0NeSmallInBig"    , h0_NeSmallInBig_mr   ) ;
      ("--HaPCOC"            , haPCOC_mr            ) ;
      ("--HaPC"              , haPC_mr              ) ;
      ("--HaPCNeBig"         , haPC_NeBig_mr        ) ;
      ("--HaPCNeSmall"       , haPC_NeSmall_mr      ) ;
      ("--HaPCNeBigInSmall"  , haPC_NeBigInSmall_mr ) ;
      ("--HaPCNeSmallInBig"  , haPC_NeSmallInBig_mr ) ;
    ] ~f:(fun (opt_name, mr_option) -> build_cmd_t_choices opt_name mr_option)
  in
  let script_args = [
    file_dump (string Scripts.calc_t_per_meth) ;
    opt "--out " ident out ;
  ] in
  workflow ~descr:"post_analyses.t_choices" [
    docker r_env (
      and_list [
        mkdir_p dest ;
        cmd "Rscript" (script_args @ hyp_args) ;
      ])
  ]

(* [make_simu_infos ?descr ?fna_infos ~faa ~tree_sc] runs the
   [Scripts.calc_simu_infos] Python script in the carinerey/pcoc image
   (tag 07022018) on the alignment [faa] and the tree [tree_sc], writing
   its result to [dest]. [descr] is appended to the workflow description;
   "--fna_infos" is passed only when [fna_infos] is provided.

   NOTE(review): no positional parameter follows the optional ones, so
   they presumably cannot be omitted at call sites without leaving a
   partial application — confirm before relying on the defaults. *)
let make_simu_infos ?(descr="") ?fna_infos ~faa ~tree_sc : text_file workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"07022018" () in
  let args = [
    file_dump (string Scripts.calc_simu_infos) ;
    opt "--faa" dep faa ;
    opt "--tree" dep tree_sc ;
    option (opt "--fna_infos" dep) fna_infos ;
    opt "--output " ident dest ;
  ] in
  workflow ~descr:("post_analyses.simu_infos." ^ descr) [
    cmd "python" ~env args ;
  ]

(* [group_simu_infos ~simu_infos_l] copies every available simulation
   summary into [tmp] as "<tree_prefix>@<model_prefix>.tsv", then runs
   the [Scripts.plot_hyp_simu_validation] R script on that directory
   inside [r_env]. Entries whose [simu_infos] is [None] are skipped.
   Outputs are written with the prefix [dest // "out"]. *)
let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
  let out = dest // "out" in
  let copy_cmds =
    List.filter_map simu_infos_l ~f:(fun (s : simu_infos) ->
        Option.map s.simu_infos ~f:(fun w ->
            cmd "cp" [dep w ; tmp // (s.tree_prefix ^"@"^ s.model_prefix ^ ".tsv")]))
  in
  let plot_cmd =
    cmd "Rscript" [
      file_dump (string Scripts.plot_hyp_simu_validation) ;
      opt "--input_dir" ident tmp ;
      opt "--out " ident out ;
    ]
  in
  workflow ~descr:"post_analyses.plot_simu_infos" [
    docker r_env (
      and_list ([mkdir_p dest ; mkdir_p tmp] @ copy_cmds @ [plot_cmd])
    )
  ]

(* [plot_trees ~reinfered_tree_l] copies, for every entry, the
   re-inferred tree to "<tree_prefix>@<model_prefix>.nw" and the input
   tree to "<tree_prefix>@input_tree.nw" inside [tmp], then runs the
   [Scripts.plot_trees] R script on that directory inside [r_env].
   Outputs are written with the prefix [dest // "out"]. *)
let plot_trees ~reinfered_tree_l : plot_trees directory workflow =
  let out = dest // "out" in
  let copy_cmds =
    List.concat_map reinfered_tree_l ~f:(fun (rt : reinfered_tree) -> [
          cmd "cp" [dep rt.reinfered_tree ; tmp // (rt.tree_prefix ^"@"^ rt.model_prefix ^ ".nw")] ;
          cmd "cp" [dep rt.input_tree ; tmp // (rt.tree_prefix ^"@input_tree.nw")] ;
        ])
  in
  let plot_cmd =
    cmd "Rscript" [
      file_dump (string Scripts.plot_trees) ;
      opt "--input_dir" ident tmp ;
      opt "--out " ident out ;
    ]
  in
  workflow ~descr:"post_analyses.plot_trees" [
    docker r_env (
      and_list ([mkdir_p dest ; mkdir_p tmp] @ copy_cmds @ [plot_cmd])
    )
  ]

(* [get_merged_results_opt hx] projects the [merged_results] field out
   of an optional result, keeping [None] as [None]. *)
let get_merged_results_opt hx =
  Option.map hx ~f:(fun w -> w.merged_results)

(* The dataset result found for each hypothesis, or [None] when the
   hypothesis is absent (filled in by [get_t_choices]). *)
type res_all_hyp = {
  h0_res                 : dataset_res option ;
  h0_NeBig_res           : dataset_res option ;
  h0_NeSmall_res         : dataset_res option ;
  ha_PC_res              : dataset_res option ;
  ha_PCOC_res            : dataset_res option ;
  ha_PC_NeBig_res        : dataset_res option ;
  ha_PC_NeSmall_res      : dataset_res option ;
  h0_NeBigInSmall_res    : dataset_res option ;
  h0_NeSmallInBig_res    : dataset_res option ;
  ha_PC_NeBigInSmall_res : dataset_res option ;
  ha_PC_NeSmallInBig_res : dataset_res option ;
}

(* [make_t_choices_per_couple res] extracts the optional merged results
   of every hypothesis in [res] and forwards them to [make_t_choices]. *)
let make_t_choices_per_couple (res : res_all_hyp) =
  make_t_choices
    ~h0_mr:(get_merged_results_opt res.h0_res)
    ~h0_NeBig_mr:(get_merged_results_opt res.h0_NeBig_res)
    ~h0_NeSmall_mr:(get_merged_results_opt res.h0_NeSmall_res)
    ~haPCOC_mr:(get_merged_results_opt res.ha_PCOC_res)
    ~haPC_mr:(get_merged_results_opt res.ha_PC_res)
    ~haPC_NeBig_mr:(get_merged_results_opt res.ha_PC_NeBig_res)
    ~haPC_NeSmall_mr:(get_merged_results_opt res.ha_PC_NeSmall_res)
    ~h0_NeBigInSmall_mr:(get_merged_results_opt res.h0_NeBigInSmall_res)
    ~h0_NeSmallInBig_mr:(get_merged_results_opt res.h0_NeSmallInBig_res)
    ~haPC_NeBigInSmall_mr:(get_merged_results_opt res.ha_PC_NeBigInSmall_res)
    ~haPC_NeSmallInBig_mr:(get_merged_results_opt res.ha_PC_NeSmallInBig_res)
    ()


(* [get_t_choices ~dataset_results_l] looks up the result of each
   hypothesis by model prefix and, when both "H0" and "HaPCOC" are
   present, builds the threshold-choice workflow and selects its output
   files. Returns [None] when either of those two results is missing.
   The returned [tree_prefix] is the one of the "H0" result. *)
let get_t_choices ~(dataset_results_l: dataset_res list) : t_choices option =
  (* First result whose model prefix equals [hyp], if any. The [~f]
     label is passed explicitly rather than relying on label omission. *)
  let find_hyp hyp = List.find dataset_results_l ~f:(is_hyp ~hyp) in
  let h0_res                 = find_hyp "H0" in
  let h0_NeBig_res           = find_hyp "H0_NeBig" in
  let h0_NeSmall_res         = find_hyp "H0_NeSmall" in
  let h0_NeBigInSmall_res    = find_hyp "H0_BigNeInSmallNe" in
  let h0_NeSmallInBig_res    = find_hyp "H0_SmallNeInBigNe" in
  let ha_PCOC_res            = find_hyp "HaPCOC" in
  let ha_PC_res              = find_hyp "HaPC" in
  let ha_PC_NeBig_res        = find_hyp "HaPC_NeBig" in
  let ha_PC_NeSmall_res      = find_hyp "HaPC_NeSmall" in
  let ha_PC_NeBigInSmall_res = find_hyp "HaPC_BigNeInSmallNe" in
  let ha_PC_NeSmallInBig_res = find_hyp "HaPC_SmallNeInBigNe" in
  match h0_res, ha_PCOC_res with
  | Some h0, Some _ ->
    let t_choices_dir =
      make_t_choices_per_couple {
        h0_res; h0_NeBig_res; h0_NeSmall_res; ha_PC_res; ha_PCOC_res;
        ha_PC_NeBig_res; ha_PC_NeSmall_res;
        h0_NeBigInSmall_res; h0_NeSmallInBig_res;
        ha_PC_NeBigInSmall_res; ha_PC_NeSmallInBig_res;
      }
    in
    Some {
      t_choices_max = t_choices_dir / selector ["out.max_MCC_per_meth.tsv"] ;
      t_choices_recall09 = t_choices_dir / selector ["out.recall09_per_meth.tsv"] ;
      t_choices_complete = t_choices_dir / selector ["out.complete.tsv"] ;
      t_choices_plot = t_choices_dir / selector ["out.pdf"] ;
      t_choices_condensed_plot = t_choices_dir / selector ["out_condensed.pdf"] ;
      tree_prefix = h0.tree_prefix ;
    }
  | _ -> None


(* [plot_det_meth_res_auto_t ~t_choices ~dataset_results_l] builds one
   [auto_t_plot] per dataset result by calling [plot_merge_results] with
   the [t_choices_max] file of [t_choices] and [plot_all_sites] set to
   [true]. Returns [None] when [t_choices] is [None]. *)
let plot_det_meth_res_auto_t ~t_choices ~dataset_results_l =
  Option.map t_choices ~f:(fun (w : t_choices) ->
      List.map dataset_results_l ~f:(fun (dataset_results : dataset_res) ->
          let ready_dataset = dataset_results.dataset.dataset in
          let tree = Tree_dataset.tree ready_dataset.tree_dataset `Detection in
          let faa = ready_dataset.faa in
          let tsv = dataset_results.merged_results in
          let res_by_tools = dataset_results.res_by_tools in
          let auto_t_plot =
            plot_merge_results
              ~t_choices:(w.t_choices_max)
              ~plot_all_sites:true
              ~res_by_tools ~tree ~faa ~tsv ()
          in
          { tree_prefix = dataset_results.tree_prefix ;
            model_prefix = dataset_results.model_prefix ;
            auto_t_plot ;
          }
        )
    )

(* [get_simu_infos ~dataset] builds the simulation-summary workflow for
   [dataset] from its alignment, its `Detection tree and its
   [fna_infos]; the workflow description is the dataset's model prefix.
   Returns [None] when the dataset carries no [fna_infos]
   (the variant without it, [make_simu_infos ~faa ~tree_sc], is
   deliberately not used). *)
let get_simu_infos ~(dataset:Dataset.t) =
  let ready_dataset = dataset.dataset in
  let faa = ready_dataset.faa in
  let tree_sc = Tree_dataset.tree ready_dataset.tree_dataset `Detection in
  Option.map ready_dataset.fna_infos ~f:(fun w ->
      make_simu_infos ~descr:dataset.model_prefix ~faa ~tree_sc ~fna_infos:w)
(* [post_analyses_res_of_dataset_results_l ~dataset_results_l] computes
   the threshold choices for [dataset_results_l], derives the per-dataset
   plots from them, and bundles both with the input list. *)
let post_analyses_res_of_dataset_results_l ~dataset_results_l =
  let t_choices = get_t_choices ~dataset_results_l in
  { t_choices ;
    auto_t_plot_l = plot_det_meth_res_auto_t ~t_choices ~dataset_results_l ;
    dataset_results_l ;
  }

(* [post_analyses_simu_of_simu_dataset_l ~simu_dataset_l] builds, for
   every simulated dataset, its optional simulation summary and a tree
   re-inferred by PhyML from the dataset's nucleotide alignment
   (converted with [Bppsuite.fa2phy]) starting from its input tree. It
   then selects "out.pdf" from the two grouped plot workflows.

   The lambda parameters are annotated with [Dataset.t] so that record
   field disambiguation does not depend on the order in which the two
   maps are type-checked. *)
let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l =
  let simu_infos_l = List.map simu_dataset_l ~f:(fun (dataset : Dataset.t) ->
      { simu_infos = get_simu_infos ~dataset ;
        tree_prefix = dataset.tree_prefix ;
        model_prefix = dataset.model_prefix ;
      }
    ) in
  let reinfered_tree_l = List.map simu_dataset_l ~f:(fun (dataset : Dataset.t) ->
      let rd = dataset.dataset in
      let input_tree = rd.input_tree in
      let reinfered_tree = Phyml.phyml_tree ~tree:input_tree (Bppsuite.fa2phy rd.fna) in
      { reinfered_tree ; input_tree ;
        tree_prefix = dataset.tree_prefix ;
        model_prefix = dataset.model_prefix ;
      }
    ) in
  { simu_infos_l ;
    simu_infos_plot = group_simu_infos ~simu_infos_l / selector ["out.pdf"] ;
    trees_plot = plot_trees ~reinfered_tree_l / selector ["out.pdf"] ;
  }
(* [plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l
   ~profile_prefix] gathers two input directories under [tmp]:
   - "t_choices_dir": the [t_choices_recall09] file of every element of
     [t_choices_l], copied as "<tree_prefix>.tsv";
   - "merged_results_dir": the merged results of every dataset result,
     copied as "<tree_prefix>@<model_prefix>.tsv";
   then runs the [Scripts.plot_sens_spe_all_trees] R script on them
   inside [r_env], passing [profile_prefix] as "--profil". Outputs are
   written with the prefix [dest // "out"]. *)
let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix : sens_spe_t_choices_plot directory workflow =
  let t_choices_dir = tmp // "t_choices_dir" in
  let merged_results_dir = tmp // "merged_results_dir" in
  let out = dest // "out" in
  let copy_t_choices = List.map t_choices_l ~f:(fun (tc : t_choices) ->
      cmd "cp" [dep tc.t_choices_recall09 ; t_choices_dir // (tc.tree_prefix ^ ".tsv")]
    )
  in
  let copy_merged_results = List.map dataset_results_l ~f:(fun (dr : dataset_res) ->
      cmd "cp" [dep dr.merged_results ; merged_results_dir // (dr.tree_prefix ^"@"^ dr.model_prefix ^ ".tsv")]
    )
  in
  let setup_cmds = [
    mkdir_p dest ;
    mkdir_p t_choices_dir ;
    mkdir_p merged_results_dir ;
  ] in
  let plot_cmd =
    cmd "Rscript" [
      file_dump (string Scripts.plot_sens_spe_all_trees) ;
      opt "--input_dir" ident t_choices_dir ;
      opt "--input_dir2" ident merged_results_dir ;
      opt "--profil" string profile_prefix ;
      opt "--out " ident out ;
    ]
  in
  workflow ~descr:"post_analyses.plot_sens_spe_all_trees" [
    docker r_env (
      and_list (setup_cmds @ copy_t_choices @ copy_merged_results @ [plot_cmd])
    )
  ]

(* Builds the repository items summarising all trees: collects the
   available threshold choices and all dataset results from
   [all_post_analyses_per_tree], runs [plot_sens_spe_t_choices] on them,
   and exposes its TSV outputs under "pdf_tsv" and its PDF outputs at the
   top level. *)
let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile_prefix ~all_post_analyses_per_tree =
  (* Per-tree analyses without threshold choices are simply dropped. *)
  let t_choices_l =
    List.filter_map all_post_analyses_per_tree ~f:(fun (res : post_analyses_res) ->
        res.t_choices)
  in
  let dataset_results_l =
    List.concat_map all_post_analyses_per_tree ~f:(fun (res : post_analyses_res) ->
        res.dataset_results_l)
  in
  let sens_spe_t_choices_plot = plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix in
  let tsv_items =
    Repo.[
      item ["sens_spe.tsv"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.tsv"]);
      item ["all_t_choices.tsv"] (sens_spe_t_choices_plot / selector ["out.t_per_tree.tsv"]);
    ] |> Repo.shift "pdf_tsv"
  in
  let pdf_items =
    Repo.[
      item ["sens_spe.pdf"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.pdf"]);
      item ["all_t_choices.pdf"] (sens_spe_t_choices_plot / selector ["out.t_per_tree.pdf"]);
    ]
  in
  tsv_items @ pdf_items
(* Builds the repository items for the simulation diagnostics: the two
   validation PDFs, plus each available simulation summary stored as
   "tsv/<tree_prefix>/<tree_prefix>@<model_prefix>.tsv". Everything is
   placed under "Simulation_details". *)
let repo_of_post_analyses_simu ~post_analyses_simu =
  let plot_items =
    Repo.[
      item ["hypothesis_validation.pdf"] post_analyses_simu.simu_infos_plot ;
      item ["trees_validation.pdf"] post_analyses_simu.trees_plot;
    ]
  in
  let table_items =
    List.concat_map post_analyses_simu.simu_infos_l ~f:(fun (simu_infos : simu_infos) ->
        match simu_infos.simu_infos with
        | None -> []
        | Some w ->
          Repo.[
            item [simu_infos.tree_prefix ^ "@" ^ simu_infos.model_prefix ^ ".tsv"] w
          ] |> Repo.shift simu_infos.tree_prefix
          |> Repo.shift "tsv"
      )
  in
  (plot_items @ table_items)
  |> Repo.shift "Simulation_details"

(* Builds the repository items for the threshold choices of
   [post_analyses_res], each file name starting with [prefix]: the TSV
   files go under "pdf_tsv", the PDF files stay at the top level.
   Returns the empty list when there are no threshold choices. *)
let repo_of_post_analyses_res ~prefix ~post_analyses_res =
  match post_analyses_res.t_choices with
  | None -> []
  | Some w ->
    let tsv_items =
      Repo.[
        item [prefix ^ ".t_choices.max_mcc_per_meth.tsv"] w.t_choices_max ;
        item [prefix ^ ".t_choices.recall09_per_meth.tsv"] w.t_choices_recall09 ;
        item [prefix ^ ".t_choices.complete.tsv"] w.t_choices_complete ;
      ] |> Repo.shift "pdf_tsv"
    in
    let pdf_items =
      Repo.[
        item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
        item [prefix ^ ".t_choices.condensed.pdf"] w.t_choices_condensed_plot ;
      ]
    in
    tsv_items @ pdf_items
    (* Disabled: export of the per-dataset auto_t plots.
    (
      match post_analyses_res.auto_t_plot_l with
      | None -> []
      | Some w_l ->
        List.map w_l ~f:(fun w ->
            Repo.[
              let prefix_f = w.tree_prefix ^ "@" ^ w.model_prefix in
              item [ prefix_f ^ ".auto_t.svg"] w.auto_t_plot ;
            ]
          )|> List.concat
        |> Repo.shift "auto_t_pdf"
      );*)