convergence_detection.ml 5.74 KB
Newer Older
Carine Rey's avatar
Carine Rey committed
1
open Core
Carine Rey's avatar
Carine Rey committed
2 3 4 5
open Bistro.EDSL
open Bistro.Std
open File_formats
open Bistro_bioinfo.Std
Carine Rey's avatar
Carine Rey committed
6
open Defs
Carine Rey's avatar
Carine Rey committed
7

Philippe Veber's avatar
Philippe Veber committed
8 9 10
(** Output of one convergence-detection tool.

    Each tag names the tool (and, where relevant, the substitution model)
    that produced the wrapped output directory. *)
type result = [
  (* PCOC runs; all three share the same directory format. *)
  | `Pcoc of [`pcoc] directory workflow
  | `Pcoc_gamma of [`pcoc] directory workflow
  | `Pcoc_C60 of [`pcoc] directory workflow
  | `Diffsel of [`diffsel] directory workflow
  (* Identical / Topological runs, one tag per model (LG or WAG). *)
  | `Identical_LG of [`identical] directory workflow
  | `Identical_WAG of [`identical] directory workflow
  | `Topological_LG of [`topological] directory workflow
  | `Topological_WAG of [`topological] directory workflow
  | `Tdg09 of [`tdg09] directory workflow
  | `Multinomial of [`multinomial] directory workflow
  (* The float is rendered with %f into the method name and into the
     --msd flag given to the merge script. *)
  | `Msd of [`msd] directory workflow * float
]
Carine Rey's avatar
Carine Rey committed
21

Philippe Veber's avatar
Philippe Veber committed
22 23 24
(** [meth_string_of_result res] is the textual name of the method that
    produced [res].

    Every tag maps to its own distinct name.  For [`Msd] the attached float
    is appended, formatted with [%f] (e.g. [msd_0.500000]). *)
let meth_string_of_result = function
  | `Pcoc _ -> "pcoc"
  | `Pcoc_gamma _ -> "pcoc_gamma"
  (* Previously returned "pcoc_gamma", which made the C60 run
     indistinguishable from the gamma run by name. *)
  | `Pcoc_C60 _ -> "pcoc_C60"
  | `Diffsel _ -> "diffsel"
  | `Identical_LG _ -> "identical_LG"
  | `Identical_WAG _ -> "identical_WAG"
  | `Topological_LG _ -> "topological_LG"
  | `Topological_WAG _ -> "topological_WAG"
  | `Tdg09 _ -> "tdg09"
  | `Multinomial _ -> "multinomial"
  | `Msd (_, e) -> sprintf "msd_%f" e
Carine Rey's avatar
Carine Rey committed
34 35 36 37

(** Everything computed for one dataset: the per-tool outputs together with
    the merged table and its plot. *)
type dataset_res = {
  (* NOTE(review): presumably identify the model / tree the dataset was
     analysed with -- confirm against the code that builds this record. *)
  model_prefix : string ;
  tree_prefix : string ;
  dataset : Dataset.t ;
  (* One entry per detection tool that was run. *)
  res_by_tools : result list ;
  (* Types match the outputs of [merge_results] and [plot_merge_results]. *)
  merged_results : text_file workflow ;
  plot_merged_results : svg workflow ;
}
Carine Rey's avatar
Carine Rey committed
43

Carine Rey's avatar
Carine Rey committed
44
(** [merge_results ?fna_infos ~res_by_tools ()] builds the workflow that runs
    the [Scripts.merge_det_results] script with [python] over the results of
    every tool listed in [res_by_tools].

    Each tool contributes one flag followed by its result dependency; for
    [`Msd] the flag also carries the float attached to the result.  When
    [fna_infos] holds a value it is forwarded through [--fna_infos]. *)
let merge_results ?fna_infos ~res_by_tools () : text_file workflow =
  let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07252018" () in
  (* The optional argument itself carries an option: flatten both layers. *)
  let fna_infos = match fna_infos with
    | Some infos -> infos
    | None -> None
  in
  (* Command-line fragment for a single tool: its flag, then its results. *)
  let tool_args res =
    let flag, results = match res with
      | `Pcoc d -> "--pcoc", Pcoc.results d
      | `Pcoc_gamma d -> "--pcoc_gamma", Pcoc.results d
      | `Pcoc_C60 d -> "--pcoc_C60", Pcoc.results d
      | `Diffsel d -> "--diffsel", Diffsel.selector d
      | `Identical_LG d -> "--identical_LG", Identical.results d
      | `Identical_WAG d -> "--identical_WAG", Identical.results d
      | `Topological_LG d -> "--topological_LG", Topological.results d
      | `Topological_WAG d -> "--topological_WAG", Topological.results d
      | `Tdg09 d -> "--tdg09", Tamuri.results d
      | `Multinomial d -> "--multinomial", Multinomial.results d
      | `Msd (d, e) -> sprintf "--msd %f" e, Msd.results d
    in
    seq ~sep:" " [string flag; dep results]
  in
  let command = List.map res_by_tools ~f:tool_args in
  workflow ~descr:"convergence_detection.merge_results" [
    cmd "python" ~env [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      seq ~sep:" " command ;
      option (opt "--fna_infos" dep) fna_infos ;
    ] ;
  ]

89
(** [plot_merge_results ?t_choices ~plot_all_sites ~res_by_tools ~tree ~faa ~tsv ()]
    builds the workflow that runs [plot_convergent_sites.py] and selects the
    resulting [results.svg].

    - [res_by_tools] decides which columns are passed through [-meth].
    - [t_choices], when given, is passed through [--t_tsv]; otherwise the
      built-in default thresholds are passed through [-t].
    - [plot_all_sites] toggles the [--all_sites] flag.
    - [faa], [tsv] and [tree] are passed as [-msa], [-tsv] and [-tree]. *)
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  (* The pcoc image is used because its X server works, which is needed to
     draw the plot with ete3. *)

  (* Columns plotted for each tool, paired with their default threshold.
     Keeping both in one table guarantees that the -meth and -t arguments
     always list the same columns.
     NOTE(review): "Mutinomial" is kept as is; it presumably matches the
     column name written by the merge script -- confirm before renaming. *)
  let columns_of_result = function
    | `Pcoc _ -> ["PCOC", "0.99"; "PC", "0.99"; "OC", "0.99"]
    | `Pcoc_gamma _ ->
      ["PCOC_gamma", "0.99"; "PC_gamma", "0.99"; "OC_gamma", "0.99"]
    | `Pcoc_C60 _ -> ["PCOC_C60", "0.99"; "PC_C60", "0.99"; "OC_C60", "0.99"]
    | `Diffsel _ -> ["Diffsel_mean", "0.11"; "Diffsel_max", "0.9"]
    | `Identical_LG _ -> ["Identical_LG08", "0.9"]
    | `Identical_WAG _ -> ["Identical_WAG01", "0.9"]
    | `Topological_LG _ -> ["Topological_LG08", "0.9"]
    | `Topological_WAG _ -> ["Topological_WAG01", "0.9"]
    | `Tdg09 _ -> ["Tdg09_1MinusFDR", "0.99"; "Tdg09_prob_post", "0.99"]
    | `Multinomial _ -> ["Mutinomial_1MinusLRT", "0.95"]
    | `Msd _ -> ["Msd_1MinusP", "0.95"]
  in
  (* Render every column of every tool with [render] and join with commas.
     Joining here (instead of hard-coding the joined strings) removes the
     stray trailing comma the Pcoc_gamma / Pcoc_C60 method lists used to
     carry, which produced an empty method name in the -meth argument. *)
  let per_tool render =
    List.map res_by_tools ~f:(fun res ->
        columns_of_result res
        |> List.map ~f:render
        |> String.concat ~sep:","
        |> string
      )
    |> seq ~sep:","
  in
  let meths = per_tool fst in
  let default_t =
    per_tool (fun (column, threshold) -> column ^ ":" ^ threshold)
  in
  (* Default thresholds are only used when no threshold file is supplied. *)
  let meths_t = match t_choices with
    | Some _ -> None
    | None -> Some default_t
  in
  let package_diffsel_script_utils = tmp // "diffsel_script_utils.py" in
  let package_plot_data = tmp // "plot_data.py" in
  let script_plot_convergent_sites = tmp // "plot_convergent_sites.py" in
  let out = dest // "results.svg" in
  workflow ~descr:"convergence_detection.plot_results" [
    docker env (
      and_list [
        mkdir_p tmp ;
        mkdir_p dest ;
        cd tmp ;

        (* Copy the plotting script and the two scripts shipped with it into
           the working directory. *)
        cmd "cp" [ file_dump (string Scripts.diffsel_script_utils) ; package_diffsel_script_utils ] ;
        cmd "cp" [ file_dump (string Scripts.plot_data) ; package_plot_data] ;
        cmd "cp" [ file_dump (string Scripts.plot_convergent_sites); script_plot_convergent_sites ] ;

        cmd "python" [
          string "plot_convergent_sites.py" ;
          opt "-msa" dep faa ;
          opt "-tsv" dep tsv ;
          opt "-tree" dep tree ;
          opt "-out" ident out ;
          opt "-meth" ident meths ;
          option (opt "-t" ident) meths_t ;
          option (opt "--t_tsv" dep) t_choices ;
          flag string "--all_sites" plot_all_sites ;
        ]
      ]
    )
  ] / selector ["results.svg"]