convergence_detection.ml 5.46 KB
Newer Older
Carine Rey's avatar
Carine Rey committed
1
open Core
Carine Rey's avatar
Carine Rey committed
2 3 4 5
open Bistro.EDSL
open Bistro.Std
open File_formats
open Bistro_bioinfo.Std
Carine Rey's avatar
Carine Rey committed
6
open Defs
Carine Rey's avatar
Carine Rey committed
7

Philippe Veber's avatar
Philippe Veber committed
8 9 10
(** Output of one convergence-detection tool.

    Each constructor names the method (and, where relevant, the model
    variant) and carries the directory workflow holding that tool's
    output. The phantom tag on [directory] ([`pcoc], [`diffsel], ...)
    identifies which tool's directory layout it is, so that only the
    matching accessor can be applied to it. *)
type result = [
  | `Pcoc of [`pcoc] directory workflow
  | `Pcoc_gamma of [`pcoc] directory workflow
  | `Pcoc_C60 of [`pcoc] directory workflow
  | `Diffsel of [`diffsel] directory workflow
  | `Diffsel_bis of [`diffsel] directory workflow
  | `Identical_LG of [`identical] directory workflow
  | `Identical_WAG of [`identical] directory workflow
  | `Topological_LG of [`topological] directory workflow
  | `Topological_WAG of [`topological] directory workflow
  | `Tdg09 of [`tdg09] directory workflow
  | `Multinomial of [`multinomial] directory workflow
]
Carine Rey's avatar
Carine Rey committed
21

Philippe Veber's avatar
Philippe Veber committed
22 23 24
(** [meth_string_of_result r] is the name of the method that produced
    [r]. The payload is ignored; only the constructor matters.

    Every constructor maps to its own distinct string, so the result
    can safely be used as a key or label for a method. *)
let meth_string_of_result = function
  | `Pcoc _ -> "pcoc"
  | `Pcoc_gamma _ -> "pcoc_gamma"
  (* Was "pcoc_gamma" (copy/paste slip), which made the C60 run
     indistinguishable from the gamma run. *)
  | `Pcoc_C60 _ -> "pcoc_C60"
  | `Diffsel _ -> "diffsel"
  | `Diffsel_bis _ -> "diffsel_bis"
  | `Identical_LG _ -> "identical_LG"
  | `Identical_WAG _ -> "identical_WAG"
  | `Topological_LG _ -> "topological_LG"
  | `Topological_WAG _ -> "topological_WAG"
  | `Tdg09 _ -> "tdg09"
  | `Multinomial _ -> "multinomial"
Carine Rey's avatar
Carine Rey committed
34 35 36 37

(** Everything kept about one analysed dataset: how it is identified,
    the per-tool outputs, and the workflows that combine them. *)
type dataset_res = {
  model_prefix : string ;
  tree_prefix : string ;
  dataset : Dataset.t ;
  (* One entry per detection tool that was run. *)
  res_by_tools : result list ;
  (* NOTE(review): presumably built with [merge_results] and
     [plot_merge_results] respectively -- confirm against the caller. *)
  merged_results : text_file workflow ;
  plot_merged_results : svg workflow ;
}
Carine Rey's avatar
Carine Rey committed
43

Carine Rey's avatar
Carine Rey committed
44
(** [merge_results ~res_by_tools] is a workflow that runs the
    [Scripts.merge_det_results] Python script inside the
    [carinerey/python_basics] Docker image.

    The script receives [-o <dest>] followed by one
    [--<method> <result file>] pair per element of [res_by_tools], in
    list order. The result file of each tool is obtained through that
    tool's own accessor ([Pcoc.results], [Diffsel.selector], ...). *)
let merge_results ~res_by_tools : text_file workflow =
  let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07182018" () in
  let command = List.map res_by_tools ~f:(fun res ->
      (* The flag and the file it introduces are chosen in a single match
         so that they cannot get out of sync when a method is added. *)
      let flag, table = match res with
        | `Pcoc d -> "--pcoc", Pcoc.results d
        | `Pcoc_gamma d -> "--pcoc_gamma", Pcoc.results d
        | `Pcoc_C60 d -> "--pcoc_C60", Pcoc.results d
        | `Diffsel d -> "--diffsel", Diffsel.selector d
        | `Diffsel_bis d -> "--diffsel_bis", Diffsel.selector d
        | `Identical_LG d -> "--identical_LG", Identical.results d
        | `Identical_WAG d -> "--identical_WAG", Identical.results d
        | `Topological_LG d -> "--topological_LG", Topological.results d
        | `Topological_WAG d -> "--topological_WAG", Topological.results d
        | `Tdg09 d -> "--tdg09", Tamuri.results d
        | `Multinomial d -> "--multinomial", Multinomial.results d
      in
      seq ~sep:" " [string flag; dep table]
    )
  in
  workflow ~descr:"convergence_detection.merge_results" [
    cmd "python" ~env [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      seq ~sep:" " command ;
    ] ;
  ]

84
(** [plot_merge_results ~plot_all_sites ~res_by_tools ~tree ~faa ~tsv]
    is a workflow that draws the merged detection results as
    [results.svg] inside its destination directory.

    It copies the helper modules and the [plot_convergent_sites.py]
    script into the temporary directory, then runs the script from
    there with:
    - [-msa faa], [-tsv tsv], [-tree tree]: the input files;
    - [-out]: the SVG path;
    - [-meth]: a comma-separated list of names, a fixed group per
      element of [res_by_tools];
    - [-t]: a comma-separated list of [name:value] pairs for the same
      names (presumably per-column thresholds -- confirm against the
      script);
    - [--all_sites]: only when [plot_all_sites] is [true]. *)
let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv : svg workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  (* The pcoc image is used because it ships a working X server, which
     ete3 needs to draw plots. *)
  let meths = List.map res_by_tools ~f:(fun res ->
      (* No trailing comma here: the groups are joined with "," below, so
         a trailing comma would yield an empty name in the final list. *)
      let columns = match res with
        | `Pcoc _ -> "PCOC,PC,OC"
        | `Pcoc_gamma _ -> "PCOC_gamma,PC_gamma,OC_gamma"
        | `Pcoc_C60 _ -> "PCOC_C60,PC_C60,OC_C60"
        | `Diffsel _ -> "Diffsel_mean,Diffsel_max"
        | `Diffsel_bis _ -> "Diffsel_bis_mean,Diffsel_bis_max"
        | `Identical_LG _ -> "Identical_LG08"
        | `Identical_WAG _ -> "Identical_WAG01"
        | `Topological_LG _ -> "Topological_LG08"
        | `Topological_WAG _ -> "Topological_WAG01"
        | `Tdg09 _ -> "Tdg09_1-FDR,Tdg09_prob_post"
        (* NOTE(review): "Mutinomial" looks like a misspelling, but it may
           have to match a name used by the merge script -- left as is. *)
        | `Multinomial _ -> "Mutinomial_LRT"
      in
      string columns
    ) |> seq ~sep:","
  in
  let meths_t = List.map res_by_tools ~f:(fun res ->
      let thresholds = match res with
        | `Pcoc _ -> "PCOC:0.99,PC:0.99,OC:0.99"
        | `Pcoc_gamma _ -> "PCOC_gamma:0.99,PC_gamma:0.99,OC_gamma:0.99"
        | `Pcoc_C60 _ -> "PCOC_C60:0.99,PC_C60:0.99,OC_C60:0.99"
        | `Diffsel _ -> "Diffsel_mean:0.11,Diffsel_max:0.9"
        | `Diffsel_bis _ -> "Diffsel_bis_mean:0.11,Diffsel_bis_max:0.9"
        | `Identical_LG _ -> "Identical_LG08:0.9"
        | `Identical_WAG _ -> "Identical_WAG01:0.9"
        | `Topological_LG _ -> "Topological_LG08:0.9"
        | `Topological_WAG _ -> "Topological_WAG01:0.9"
        | `Tdg09 _ -> "Tdg09_1-FDR:0.9,Tdg09_prob_post:0.9"
        | `Multinomial _ -> "Mutinomial_LRT:0.9"
      in
      string thresholds
    ) |> seq ~sep:","
  in
  let package_diffsel_script_utils = tmp // "diffsel_script_utils.py" in
  let package_plot_data = tmp // "plot_data.py" in
  let script_plot_convergent_sites = tmp // "plot_convergent_sites.py" in
  let out = dest // "results.svg" in
  workflow ~descr:"convergence_detection.plot_results" [
    docker env (
      and_list [
        mkdir_p tmp ;
        mkdir_p dest ;
        cd tmp ;

        (* The script and its two helper modules are placed side by side
           in the working directory before the script is run from there. *)
        cmd "cp" [ file_dump (string Scripts.diffsel_script_utils) ; package_diffsel_script_utils ] ;
        cmd "cp" [ file_dump (string Scripts.plot_data) ; package_plot_data] ;
        cmd "cp" [ file_dump (string Scripts.plot_convergent_sites); script_plot_convergent_sites ] ;

        cmd "python" [
          string "plot_convergent_sites.py" ;
          opt "-msa" dep faa ;
          opt "-tsv" dep tsv ;
          opt "-tree" dep tree ;
          opt "-out" ident out ;
          opt "-meth" ident meths ;
          opt "-t" ident meths_t ;
          flag string "--all_sites" plot_all_sites ;
        ]
      ]
    )
  ]