(* convergence_detection.ml *)
open Core
open Bistro.EDSL
open Bistro.Std
open File_formats
open Bistro_bioinfo.Std
open Defs

(* Abstract marker types: they have no constructors and are not referenced by
   any other definition visible in this file.
   NOTE(review): presumably meant as per-tool output tags — confirm. *)
type pcoc_out
type diffsel_out

(* Tag type used in this file as the parameter of [directory workflow] for the
   output of every detection tool (see [det_result.det_result] and the return
   types of [pcoc] and [diffsel]). The constructors themselves are never built
   in the visible code. *)
type det_out =
  | Pcoc_out
  | Diffsel_out

(* ---- Result records ---- *)
(** Result of one detection method applied to one dataset. *)
type det_result = {
  dataset : Dataset.t ;
  (** Dataset the result belongs to. *)
  det_meth : det_meth ;
  (** Method that produced the result. The type [det_meth] is declared outside
      this file; the constructors matched on here are [Pcoc], [Pcoc_gamma] and
      [Diffsel]. *)
  det_result : det_out directory workflow ;
  (** Workflow producing the tool's output directory (as returned by [pcoc] or
      [diffsel]). *)
}

(** All detection results gathered for one (model, tree) combination, together
    with their merged table and its plot. *)
type dataset_res = {
  model_prefix : string ;
  (** NOTE(review): presumably a label identifying the model — confirm with the
      code that builds this record. *)
  tree_prefix : string ;
  (** NOTE(review): presumably a label identifying the tree — confirm with the
      code that builds this record. *)
  res_by_tools : det_result list ;
  (** One entry per detection method that was run. *)
  merged_results : text_file workflow ;
  (** Same type as the result of [merge_results]; presumably built with it. *)
  plot_merged_results : svg workflow ;
  (** Same type as the result of [plot_merge_results]; presumably built with
      it. *)
}

(* ---- PCOC ---- *)
(** [pcoc ?plot_complete ?gamma ~faa ~tree] builds the workflow that runs
    [pcoc_det.py] inside the [carinerey/pcoc:06212018] Docker image.

    - [faa] is passed with [-aa] (amino-acid FASTA alignment).
    - [tree] is passed with [-t].
    - [-m] is always given the literal value ["-"].
    - [gamma], when supplied, controls the [--gamma] switch.
    - [plot_complete], when supplied, controls the
      [--plot --plot_complete_ali] switches.

    NOTE(review): both optional arguments go through Bistro's
    [option (flag string ...)], so the switch is presumably emitted only for
    [Some true] — confirm against the Bistro EDSL.

    The output is written to the workflow destination ([-o dest]).
    TODO (original author): give the result a PCOC-specific output type instead
    of the generic [det_out]. *)
let pcoc ?plot_complete ?gamma  ~(faa:aminoacid_fasta workflow) ~(tree:_ workflow) : det_out directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  workflow ~descr:"convergence_detection.pcoc" [
    cmd "pcoc_det.py" ~env [
      opt "-t" dep tree ;
      opt "-m" string "-" ;
      opt "-aa" dep faa ;
      opt "-o" ident dest ;
      option (flag string "--gamma") gamma ;
      option (flag string "--plot --plot_complete_ali") plot_complete ;
    ]
  ]


(** [pcoc_selector run_pcoc] builds a workflow that copies the file matching
    [RUN*/*.results.tsv] inside the output directory of [run_pcoc] to the
    workflow destination.

    NOTE(review): the path is a glob handed to [cp] with a single destination,
    so this presumably relies on exactly one file matching — confirm. *)
let pcoc_selector (run_pcoc:det_out directory workflow) : text_file workflow =
  let results_glob = (dep run_pcoc) // "RUN*/*.results.tsv" in
  workflow ~descr:"convergence_detection.selector_pcoc" [
    cmd "cp" [ results_glob ; ident dest ] ;
  ]

(* ---- Diffsel ---- *)
(** [diffsel ~phy_n ~tree ~w_every ~n_cycles] builds the workflow that runs
    [/diffsel/_build/diffsel] inside the [vlanore/diffsel:v1.0] Docker image.

    - [phy_n] (nucleotide PHYLIP alignment) is copied to [myrun.ali].
    - [tree] is copied to [myrun.tree].
    - [w_every] and [n_cycles] are passed, in that order, as the two values of
      the [-x] option.
    - [-ncond] is fixed to [2].

    Both inputs are copied twice: into the destination directory, where
    [diffsel_selector] later reads them, and into the temporary directory,
    from which diffsel itself reads them. The chain name is [dest/myrun].

    TODO (original author): give the result a diffsel-specific output type
    instead of the generic [det_out]. *)
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) : det_out directory workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"v1.0" () in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dest_tree = dest // "myrun.tree" in
  let dest_ali = dest // "myrun.ali" in
  let chainname = dest // "myrun" in
  (* Example of a manual invocation:
     _build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun *)
  workflow ~descr:"convergence_detection.run_diffsel" [
    docker env (
      and_list [
        mkdir_p dest ;
        (* [dep] is required on the inputs so that the files are linked in the
           container environment. *)
        cmd "cp" [ dep phy_n ; dest_ali ] ;
        cmd "cp" [ dep tree ; dest_tree ] ;
        cmd "cp" [ dep phy_n ; tmp_ali ] ;
        cmd "cp" [ dep tree ; tmp_tree ] ;
        cmd "/diffsel/_build/diffsel" [
          opt "-t" ident tmp_tree ;
          opt "-d" ident tmp_ali ;
          opt "-ncond" int 2 ;
          opt "-x" seq [ int w_every ; string " " ; int n_cycles ] ;
          ident chainname ;
        ] ;
      ]
    )
  ]

(** [diffsel_selector run_diffsel] builds the workflow that post-processes a
    diffsel run with [diffsel_analyze_result.py], inside the
    [vlanore/diffsel:v1.0] Docker image.

    Steps, all executed from the temporary directory:
    + list the content of the run directory;
    + copy [myrun.ali] and [myrun.tree] from the run directory;
    + dump the embedded scripts [Scripts.diffsel_script_utils] and
      [Scripts.diffsel_analyze_result] next to each other;
    + run the analysis script with [-r /diffsel/_build/readdiffsel], writing
      its output to the workflow destination ([-o dest]), on the chain
      [<run directory>/myrun]. *)
let diffsel_selector run_diffsel : text_file workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"v1.0" () in
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "diffsel_analyze_result.py" in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dep_tree = (dep run_diffsel) // "myrun.tree" in
  let dep_ali = (dep run_diffsel) // "myrun.ali" in
  let chainname = (dep run_diffsel) // "myrun" in
  let out = dest in
  workflow ~descr:"convergence_detection.parse_diffsel" [
    docker env (
      and_list [
        mkdir_p tmp ;
        cd tmp ;
        (* NOTE(review): looks like a debugging aid; kept so that the generated
           command is unchanged. *)
        cmd "ls" [ dep run_diffsel ] ;

        (* [dep] is required on the inputs so that the files are linked in the
           container environment. *)
        cmd "cp" [ dep_ali ; tmp_ali ] ;
        cmd "cp" [ dep_tree ; tmp_tree ] ;

        (* Usage: python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
        cmd "cp" [ file_dump (string Scripts.diffsel_script_utils) ; package ] ;
        cmd "cp" [ file_dump (string Scripts.diffsel_analyze_result) ; script ] ;

        cmd "python" [
          string "diffsel_analyze_result.py" ;
          opt "-r" string "/diffsel/_build/readdiffsel" ;
          opt "-o" ident out ;
          ident chainname ;
        ]
      ]
    )
  ]
(* ---- Merging per-tool results ---- *)


(** [merge_results ~res_by_tools] builds the workflow that merges the per-tool
    result tables with the embedded script [Scripts.merge_det_results], run
    with [python] in the [carinerey/ete3:3.0.0b35] Docker image.

    Each element of [res_by_tools] contributes one ["<switch> <file>"]
    argument:
    - [Pcoc]       -> [--pcoc]       with the table from [pcoc_selector];
    - [Pcoc_gamma] -> [--pcoc_gamma] with the table from [pcoc_selector];
    - [Diffsel]    -> [--diffsel]    with the table from [diffsel_selector].

    The merged table is written to the workflow destination ([-o dest]). *)
let merge_results ~res_by_tools : text_file workflow =
  let env = docker_image ~account:"carinerey" ~name:"ete3" ~tag:"3.0.0b35" () in
  let command = List.map res_by_tools ~f:(fun res ->
    (* A single match keeps the switch and its selector side by side. *)
    let meth_switch, table = match res.det_meth with
      | Pcoc -> string "--pcoc", pcoc_selector res.det_result
      | Pcoc_gamma -> string "--pcoc_gamma", pcoc_selector res.det_result
      | Diffsel -> string "--diffsel", diffsel_selector res.det_result
    in
    seq ~sep:" " [ meth_switch ; dep table ]
  )
  in
  workflow ~descr:"convergence_detection.merge_results" [
    cmd "python" ~env [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      seq ~sep:" " command ;
    ] ;
  ]


(* ---- Plotting ---- *)
(** [plot_merge_results ~res_by_tools ~tree ~faa ~tsv] builds the workflow
    that runs the embedded script [Scripts.plot_convergent_sites] and writes
    its output to [results.svg] inside the workflow destination directory.

    - [faa] is passed with [-msa], [tsv] with [-tsv], [tree] with [-tree].
    - [-meth] receives the comma-separated names of the methods found in
      [res_by_tools]: ["PCOC"], ["PCOC_gamma"] or ["Diffsel"].
    - the [--all_sites] switch is driven by a local constant set to [true].

    The helper modules [Scripts.diffsel_script_utils] and [Scripts.plot_data]
    are dumped into the temporary directory next to the main script, and the
    script is run from there. *)
let plot_merge_results ~res_by_tools ~tree ~faa ~tsv : svg workflow =
  let plot_all_sites = true in
  (* The pcoc image is used instead of
     [docker_image ~account:"carinerey" ~name:"ete3" ~tag:"3.0.0b35" ()]
     because its X server works for drawing plots with ete3. *)
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  let meth_name res =
    match res.det_meth with
    | Pcoc -> string "PCOC"
    | Pcoc_gamma -> string "PCOC_gamma"
    | Diffsel -> string "Diffsel"
  in
  let meths = seq ~sep:"," (List.map res_by_tools ~f:meth_name) in
  let package_diffsel_script_utils = tmp // "diffsel_script_utils.py" in
  let package_plot_data = tmp // "plot_data.py" in
  let script_plot_convergent_sites = tmp // "plot_convergent_sites.py" in
  let out = dest // "results.svg" in
  let install_scripts = [
    cmd "cp" [ file_dump (string Scripts.diffsel_script_utils) ; package_diffsel_script_utils ] ;
    cmd "cp" [ file_dump (string Scripts.plot_data) ; package_plot_data ] ;
    cmd "cp" [ file_dump (string Scripts.plot_convergent_sites) ; script_plot_convergent_sites ] ;
  ]
  in
  let run_plot =
    cmd "python" [
      string "plot_convergent_sites.py" ;
      opt "-msa" dep faa ;
      opt "-tsv" dep tsv ;
      opt "-tree" dep tree ;
      opt "-out" ident out ;
      opt "-meth" ident meths ;
      flag string "--all_sites" plot_all_sites ;
    ]
  in
  workflow ~descr:"convergence_detection.plot_results" [
    docker env (
      and_list (
        [ mkdir_p tmp ; mkdir_p dest ; cd tmp ]
        @ install_scripts
        @ [ run_plot ]
      )
    )
  ]