convergence_detection.ml 5.32 KB
Newer Older
Carine Rey's avatar
Carine Rey committed
1
open Core
Carine Rey's avatar
Carine Rey committed
2 3 4 5
open Bistro.EDSL
open Bistro.Std
open File_formats
open Bistro_bioinfo.Std
Carine Rey's avatar
Carine Rey committed
6
open Defs
Carine Rey's avatar
Carine Rey committed
7

Philippe Veber's avatar
Philippe Veber committed
8 9 10 11 12
(** Output of one convergence-detection tool, tagged by the method that
    produced it. Every constructor carries a directory workflow; [`Pcoc] and
    [`Pcoc_gamma] both hold a [[`pcoc] directory], [`Diffsel] holds a
    [[`diffsel] directory]. *)
type result = [
  | `Pcoc of [`pcoc] directory workflow
  | `Pcoc_gamma of [`pcoc] directory workflow
  | `Diffsel of [`diffsel] directory workflow
]
Carine Rey's avatar
Carine Rey committed
13

Philippe Veber's avatar
Philippe Veber committed
14 15 16 17
(** [meth_string_of_result res] is the lowercase method name matching the
    constructor of [res] (["pcoc"], ["pcoc_gamma"] or ["diffsel"]). The
    payload carried by the constructor is ignored. *)
let meth_string_of_result res =
  match res with
  | `Diffsel _ -> "diffsel"
  | `Pcoc_gamma _ -> "pcoc_gamma"
  | `Pcoc _ -> "pcoc"
Carine Rey's avatar
Carine Rey committed
18 19 20 21

(** Convergence-detection outputs gathered for a single dataset. *)
type dataset_res = {
  model_prefix : string ;
  (* NOTE(review): presumably identifies the model the dataset was built
     with -- confirm against the code that fills this record. *)
  tree_prefix : string ;
  (* NOTE(review): presumably identifies the input tree -- confirm. *)
  res_by_tools : result list ;
  (* One [result] per detection tool that was run. *)
  merged_results : text_file workflow ;
  (* Text file combining the per-tool results. *)
  plot_merged_results : svg workflow ;
  (* SVG rendering of the merged results. *)
}
Carine Rey's avatar
Carine Rey committed
26

Carine Rey's avatar
Carine Rey committed
27

Philippe Veber's avatar
Philippe Veber committed
28
(** [diffsel ~phy_n ~tree ~w_every ~n_cycles] builds a workflow that runs
    [/diffsel/_build/diffsel] inside the [vlanore/diffsel:v1.0] docker image.

    - [phy_n]: nucleotide alignment in phylip format, passed with [-d].
    - [tree]: tree file, passed with [-t].
    - [w_every], [n_cycles]: the two integers given to [-x], in that order.

    The alignment and the tree are copied twice: into [tmp] (these copies
    are the ones handed to diffsel) and into [dest] as ["myrun.ali"] and
    ["myrun.tree"]. The chain name given to diffsel is [dest // "myrun"].
    [-ncond] is fixed to [2]. *)
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) : [`diffsel] directory workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"v1.0" () in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dest_tree = dest // "myrun.tree" in
  let dest_ali = dest // "myrun.ali" in
  let chainname = dest // "myrun" in
  (* Copy a workflow output to [target]; going through [dep] is required to
     link the file in the env. *)
  let copy src target = cmd "cp" [dep src; target] in
  (* Example of a direct diffsel invocation:
     _build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun *)
  workflow ~descr:"convergence_detection.run_diffsel" [
    docker env (
      and_list [
        mkdir_p dest;
        copy phy_n dest_ali;
        copy tree dest_tree;
        copy phy_n tmp_ali;
        copy tree tmp_tree;
        cmd "/diffsel/_build/diffsel" [
          opt "-t" ident tmp_tree;
          opt "-d" ident tmp_ali ;
          opt "-ncond"  int 2 ;
          opt "-x" seq [ int w_every; string " "; int n_cycles];
          ident chainname ;
        ];
      ]
    )
  ]

(** [diffsel_selector run_diffsel] builds a workflow that runs the embedded
    [diffsel_analyze_result.py] script on the directory produced by
    [run_diffsel], inside the [vlanore/diffsel:v1.0] docker image, and writes
    the script output to [dest].

    The script and its helper module [diffsel_script_utils.py] are dumped
    into [tmp], which is also the working directory of the python call. The
    ["myrun.ali"] and ["myrun.tree"] files of the run directory are copied
    there as well. The chain name given to the script is ["myrun"] inside
    the run directory. *)
let diffsel_selector run_diffsel : text_file workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"v1.0" () in
  let run_dir = dep run_diffsel in
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "diffsel_analyze_result.py" in
  let tmp_tree = tmp // "myrun.tree" in
  let tmp_ali = tmp // "myrun.ali" in
  let dep_tree = run_dir // "myrun.tree" in
  let dep_ali = run_dir // "myrun.ali" in
  let chainname = run_dir // "myrun" in
  workflow ~descr:"convergence_detection.parse_diffsel" [
    docker env (
      and_list [
        mkdir_p tmp ;
        cd tmp ;
        (* NOTE(review): this listing looks like a debugging aid; it is kept
           so that the generated command stays unchanged. *)
        cmd "ls" [run_dir];

        cmd "cp" [dep_ali; tmp_ali]; (* required dep to link the file in the env *)
        cmd "cp" [dep_tree; tmp_tree]; (* required dep to link the file in the env *)

        (* python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
        cmd "cp"  [ file_dump (string Scripts.diffsel_script_utils) ; package] ;
        cmd "cp" [ file_dump (string Scripts.diffsel_analyze_result) ; script] ;

        cmd "python" [
          string "diffsel_analyze_result.py" ;
          opt "-r" string "/diffsel/_build/readdiffsel" ;
          opt "-o" ident dest ;
          ident chainname ;
        ]
      ]
    )
  ]
Carine Rey's avatar
Carine Rey committed
88 89 90


(** [merge_results ~res_by_tools] builds a workflow that runs the embedded
    [Scripts.merge_det_results] python script in the
    [carinerey/ete3:3.0.0b35] docker image and writes its output to [dest].

    For every element of [res_by_tools] the script receives one
    ["--<method> <file>"] argument pair: [--pcoc] / [--pcoc_gamma] followed
    by [Pcoc.results] of the run, or [--diffsel] followed by
    [diffsel_selector] of the run. Pairs are passed in list order. *)
let merge_results ~res_by_tools : text_file workflow =
  let env = docker_image ~account:"carinerey" ~name:"ete3" ~tag:"3.0.0b35" () in
  (* One match yields both the command-line flag and the per-tool workflow,
     so the two can never get out of sync; it also avoids shadowing the
     [opt] combinator with a local binding. *)
  let tool_arg res =
    let flag_name, tool_output = match res with
      | `Pcoc d -> "--pcoc", Pcoc.results d
      | `Pcoc_gamma d -> "--pcoc_gamma", Pcoc.results d
      | `Diffsel d -> "--diffsel", diffsel_selector d
    in
    seq ~sep:" " [string flag_name; dep tool_output]
  in
  let tool_args = List.map res_by_tools ~f:tool_arg in
  workflow ~descr:"convergence_detection.merge_results" [
    cmd "python" ~env [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      seq ~sep:" " tool_args ;
    ] ;
  ]


Carine Rey's avatar
Carine Rey committed
115 116 117 118 119 120
(** [plot_merge_results ~res_by_tools ~tree ~faa ~tsv] builds a workflow that
    runs the embedded [plot_convergent_sites.py] script in the
    [carinerey/pcoc:06212018] docker image.

    - [res_by_tools]: only the constructors are used; they give the
      comma-separated method list passed with [-meth] (["PCOC"],
      ["PCOC_gamma"], ["Diffsel"]), in list order.
    - [tree], [faa], [tsv]: workflows passed with [-tree], [-msa] and [-tsv].

    The script and its two helper modules are dumped into [tmp], which is the
    working directory of the python call. [--all_sites] is always passed.
    The plot is written to [dest // "results.svg"]. *)
let plot_merge_results ~res_by_tools ~tree ~faa ~tsv : svg workflow =
  let plot_all_sites = true in
  (* The pcoc image is used instead of carinerey/ete3:3.0.0b35 because of
     its working X server, needed to draw plots with ete3. *)
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  (* Method label expected by the plotting script for each kind of result. *)
  let meth_label = function
    | `Pcoc _ -> "PCOC"
    | `Pcoc_gamma _ -> "PCOC_gamma"
    | `Diffsel _ -> "Diffsel"
  in
  let meths =
    List.map res_by_tools ~f:(fun res -> string (meth_label res))
    |> seq ~sep:","
  in
  let package_diffsel_script_utils = tmp // "diffsel_script_utils.py" in
  let package_plot_data = tmp // "plot_data.py" in
  let script_plot_convergent_sites = tmp // "plot_convergent_sites.py" in
  (* NOTE(review): [dest] is created as a directory and the plot is written
     inside it, while the workflow is typed [svg workflow] -- confirm that
     callers expect a directory containing "results.svg". *)
  let out = dest // "results.svg" in
  workflow ~descr:"convergence_detection.plot_results" [
    docker env (
      and_list [
        mkdir_p tmp ;
        mkdir_p dest ;
        cd tmp ;

        cmd "cp" [ file_dump (string Scripts.diffsel_script_utils) ; package_diffsel_script_utils ] ;
        cmd "cp" [ file_dump (string Scripts.plot_data) ; package_plot_data] ;
        cmd "cp" [ file_dump (string Scripts.plot_convergent_sites); script_plot_convergent_sites ] ;

        cmd "python" [
          string "plot_convergent_sites.py" ;
          opt "-msa" dep faa ;
          opt "-tsv" dep tsv ;
          opt "-tree" dep tree ;
          opt "-out" ident out ;
          opt "-meth" ident meths ;
          flag string "--all_sites" plot_all_sites ;
        ]
      ]
    )
  ]