(* convergence_detection.ml *)
open Core
Carine Rey's avatar
Carine Rey committed
2 3 4 5
open Bistro.EDSL
open Bistro.Std
open File_formats
open Bistro_bioinfo.Std
Carine Rey's avatar
Carine Rey committed
6
open Defs
Carine Rey's avatar
Carine Rey committed
7 8 9 10 11 12 13 14

(* Abstract type with no definition. It is not referenced by the other
   definitions visible in this file; presumably meant as a type-level tag for
   PCOC outputs (see the TODO in the signature of [pcoc]) -- TODO confirm. *)
type pcoc_out
(* Abstract type with no definition, the diffsel counterpart of [pcoc_out].
   Same caveat: unused in the visible code. *)
type diffsel_out

(* Tag type used as the parameter of [directory] in the result type of the
   detection workflows ([det_out directory workflow]). The two constructors
   are never built or matched in the visible code. *)
type det_out =
  | Pcoc_out
  | Diffsel_out

Carine Rey's avatar
Carine Rey committed
15 16 17 18 19 20 21 22 23 24
(* One detection run: a detection method, the dataset it is associated with,
   and the workflow that produces its output. *)
type det_result = {
  (* [dataset] is not defined in this file; presumably it comes from [Defs]
     -- TODO confirm. *)
  dataset : dataset ;
  (* Detection method; the functions below match it against [Pcoc],
     [Pcoc_gamma] and [Diffsel]. *)
  det_meth : det_meth ;
  (* Workflow producing the output of the method. Note that this field has
     the same name as the record type itself. *)
  det_result : det_out directory workflow ;
  }

(* Everything computed for one dataset: the per-tool detection results plus
   the merged table and its plot.

   [model_prefix] and [tree_prefix] are plain strings; their exact meaning is
   not visible from this file (presumably naming prefixes for the simulated
   model and tree -- TODO confirm). *)
type dataset_res = {
  model_prefix : string ;
  tree_prefix : string ;
  (* One entry per detection tool that was run. *)
  res_by_tools : det_result list ;
  (* Text table as produced by [merge_results]. *)
  merged_results : text_file workflow ;
  (* SVG figure as produced by [plot_merge_results]. *)
  plot_merged_results : svg workflow ;
  }

Carine Rey's avatar
Carine Rey committed
29
(* [pcoc ?plot_complete ?gamma ~faa ~tree] builds the PCOC detection workflow.

   Two workflows are chained:
   - "convergence_detection.pcoc" runs [pcoc_det.py] inside the
     carinerey/pcoc:06212018 docker image, with [tree] given to -t, the
     literal "-" given to -m, [faa] given to -aa and the workflow destination
     given to -o;
   - "convergence_detection.selector_pcoc" copies whatever matches
     "RUN*/*.results.tsv" under the output of the first workflow to its own
     destination, and is the value returned.

   [gamma] and [plot_complete] are optional booleans: the matching command
   line switch is emitted only for [Some true] (assuming the usual Bistro
   [option] / [flag] combinator semantics -- TODO confirm).

   NOTE(review): the glob is written unquoted into the command template, so it
   presumably relies on expansion by the shell that runs the command --
   confirm.
   NOTE(review): the result is annotated as a directory workflow although [cp]
   receives a file glob as its source -- confirm the intended output shape. *)
let pcoc ?plot_complete ?gamma  ~(faa:aminoacid_fasta workflow) ~(tree:_ workflow) :  (*`pcoc TODO*) det_out directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  let run_pcoc = workflow ~descr:"convergence_detection.pcoc" [
    cmd "pcoc_det.py" ~env [
        opt "-t" dep tree;
        opt "-m" string "-";
        opt "-aa"  dep faa ;
        opt "-o"  ident dest ;
        option ( flag string "--gamma" ) gamma;
        (* Both plotting switches are emitted together from a single string. *)
        option ( flag string "--plot --plot_complete_ali" ) plot_complete;
    ]
  ] (* TODO / selector ["RUN*/*.results.tsv"] *) in
  (* Path pattern of the result table(s) inside the PCOC run directory. *)
  let out_pcoc = (dep run_pcoc) // "RUN*/*.results.tsv" in
  workflow ~descr:"convergence_detection.selector_pcoc" [
    cmd "cp" [out_pcoc; ident dest] ;
  ]
Carine Rey's avatar
Carine Rey committed
45 46


Carine Rey's avatar
Carine Rey committed
47
(* [diffsel ~phy_n ~tree ~w_every ~n_cycles] builds the diffsel detection
   workflow.

   Two workflows are chained:
   - "convergence_detection.run_diffsel" creates its destination directory and
     runs [_build/diffsel] in the vlanore/diffsel:v1.0 docker image, with
     [tree] given to -t, [phy_n] given to -d, the constant 2 given to -ncond,
     and "[w_every] [n_cycles]" given to -x. The chain is written under the
     name "myrun" inside the destination;
   - "convergence_detection.parse_diffsel" dumps the two helper python scripts
     embedded in [Scripts] into the temporary directory and runs
     [diffsel_analyze_result.py] on the "myrun" chain of the first workflow,
     writing to its own destination. This second workflow is the value
     returned.

   NOTE(review): -ncond is hard-coded to 2 while the example command kept in
   the comment below uses 3 -- confirm that 2 is intended for every dataset. *)
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) :  (*`diffsel TODO*) det_out directory workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"v1.0" () in
  (* Chain name handed to diffsel; it lives in the destination of the run
     workflow. *)
  let run_chainname = dest // "myrun" in
  (*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
  let run_diffsel = workflow ~descr:"convergence_detection.run_diffsel" [
    mkdir_p dest;
    cmd "_build/diffsel" ~env [
        opt "-t" dep tree;
        opt "-d"  dep phy_n ;
        opt "-ncond"  int 2 ;
        opt "-x" seq [ int w_every; string " "; int n_cycles];
        ident run_chainname ;
    ];
  ] in
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "diffsel_analyze_result.py" in
  (* Same chain, now addressed through the output of the run workflow. *)
  let chainname = (dep run_diffsel) // "myrun" in
  workflow ~descr:"convergence_detection.parse_diffsel" [
    docker env (
      and_list [
        mkdir_p tmp ;
        cd tmp ;
        cmd "ls" [dep run_diffsel];
        cmd "ls" [dep phy_n]; (* required dep to link the file in the env *)
        cmd "ls" [dep tree]; (* required dep to link the file in the env *)

        (*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
        cmd "cp"  [ file_dump (string Scripts.diffsel_script_utils) ; package] ;
        cmd "cp" [ file_dump (string Scripts.diffsel_analyze_result) ; script] ;

        (* The script is called by its bare name: the working directory is
           the temporary directory because of the [cd tmp] above. *)
        cmd "python" [
          string "diffsel_analyze_result.py" ;
          opt "-r" string "/diffsel/_build/readdiffsel" ;
          opt "-o" ident dest ;
          ident chainname ;
        ]
      ]
    )
  ]
Carine Rey's avatar
Carine Rey committed
87 88 89


(* [merge_results ~res_by_tools] builds the workflow that merges the outputs
   of all detection tools into a single text file.

   For every element of [res_by_tools] a pair "<switch> <path>" is generated,
   where the switch is --pcoc, --pcoc_gamma or --diffsel depending on the
   detection method and the path is the output of the corresponding detection
   workflow. The python script embedded in [Scripts.merge_det_results] is then
   run in the carinerey/ete3:3.0.0b35 docker image with "-o <destination>"
   followed by all the pairs, separated by spaces.

   With an empty [res_by_tools] the script is called with -o only. *)
let merge_results ~res_by_tools : text_file workflow =
  let env = docker_image ~account:"carinerey" ~name:"ete3" ~tag:"3.0.0b35" () in
  let command = List.map res_by_tools ~f:(fun res ->
    (* Kept exhaustive on purpose: adding a detection method must force a
       decision about its command line switch. *)
    let switch = match res.det_meth with
      | Pcoc -> string "--pcoc"
      | Pcoc_gamma -> string "--pcoc_gamma"
      | Diffsel -> string "--diffsel"
    in
    (* Every method stores its output in the same record field. *)
    seq ~sep:" " [switch; dep res.det_result]
  )
  in
  workflow ~descr:"convergence_detection.merge_results" [
    cmd "python" ~env [
            file_dump (string Scripts.merge_det_results) ;
            opt "-o" ident dest ;
            seq ~sep:" " command ;
          ] ;
  ]


Carine Rey's avatar
Carine Rey committed
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
(* [plot_merge_results ~res_by_tools ~tree ~faa ~tsv] builds the workflow that
   draws the merged detection results as "results.svg" inside its destination
   directory.

   The three python files embedded in [Scripts] (diffsel_script_utils,
   plot_data, plot_convergent_sites) are dumped into the temporary directory,
   then [plot_convergent_sites.py] is run from there with the alignment
   [faa], the merged table [tsv], the [tree], the output path and the
   comma-separated list of method labels (PCOC, PCOC_gamma, Diffsel) taken
   from [res_by_tools]. The --all_sites switch is always passed. *)
let plot_merge_results ~res_by_tools ~tree ~faa ~tsv : svg workflow =
  (* The pcoc image is used instead of carinerey/ete3:3.0.0b35 because it has
     a working X server, which is needed to draw plots with ete3. *)
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  let plot_all_sites = true in
  (* Label under which each detection method is passed to the script. *)
  let meth_label res = match res.det_meth with
    | Pcoc -> "PCOC"
    | Pcoc_gamma -> "PCOC_gamma"
    | Diffsel -> "Diffsel"
  in
  let meths =
    seq ~sep:"," (List.map res_by_tools ~f:(fun res -> string (meth_label res)))
  in
  (* Copies an embedded script into the temporary directory under [file_name]. *)
  let install content file_name =
    cmd "cp" [ file_dump (string content) ; tmp // file_name ]
  in
  let svg_path = dest // "results.svg" in
  workflow ~descr:"convergence_detection.plot_results" [
    docker env (
      and_list [
        mkdir_p tmp ;
        mkdir_p dest ;
        cd tmp ;

        install Scripts.diffsel_script_utils "diffsel_script_utils.py" ;
        install Scripts.plot_data "plot_data.py" ;
        install Scripts.plot_convergent_sites "plot_convergent_sites.py" ;

        (* Called by its bare name: the working directory is the temporary
           directory because of the [cd tmp] above. *)
        cmd "python" [
          string "plot_convergent_sites.py" ;
          opt "-msa" dep faa ;
          opt "-tsv" dep tsv ;
          opt "-tree" dep tree ;
          opt "-out" ident svg_path ;
          opt "-meth" ident meths ;
          flag string "--all_sites" plot_all_sites ;
        ]
      ]
    )
  ]