(* convergence_detection.ml *)
open Core
open Bistro.EDSL
open Bistro.Std
open File_formats
open Bistro_bioinfo.Std
open Defs

(* Abstract marker types with no constructors. No visible definition uses
   them: the annotations that mention them in the signatures of pcoc and
   diffsel are still commented out as TODO. *)
type pcoc_out
type diffsel_out

(* Tag type used as the parameter of [directory] in the result types of
   pcoc and diffsel, and in the det_result record below. Its constructors
   are not built or matched anywhere in the visible code. *)
type det_out =
  | Pcoc_out
  | Diffsel_out

(* Result records. *)
(* One detection result: a dataset, the method that was applied to it and
   the workflow holding that method's output. merge_results reads the
   [det_meth] and [det_result] fields of these records. *)
type det_result = {
  dataset : dataset ; (* [dataset] type comes from outside this section; not read by the visible code *)
  det_meth : det_meth ; (* matched against Pcoc / Pcoc_gamma / Diffsel in merge_results *)
  det_result : det_out directory workflow ; (* same type as the values returned by pcoc and diffsel *)
  }

(* Bundle of detection results plus a merged table. None of the visible
   functions construct or read this record.
   NOTE(review): presumably one value per dataset, with [merged_results]
   being the output of merge_results applied to [res_by_tools]; confirm
   against the code that builds it. *)
type dataset_res = {
  model_prefix : string ;
  tree_prefix : string ;
  res_by_tools: det_result list ; (* same shape as the argument of merge_results *)
  merged_results : text_file workflow (* same type as the result of merge_results *)
  }

(* PCOC detection workflow. *)
(** [pcoc ?plot_complete ?gamma ~faa ~tree] builds the PCOC detection
    pipeline as two chained workflows.

    - ["convergence_detection.pcoc"] runs [pcoc_det.py] in the
      [carinerey/pcoc:06212018] docker image with [-t tree], [-m -],
      [-aa faa] and [-o dest].
    - ["convergence_detection.selector_pcoc"] copies whatever matches
      ["RUN*/*.results.tsv"] under the first workflow's output onto its own
      destination. This second workflow is the returned value.

    @param plot_complete forwarded through [option (flag ...)]: the text
      [--plot --plot_complete_ali] is added to the command line only for
      [Some true].
    @param gamma forwarded the same way as [--gamma].
    @param faa amino-acid FASTA workflow, passed as [-aa].
    @param tree tree workflow, passed as [-t].

    NOTE(review): both optional arguments are followed only by labelled
    arguments, so a labelled call that omits one of them is a partial
    application (compiler warning 16). Adding a trailing [unit] parameter
    would fix it but changes the interface; left as is.

    NOTE(review): the result is annotated as a directory, while the last
    step is a plain [cp] of the matched table onto [dest]; confirm which
    shape the consumers expect. *)
let pcoc ?plot_complete ?gamma ~(faa:aminoacid_fasta workflow) ~(tree:_ workflow) : (*`pcoc TODO*) det_out directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  let run_pcoc = workflow ~descr:"convergence_detection.pcoc" [
    cmd "pcoc_det.py" ~env [
      opt "-t" dep tree ;
      opt "-m" string "-" ;
      opt "-aa" dep faa ;
      opt "-o" ident dest ;
      option (flag string "--gamma") gamma ;
      option (flag string "--plot --plot_complete_ali") plot_complete ;
    ]
  ] (* TODO / selector ["RUN*/*.results.tsv"] *) in
  (* The run directory name is not known in advance, hence the glob. It is
     presumably expected to match exactly one file; cp onto a single
     destination would fail otherwise. *)
  let out_pcoc = (dep run_pcoc) // "RUN*/*.results.tsv" in
  workflow ~descr:"convergence_detection.selector_pcoc" [
    cmd "cp" [out_pcoc; ident dest] ;
  ]

(* Diffsel detection workflow. *)
(** [diffsel ~phy_n ~tree ~w_every ~n_cycles] builds the diffsel detection
    pipeline as two chained workflows.

    - ["convergence_detection.run_diffsel"] creates its destination
      directory and runs [_build/diffsel] in the [vlanore/diffsel:v1.0]
      docker image, writing a chain named [myrun] inside that directory.
    - ["convergence_detection.parse_diffsel"] runs in the same image: it
      dumps the two embedded Python scripts ([Scripts.diffsel_script_utils]
      and [Scripts.diffsel_analyze_result]) into the temporary directory and
      runs [diffsel_analyze_result.py] on the chain, writing to its own
      destination. This second workflow is the returned value.

    @param phy_n nucleotide PHYLIP alignment workflow, passed as [-d].
    @param tree tree workflow, passed as [-t].
    @param w_every first value given to [-x].
    @param n_cycles second value given to [-x].

    NOTE(review): [-ncond] is hard-coded to 2, whereas the example command
    kept in the body uses 3; confirm that 2 is intended for every dataset. *)
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) : (*`diffsel TODO*) det_out directory workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"v1.0" () in
  (* Chain prefix as seen from inside the first workflow. *)
  let chain_out = dest // "myrun" in
  (*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
  let run_diffsel = workflow ~descr:"convergence_detection.run_diffsel" [
    mkdir_p dest ;
    cmd "_build/diffsel" ~env [
      opt "-t" dep tree ;
      opt "-d" dep phy_n ;
      opt "-ncond" int 2 ;
      (* -x takes two space-separated values *)
      opt "-x" (seq ~sep:" ") [ int w_every ; int n_cycles ] ;
      ident chain_out ;
    ] ;
  ] in
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "diffsel_analyze_result.py" in
  (* The same chain prefix, now addressed as a dependency of this workflow. *)
  let chain_in = (dep run_diffsel) // "myrun" in
  workflow ~descr:"convergence_detection.parse_diffsel" [
    docker env (
      and_list [
        mkdir_p tmp ;
        cd tmp ;
        cmd "ls" [dep run_diffsel] ;
        cmd "ls" [dep phy_n] ; (* required dep to link the file in the env *)
        cmd "ls" [dep tree] ; (* required dep to link the file in the env *)

        (*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
        (* Both scripts are written side by side in tmp, the current directory. *)
        cmd "cat" ~stdout:package [ file_dump (string Scripts.diffsel_script_utils) ] ;
        cmd "cat" ~stdout:script [ file_dump (string Scripts.diffsel_analyze_result) ] ;

        cmd "python" [
          string "diffsel_analyze_result.py" ;
          opt "-r" string "/diffsel/_build/readdiffsel" ;
          opt "-o" ident dest ;
          ident chain_in ;
        ]
      ]
    )
  ]
(* Merging of per-tool results. *)


(** [merge_results ~res_by_tools] builds the
    ["convergence_detection.merge_results"] workflow, which runs the
    embedded [Scripts.merge_det_results] Python script.

    Every element of [res_by_tools] contributes one flag/path pair to the
    command line: the flag is [--pcoc], [--pcoc_gamma] or [--diffsel]
    according to the element's [det_meth], and the path is its
    [det_result] dependency. The script receives the workflow destination
    through [-o]. *)
let merge_results ~res_by_tools : text_file workflow =
  let tool_args =
    List.map res_by_tools ~f:(fun { det_meth; det_result; _ } ->
      let tool_flag =
        match det_meth with
        | Pcoc -> "--pcoc"
        | Pcoc_gamma -> "--pcoc_gamma"
        | Diffsel -> "--diffsel"
      in
      seq ~sep:" " [ string tool_flag ; dep det_result ])
  in
  let script = file_dump (string Scripts.merge_det_results) in
  workflow ~descr:"convergence_detection.merge_results" [
    cmd "python" [
      script ;
      opt "-o" ident dest ;
      seq ~sep:" " tool_args ;
    ] ;
  ]