(* convergence_detection.ml *)
open Core
open Bistro.EDSL
open Bistro.Std
open File_formats
open Bistro_bioinfo.Std
open Defs

(* Abstract tag types, one per detection tool.
   NOTE(review): neither is referenced in the visible code; the TODO markers
   in the return annotations of [pcoc] and [diffsel] suggest they were meant
   to become tool-specific output types -- confirm before removing. *)
type pcoc_out
type diffsel_out

(* Tag used as the content type of the directory produced by a detection
   workflow (see [det_out directory workflow] in the signatures below).
   The constructors are not built anywhere in the visible code. *)
type det_out =
  | Pcoc_out
  | Diffsel_out

(* Result of one detection method applied to one dataset.
   [dataset] and [det_meth] are types declared outside this file
   (presumably in [Defs] -- confirm). *)
type det_result = {
  dataset : dataset ; (* dataset the method was run on *)
  det_meth : det_meth ; (* which method produced the result; matched on in [merge_results] *)
  det_result : det_out directory workflow ; (* workflow producing the method output *)
  }

(* All detection results gathered for one dataset, plus their merged table. *)
type dataset_res = {
  model_prefix : string ; (* NOTE(review): meaning not visible in this file -- confirm *)
  tree_prefix : string ; (* NOTE(review): meaning not visible in this file -- confirm *)
  res_by_tools: det_result list ; (* one entry per detection method *)
  merged_results : text_file workflow (* same type as the value returned by [merge_results] *)
  }

(** [pcoc ?plot_complete ?gamma ~faa ~tree] chains two workflows.

    The first one runs [pcoc_det.py] inside the [carinerey/pcoc:06212018]
    docker image, passing the tree with [-t], the literal model [-] with
    [-m], the amino-acid alignment [faa] with [-aa] and the workflow
    destination with [-o].

    The second one copies whatever matches the pattern
    [RUN*/*.results.tsv] inside the output of the first workflow to its own
    destination with [cp]; this is the workflow that is returned.

    [gamma] and [plot_complete] are forwarded through
    [option (flag string ...)]; presumably the corresponding command-line
    switch is emitted only when the argument is [Some true] -- confirm
    against the Bistro EDSL.

    NOTE(review): the result is annotated as a directory although [cp]
    is given a file pattern as source -- confirm this is what the
    downstream merge script expects. *)
let pcoc ?plot_complete ?gamma  ~(faa:aminoacid_fasta workflow) ~(tree:_ workflow) :  (*`pcoc TODO*) det_out directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  let run_pcoc = workflow ~descr:"convergence_detection.pcoc" [
    cmd "pcoc_det.py" ~env [
      opt "-t" dep tree ;
      opt "-m" string "-" ;
      opt "-aa" dep faa ;
      opt "-o" ident dest ;
      option (flag string "--gamma") gamma ;
      option (flag string "--plot --plot_complete_ali") plot_complete ;
    ]
  ] (* TODO / selector ["RUN*/*.results.tsv"] *) in
  (* Path pattern selecting the result tables inside the PCOC output. *)
  let out_pcoc = (dep run_pcoc) // "RUN*/*.results.tsv" in
  workflow ~descr:"convergence_detection.selector_pcoc" [
    cmd "cp" [out_pcoc ; ident dest] ;
  ]
(** [diffsel ~phy_n ~tree ~w_every ~n_cycles] chains two workflows.

    The first one creates its destination directory and runs
    [_build/diffsel] inside the [vlanore/diffsel:v1.0] docker image with
    the tree ([-t]), the nucleotide alignment [phy_n] ([-d]), a fixed
    [-ncond 2], and [-x] followed by [w_every] and [n_cycles] separated by
    a space. The chain is named [myrun] inside the destination directory.

    The second one, which is returned, runs in the same image: it moves to
    the temporary directory, writes the two Python sources embedded in
    [Scripts] next to each other, and calls [diffsel_analyze_result.py] on
    the [myrun] chain of the first workflow, with [-o] set to its own
    destination. *)
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) :  (*`diffsel TODO*) det_out directory workflow =
  let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"v1.0" () in
  (*_build/diffsel -t data/samhd1.tree -d data/samhd1.ali -ncond 3 -x 1 10000 myrun*)
  let run_diffsel = workflow ~descr:"convergence_detection.run_diffsel" [
    mkdir_p dest ;
    cmd "_build/diffsel" ~env [
      opt "-t" dep tree ;
      opt "-d" dep phy_n ;
      opt "-ncond" int 2 ;
      opt "-x" seq [ int w_every ; string " " ; int n_cycles ] ;
      (* chain name, written under the destination of this workflow *)
      ident (dest // "myrun") ;
    ] ;
  ] in
  (* Locations, in the temporary directory, of the embedded Python sources. *)
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "diffsel_analyze_result.py" in
  (* Same chain, now addressed through the output of [run_diffsel]. *)
  let chain_of_run = (dep run_diffsel) // "myrun" in
  workflow ~descr:"convergence_detection.parse_diffsel" [
    docker env (
      and_list [
        mkdir_p tmp ;
        cd tmp ;
        cmd "ls" [dep run_diffsel] ;
        cmd "ls" [dep phy_n] ; (* required dep to link the file in the env *)
        cmd "ls" [dep tree] ; (* required dep to link the file in the env *)

        (*python diffsel_analyze_result.py [-r /path/to/readdiffsel] [-o output_file] chainname *)
        cmd "cat" ~stdout:package [ file_dump (string Scripts.diffsel_script_utils) ] ;
        cmd "cat" ~stdout:script [ file_dump (string Scripts.diffsel_analyze_result) ] ;

        (* The script is called by its bare name: the working directory is
           the temporary directory it was just written to. *)
        cmd "python" [
          string "diffsel_analyze_result.py" ;
          opt "-r" string "/diffsel/_build/readdiffsel" ;
          opt "-o" ident dest ;
          ident chain_of_run ;
        ]
      ]
    )
  ]


(** [merge_results ~res_by_tools] builds a workflow that runs the Python
    script embedded in [Scripts.merge_det_results] inside the
    [carinerey/ete3:3.0.0b35] docker image.

    The script receives [-o] with the workflow destination, followed by one
    space-separated pair per element of [res_by_tools]: a switch naming the
    detection method ([--pcoc], [--pcoc_gamma] or [--diffsel]) and the
    output of that method as a dependency. An empty list yields no pair. *)
let merge_results ~res_by_tools : text_file workflow =
  let env = docker_image ~account:"carinerey" ~name:"ete3" ~tag:"3.0.0b35" () in
  let tool_args = List.map res_by_tools ~f:(fun res ->
    (* Every method stores its output in the same field, so only the
       command-line switch depends on the method. *)
    let tool_switch = match res.det_meth with
      | Pcoc -> "--pcoc"
      | Pcoc_gamma -> "--pcoc_gamma"
      | Diffsel -> "--diffsel"
    in
    seq ~sep:" " [string tool_switch ; dep res.det_result]
  )
  in
  workflow ~descr:"convergence_detection.merge_results" [
    cmd "python" ~env [
      file_dump (string Scripts.merge_det_results) ;
      opt "-o" ident dest ;
      seq ~sep:" " tool_args ;
    ] ;
  ]