tdg09.ml 3.61 KB
Newer Older
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2 3
open Bistro
open Bistro.Shell_dsl
4 5
open File_formats

Philippe Veber's avatar
Philippe Veber committed
6
let img = Env.env_tdg09
7

Philippe Veber's avatar
Philippe Veber committed
8
let tdg09  ?(descr="") ~(faa:aminoacid_fasta file) ~(tree:_ file) () : [`tdg09] directory =
9 10 11
  let tdg09_out = dest // "tdg09.yaml" in
  let tmp_ali_phy = dest // "tmp_ali.phy" in
  let tmp_tree = dest // "tmp.nw" in
Carine Rey's avatar
Carine Rey committed
12 13
  let package = tmp // "diffsel_script_utils.py" in
  let script = tmp // "try_again.py" in
Philippe Veber's avatar
Philippe Veber committed
14 15 16 17 18
  Workflow.shell ~descr:("convergence_detection.run_tdg09."^descr) ~img [
    and_list [
      mkdir_p dest ;
      mkdir_p tmp ;
      cd tmp ;
Carine Rey's avatar
Carine Rey committed
19

Philippe Veber's avatar
Philippe Veber committed
20 21 22 23 24 25 26
      cmd "python" [
        file_dump (string Scripts.rename_input_tree_ali_tdg09) ;
        opt "-t" dep tree;
        opt "-a" dep faa;
        opt "-out_a " ident tmp_ali_phy;
        opt "-out_t " ident tmp_tree;
      ];
Carine Rey's avatar
Carine Rey committed
27

Philippe Veber's avatar
Philippe Veber committed
28 29
      cmd "cp"  [ file_dump (string Scripts.diffsel_script_utils) ; package] ;
      cmd "cp" [ file_dump (string Scripts.try_again) ; script] ;
Carine Rey's avatar
Carine Rey committed
30

Philippe Veber's avatar
Philippe Veber committed
31 32 33 34 35 36
      cmd "java" ~stdout:tdg09_out [
        string "-cp /tdg09/tdg09.jar tdg09.Analyse";
        opt "-tree" ident tmp_tree;
        opt "-alignment"  ident tmp_ali_phy ;
        opt "-threads"  int 1 ;
        opt "-groups"  string "XE ME" ;
37
      ]
Philippe Veber's avatar
Philippe Veber committed
38
    ]
39 40
  ]

41
let results run_tdg09 : cpt file =
Philippe Veber's avatar
Philippe Veber committed
42
  let tdg09_out = Workflow.select run_tdg09 [ "tdg09.yaml" ] in
Philippe Veber's avatar
Philippe Veber committed
43 44
  Workflow.shell ~descr:"convergence_detection.parse_tdg09" ~img [
    cmd "python" [
45 46 47 48
      file_dump (string Scripts.parse_results_tdg09) ;
      opt "-tdg09" dep tdg09_out;
      opt "-o" ident dest;
    ]
49
  ]
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120

module Inhouse_implementation = struct
  open Phylogenetics
  open Codepitk

  module Leaf_info = struct
    type t = int * string * Convergence_tree.condition
    type species = int
    let condition (_, _, c) = c
    let species (s, _, _) = s
  end

  module Site = struct
    type t = Alignment.t * int
    type species = int
    let get_aa (al, j) sp =
      match Amino_acid.of_char al.Alignment.sequences.(sp).[j] with
      | Some aa -> aa
      | None -> failwith "Unexpected character in alignment"
  end

  let reindex_tree alignment (tree : Convergence_tree.t) =
    let leaf_index =
      alignment.Alignment.descriptions
      |> Array.mapi ~f:(fun i s -> s, i)
      |> Array.to_list
      |> String.Map.of_alist_exn
    in
    let node s _ = s in
    let branch _ b = b.Convergence_tree.condition in
    let leaf c s =
      let i = String.Map.find_exn leaf_index s in
      (i, s, c)
    in
    Tree.propagate tree ~init:`Ancestral ~node ~branch ~leaf

  include Tdg09.Make(Convergence_tree.Branch_info)(Leaf_info)(Site)

  let run_on_alignment wag tree alignment =
    let n = Alignment.ncols alignment in
    let foreach_site j =
      let site = alignment, j in
      let _, _, lrt = Model3.lrt ~mode:`sparse wag tree site in
      lrt
    in
    Array.init n ~f:foreach_site
end

let wag : text file =
  Bistro_unix.wget "https://www.ebi.ac.uk/goldman-srv/WAG/wag.dat"

let inhouse_tdg09 (tree : nhx file) (fa : #fasta file) =
  let f = fun%workflow dest ->
    let open Phylogenetics in
    let open Codepitk in
    let alignment =
      Alignment.from_fasta [%path fa]
      |> Rresult.R.get_ok
    in
    let tree =
      Convergence_tree.from_file [%path tree]
      |> Rresult.R.failwith_error_msg
    in
    let wag = Wag.parse [%path wag] in
    let reindexed_tree = Inhouse_implementation.reindex_tree alignment tree in
    let res = Inhouse_implementation.run_on_alignment wag reindexed_tree alignment in
    let one_minus_pvals = Array.map res ~f:(fun lrt -> Some (1. -. lrt.pvalue)) in
    let cpt = Cpt.make ["inhouse_tdg09_1mpval", one_minus_pvals] in
    Result.iter cpt ~f:(Cpt.to_file ~output:dest)
  in
  Workflow.path_plugin ~descr:"detection_pipeline.inhouse_tdg09" f