Commit 9d0a474a authored by Philippe Veber's avatar Philippe Veber
Browse files

added in house implementation of TDG09

parent a8a7d82a
...@@ -43,6 +43,8 @@ module type S = sig ...@@ -43,6 +43,8 @@ module type S = sig
val failsafe_tdg09 : query -> cpt file val failsafe_tdg09 : query -> cpt file
val inhouse_tdg09 : query -> cpt file
val pcoc : ?gamma:bool -> ?ncat:int -> query -> cpt file val pcoc : ?gamma:bool -> ?ncat:int -> query -> cpt file
val pcoc_v2 : val pcoc_v2 :
...@@ -162,6 +164,11 @@ module Make (Q : Query) = struct ...@@ -162,6 +164,11 @@ module Make (Q : Query) = struct
let failsafe_tdg09 d = Workflow.trywith (tdg09 d) (mock_tdg09 d) let failsafe_tdg09 d = Workflow.trywith (tdg09 d) (mock_tdg09 d)
let inhouse_tdg09 d =
Tamuri.inhouse_tdg09
(tree ~branch_length_unit:`Amino_acid d)
(amino_acid_alignment d)
let gemma q ~lmm_test ~relatedness_mode = let gemma q ~lmm_test ~relatedness_mode =
let alignment = amino_acid_alignment q in let alignment = amino_acid_alignment q in
let tree = tree ~branch_length_unit:`Amino_acid q in let tree = tree ~branch_length_unit:`Amino_acid q in
......
...@@ -43,6 +43,8 @@ module type S = sig ...@@ -43,6 +43,8 @@ module type S = sig
val failsafe_tdg09 : query -> cpt file val failsafe_tdg09 : query -> cpt file
val inhouse_tdg09 : query -> cpt file
val pcoc : ?gamma:bool -> ?ncat:int -> query -> cpt file val pcoc : ?gamma:bool -> ?ncat:int -> query -> cpt file
val pcoc_v2 : val pcoc_v2 :
......
...@@ -47,3 +47,74 @@ let results run_tdg09 : cpt file = ...@@ -47,3 +47,74 @@ let results run_tdg09 : cpt file =
opt "-o" ident dest; opt "-o" ident dest;
] ]
] ]
module Inhouse_implementation = struct
open Phylogenetics
open Codepitk
module Leaf_info = struct
type t = int * string * Convergence_tree.condition
type species = int
let condition (_, _, c) = c
let species (s, _, _) = s
end
module Site = struct
type t = Alignment.t * int
type species = int
let get_aa (al, j) sp =
match Amino_acid.of_char al.Alignment.sequences.(sp).[j] with
| Some aa -> aa
| None -> failwith "Unexpected character in alignment"
end
let reindex_tree alignment (tree : Convergence_tree.t) =
let leaf_index =
alignment.Alignment.descriptions
|> Array.mapi ~f:(fun i s -> s, i)
|> Array.to_list
|> String.Map.of_alist_exn
in
let node s _ = s in
let branch _ b = b.Convergence_tree.condition in
let leaf c s =
let i = String.Map.find_exn leaf_index s in
(i, s, c)
in
Tree.propagate tree ~init:`Ancestral ~node ~branch ~leaf
include Tdg09.Make(Convergence_tree.Branch_info)(Leaf_info)(Site)
let run_on_alignment wag tree alignment =
let n = Alignment.ncols alignment in
let foreach_site j =
let site = alignment, j in
let _, _, lrt = Model3.lrt ~mode:`sparse wag tree site in
lrt
in
Array.init n ~f:foreach_site
end
let wag : text file =
Bistro_unix.wget "https://www.ebi.ac.uk/goldman-srv/WAG/wag.dat"
let inhouse_tdg09 (tree : nhx file) (fa : #fasta file) =
let f = fun%workflow dest ->
let open Phylogenetics in
let open Codepitk in
let alignment =
Alignment.from_fasta [%path fa]
|> Rresult.R.get_ok
in
let tree =
Convergence_tree.from_file [%path tree]
|> Rresult.R.failwith_error_msg
in
let wag = Wag.parse [%path wag] in
let reindexed_tree = Inhouse_implementation.reindex_tree alignment tree in
let res = Inhouse_implementation.run_on_alignment wag reindexed_tree alignment in
let one_minus_pvals = Array.map res ~f:(fun lrt -> Some (1. -. lrt.pvalue)) in
let cpt = Cpt.make ["inhouse_tdg09_1mpval", one_minus_pvals] in
Result.iter cpt ~f:(Cpt.to_file ~output:dest)
in
Workflow.path_plugin ~descr:"detection_pipeline.inhouse_tdg09" f
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment