Commit 792657df authored by Philippe Veber's avatar Philippe Veber
Browse files

Added multinomial asymptotic LRT

parent 07852306
(library
(name reviewphiltrans)
(libraries biocaml.ez bistro.bioinfo bistro.utils convdet ocaml-r.graphics ocaml-r.grDevices)
(libraries biocaml.ez bistro.bioinfo bistro.utils convdet gzt ocaml-r.graphics ocaml-r.grDevices)
(preprocess
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
......
......@@ -13,3 +13,63 @@ let multinomial ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) : text_f
opt "-o" ident dest ;
]
]
let%pworkflow multinomial_asymptotic_lrt ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) (* : text_file pworkflow *) =
let open Phylogenetics in
let open Convdet in
let fold_leaves (root : _ Tree.t) ~init ~f =
let open Tree in
let rec node acc branch_data n =
match n.branches with
| [] -> f acc branch_data n.node_data
| xs ->
List.fold xs ~init:acc ~f:branch
and branch acc b = node acc b.branch_data b.tip in
List.fold root.branches ~init ~f:branch
in
let alignment =
Alignment.from_fasta [%path faa]
|> Rresult.R.get_ok
in
let tree = Simulator.tree_from_file [%path tree_sc] in
let leaves = fold_leaves tree ~init:[] ~f:(fun acc (_, cond) ni ->
match ni.name with
| None -> failwith "Leaves of the tree should be named"
| Some n -> (
match Alignment.find_sequence alignment n with
| None -> failwithf "Could not find %s in alignment" n ()
| Some seq -> (seq, cond) :: acc
)
)
in
let seqs0, seqs1 = List.partition_map leaves ~f:(function
| (aa, 0) -> `Fst aa
| (aa, 1) -> `Snd aa
| _ -> assert false
)
in
let counts seqs i =
Amino_acid.vector (fun aa ->
let aa = Amino_acid.to_char aa in
List.count seqs ~f:(fun s -> Char.equal s.[i] aa)
)
in
let site i =
let module MT = Multinomial_test in
let c0 = (counts seqs0 i :> int array) in
let c1 = (counts seqs1 i :> int array) in
let d = MT.data ~x1:c0 ~x2:c1 in
let r = MT.LRT.asymptotic_test d in
r._T_, r.pvalue
in
let n = Alignment.ncols alignment in
let res = List.init n ~f:site in
let res_lines =
List.mapi res ~f:(fun i (lr, pval) ->
sprintf "%d\t%f\t%f" i (1. -. pval) lr
)
in
let header = "Sites\t1MinusLRT\tLikelihoodRatio" in
Out_channel.write_lines [%dest] (header :: res_lines)
......@@ -149,6 +149,11 @@ let multinomial d =
~tree_sc:(tree d)
~faa:(amino_acid_alignment d)
let multinomial_asymptotic_lrt d =
Multinomial.multinomial_asymptotic_lrt
~tree_sc:(tree d)
~faa:(amino_acid_alignment d)
let diffseldsparse ?pi ?shiftprob ?eps d =
Diffseldsparse.diffseldsparse
?pi ?shiftprob ?eps
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment