Commit f9f89f6e authored by Philippe Veber's avatar Philippe Veber
Browse files

multinomial: refactoring

parent eb3f23a1
......@@ -4,8 +4,8 @@ open File_formats
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) (* : cpt file *) =
let open Phylogenetics in
let open Phylogenetics_convergence in
let module MT = Multinomial_test in
let module MT = Phylogenetics_convergence.Multinomial_test in
let open Codepitk in
let meth = [%param meth] in
let test = match meth with
| `Asymptotic_LRT -> MT.LRT.asymptotic_test
......@@ -13,55 +13,20 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:ami
| `Asymptotic_sparse -> MT.Sparse.asymptotic_test
| `Simulation_sparse -> MT.Sparse.simulation_test ~sample_size:10_000
in
let fold_leaves (root : _ Tree.t) ~init ~f =
let open Tree in
let rec node acc branch_data = function
| Leaf d ->
f acc branch_data d
| Node n ->
List1.fold n.branches ~init:acc ~f:branch
and branch acc (Branch b) = node acc b.data b.tip in
match root with
| Leaf _ -> init
| Node n -> List1.fold n.branches ~init ~f:branch
in
let alignment =
Alignment.from_fasta [%path faa]
|> Rresult.R.get_ok
in
let tree = Codepitk.Utils.tree_from_file [%path tree_sc] in
let leaves = fold_leaves tree ~init:[] ~f:(fun acc bi ni ->
let cond = Phylogenetics_convergence.Simulator.Branch_info.condition bi in
match ni.name with
| None -> failwith "Leaves of the tree should be named"
| Some n -> (
match Alignment.find_sequence alignment n with
| None -> failwithf "Could not find %s in alignment" n ()
| Some seq -> (seq, cond) :: acc
)
)
in
let seqs0, seqs1 = List.partition_map leaves ~f:Either.(function
| (aa, `Ancestral) -> First aa
| (aa, `Convergent) -> Second aa
)
in
let counts seqs i =
Amino_acid.Table.init (fun aa ->
let aa = Amino_acid.to_char aa in
List.count seqs ~f:(fun s -> Char.equal s.[i] aa)
)
let tree =
Convergence_tree.from_file [%path tree_sc]
|> Rresult.R.failwith_error_msg
in
let site i =
let c0 = (counts seqs0 i :> int array) in
let c1 = (counts seqs1 i :> int array) in
let site c0 c1 =
let d = MT.data ~x1:c0 ~x2:c1 in
let r = test d in
r._T_, r.pvalue
in
let n = Alignment.ncols alignment in
let res = List.init n ~f:site in
let res = Multinomial.site_loop tree alignment site in
let res_lines =
List.mapi res ~f:(fun i (lr, pval) ->
sprintf "%d\t%f\t%f" (i + 1) (1. -. pval) lr
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment