multinomial.ml 2.8 KB
Newer Older
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2
open Bistro
3 4
open File_formats

5
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) (* : cpt file *) =
6
  let open Phylogenetics in
7
  let open Phylogenetics_convergence in
Philippe Veber's avatar
Philippe Veber committed
8 9 10 11 12 13 14 15
  let module MT = Multinomial_test in
  let meth = [%param meth] in
  let test = match meth with
    | `Asymptotic_LRT -> MT.LRT.asymptotic_test
    | `Simulation_LRT -> MT.LRT.simulation_test ~sample_size:10_000
    | `Asymptotic_sparse -> MT.Sparse.asymptotic_test
    | `Simulation_sparse -> MT.Sparse.simulation_test ~sample_size:10_000
  in
16 17
  let fold_leaves (root : _ Tree.t) ~init ~f =
    let open Tree in
18 19 20 21
    let rec node acc branch_data = function
      | Leaf d ->
        f acc branch_data d
      | Node n ->
Philippe Veber's avatar
Philippe Veber committed
22
        List1.fold n.branches ~init:acc ~f:branch
23

24 25 26
    and branch acc (Branch b) = node acc b.data b.tip in
    match root with
    | Leaf _ -> init
Philippe Veber's avatar
Philippe Veber committed
27
    | Node n -> List1.fold n.branches ~init ~f:branch
28 29 30 31 32
  in
  let alignment =
    Alignment.from_fasta [%path faa]
    |> Rresult.R.get_ok
  in
Philippe Veber's avatar
Philippe Veber committed
33
  let tree = Codepitk.Utils.tree_from_file [%path tree_sc] in
Philippe Veber's avatar
Philippe Veber committed
34 35
  let leaves = fold_leaves tree ~init:[] ~f:(fun acc bi ni ->
      let cond = Phylogenetics_convergence.Simulator.Branch_info.condition bi in
36 37 38 39 40 41 42 43 44
      match ni.name with
      | None -> failwith "Leaves of the tree should be named"
      | Some n -> (
          match Alignment.find_sequence alignment n with
          | None -> failwithf "Could not find %s in alignment" n ()
          | Some seq -> (seq, cond) :: acc
        )
    )
  in
Philippe Veber's avatar
Philippe Veber committed
45
  let seqs0, seqs1 = List.partition_map leaves ~f:Either.(function
Philippe Veber's avatar
Philippe Veber committed
46 47
      | (aa, `Ancestral) -> First aa
      | (aa, `Convergent) -> Second aa
48 49 50
    )
  in
  let counts seqs i =
51
    Amino_acid.Table.init (fun aa ->
52 53 54 55 56 57 58 59
        let aa = Amino_acid.to_char aa in
        List.count seqs ~f:(fun s -> Char.equal s.[i] aa)
      )
  in
  let site i =
    let c0 = (counts seqs0 i :> int array) in
    let c1 = (counts seqs1 i :> int array) in
    let d = MT.data ~x1:c0 ~x2:c1 in
Philippe Veber's avatar
Philippe Veber committed
60
    let r = test d in
61 62 63 64 65 66
    r._T_, r.pvalue
  in
  let n = Alignment.ncols alignment in
  let res = List.init n ~f:site in
  let res_lines =
    List.mapi res ~f:(fun i (lr, pval) ->
Philippe Veber's avatar
Philippe Veber committed
67
        sprintf "%d\t%f\t%f" (i + 1) (1. -. pval) lr
68 69
      )
  in
70
  let header = "Sites\tMultinomial_1mp\tMultinomial_LikelihoodRatio" in
71
  Out_channel.write_lines [%dest] (header :: res_lines)
Philippe Veber's avatar
Philippe Veber committed
72 73 74 75 76

let multinomial_asymptotic_lrt ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Asymptotic_LRT ~tree_sc ~faa
let multinomial_simulation_lrt ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Simulation_LRT ~tree_sc ~faa
let multinomial_asymptotic_sparse ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Asymptotic_sparse ~tree_sc ~faa
let multinomial_simulation_sparse ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Simulation_sparse ~tree_sc ~faa