multinomial.ml 3.13 KB
Newer Older
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2 3
open Bistro
open Bistro.Shell_dsl
4 5
open File_formats

Philippe Veber's avatar
Philippe Veber committed
6
let multinomial ?(descr="") ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) () : text file =
Philippe Veber's avatar
Philippe Veber committed
7
  let img = Env.env_py in
Philippe Veber's avatar
Philippe Veber committed
8 9
  Workflow.shell ~descr:("calc_multinomial."^descr) ~img [
    cmd "python" [
10 11 12
      file_dump (string Scripts.calc_multinomial) ;
      opt "-t" dep tree_sc;
      opt "-a" dep faa;
13
      opt "-o"  ident dest ;
14 15
    ]
  ]
16

Philippe Veber's avatar
Philippe Veber committed
17
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) (* : text file *) =
18
  let open Phylogenetics in
19
  let open Phylogenetics_convergence in
Philippe Veber's avatar
Philippe Veber committed
20 21 22 23 24 25 26 27
  let module MT = Multinomial_test in
  let meth = [%param meth] in
  let test = match meth with
    | `Asymptotic_LRT -> MT.LRT.asymptotic_test
    | `Simulation_LRT -> MT.LRT.simulation_test ~sample_size:10_000
    | `Asymptotic_sparse -> MT.Sparse.asymptotic_test
    | `Simulation_sparse -> MT.Sparse.simulation_test ~sample_size:10_000
  in
28 29
  let fold_leaves (root : _ Tree.t) ~init ~f =
    let open Tree in
30 31 32 33
    let rec node acc branch_data = function
      | Leaf d ->
        f acc branch_data d
      | Node n ->
Philippe Veber's avatar
Philippe Veber committed
34
        List1.fold n.branches ~init:acc ~f:branch
35

36 37 38
    and branch acc (Branch b) = node acc b.data b.tip in
    match root with
    | Leaf _ -> init
Philippe Veber's avatar
Philippe Veber committed
39
    | Node n -> List1.fold n.branches ~init ~f:branch
40 41 42 43 44
  in
  let alignment =
    Alignment.from_fasta [%path faa]
    |> Rresult.R.get_ok
  in
Philippe Veber's avatar
Philippe Veber committed
45
  let tree = Codepitk.Utils.tree_from_file [%path tree_sc] in
Philippe Veber's avatar
Philippe Veber committed
46 47
  let leaves = fold_leaves tree ~init:[] ~f:(fun acc bi ni ->
      let cond = Phylogenetics_convergence.Simulator.Branch_info.condition bi in
48 49 50 51 52 53 54 55 56
      match ni.name with
      | None -> failwith "Leaves of the tree should be named"
      | Some n -> (
          match Alignment.find_sequence alignment n with
          | None -> failwithf "Could not find %s in alignment" n ()
          | Some seq -> (seq, cond) :: acc
        )
    )
  in
Philippe Veber's avatar
Philippe Veber committed
57
  let seqs0, seqs1 = List.partition_map leaves ~f:Either.(function
Philippe Veber's avatar
Philippe Veber committed
58 59
      | (aa, `Ancestral) -> First aa
      | (aa, `Convergent) -> Second aa
60 61 62
    )
  in
  let counts seqs i =
63
    Amino_acid.Table.init (fun aa ->
64 65 66 67 68 69 70 71
        let aa = Amino_acid.to_char aa in
        List.count seqs ~f:(fun s -> Char.equal s.[i] aa)
      )
  in
  let site i =
    let c0 = (counts seqs0 i :> int array) in
    let c1 = (counts seqs1 i :> int array) in
    let d = MT.data ~x1:c0 ~x2:c1 in
Philippe Veber's avatar
Philippe Veber committed
72
    let r = test d in
73 74 75 76 77 78
    r._T_, r.pvalue
  in
  let n = Alignment.ncols alignment in
  let res = List.init n ~f:site in
  let res_lines =
    List.mapi res ~f:(fun i (lr, pval) ->
Philippe Veber's avatar
Philippe Veber committed
79
        sprintf "%d\t%f\t%f" (i + 1) (1. -. pval) lr
80 81 82 83
      )
  in
  let header = "Sites\t1MinusLRT\tLikelihoodRatio" in
  Out_channel.write_lines [%dest] (header :: res_lines)
Philippe Veber's avatar
Philippe Veber committed
84 85 86 87 88

let multinomial_asymptotic_lrt ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Asymptotic_LRT ~tree_sc ~faa
let multinomial_simulation_lrt ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Simulation_LRT ~tree_sc ~faa
let multinomial_asymptotic_sparse ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Asymptotic_sparse ~tree_sc ~faa
let multinomial_simulation_sparse ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Simulation_sparse ~tree_sc ~faa