multinomial.ml 3.14 KB
Newer Older
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2 3
open Bistro
open Bistro.Shell_dsl
4 5
open File_formats

6
let multinomial ?(descr="") ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) : text_file pworkflow =
Philippe Veber's avatar
Philippe Veber committed
7
  let img = Env.env_py in
Carine Rey's avatar
Carine Rey committed
8 9
  Workflow.shell ~descr:("calc_multinomial."^descr) [
    mkdir_p dest;
Philippe Veber's avatar
Philippe Veber committed
10
    cmd "python" ~img [
11 12 13
      file_dump (string Scripts.calc_multinomial) ;
      opt "-t" dep tree_sc;
      opt "-a" dep faa;
14
      opt "-o"  ident dest ;
15 16
    ]
  ]
17

Philippe Veber's avatar
Philippe Veber committed
18
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) (* : text_file pworkflow *) =
19
  let open Phylogenetics in
20
  let open Phylogenetics_convergence in
Philippe Veber's avatar
Philippe Veber committed
21 22 23 24 25 26 27 28
  let module MT = Multinomial_test in
  let meth = [%param meth] in
  let test = match meth with
    | `Asymptotic_LRT -> MT.LRT.asymptotic_test
    | `Simulation_LRT -> MT.LRT.simulation_test ~sample_size:10_000
    | `Asymptotic_sparse -> MT.Sparse.asymptotic_test
    | `Simulation_sparse -> MT.Sparse.simulation_test ~sample_size:10_000
  in
29 30
  let fold_leaves (root : _ Tree.t) ~init ~f =
    let open Tree in
31 32 33 34 35
    let rec node acc branch_data = function
      | Leaf d ->
        f acc branch_data d
      | Node n ->
        Non_empty_list.fold n.branches ~init:acc ~f:branch
36

37 38 39 40
    and branch acc (Branch b) = node acc b.data b.tip in
    match root with
    | Leaf _ -> init
    | Node n -> Non_empty_list.fold n.branches ~init ~f:branch
41 42 43 44 45
  in
  let alignment =
    Alignment.from_fasta [%path faa]
    |> Rresult.R.get_ok
  in
46
  let tree = Reviewphiltrans_toolbox.Utils.tree_from_file [%path tree_sc] in
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
  let leaves = fold_leaves tree ~init:[] ~f:(fun acc (_, cond) ni ->
      match ni.name with
      | None -> failwith "Leaves of the tree should be named"
      | Some n -> (
          match Alignment.find_sequence alignment n with
          | None -> failwithf "Could not find %s in alignment" n ()
          | Some seq -> (seq, cond) :: acc
        )
    )
  in
  let seqs0, seqs1 = List.partition_map leaves ~f:(function
      | (aa, 0) -> `Fst aa
      | (aa, 1) -> `Snd aa
      | _ -> assert false
    )
  in
  let counts seqs i =
64
    Amino_acid.Table.init (fun aa ->
65 66 67 68 69 70 71 72
        let aa = Amino_acid.to_char aa in
        List.count seqs ~f:(fun s -> Char.equal s.[i] aa)
      )
  in
  let site i =
    let c0 = (counts seqs0 i :> int array) in
    let c1 = (counts seqs1 i :> int array) in
    let d = MT.data ~x1:c0 ~x2:c1 in
Philippe Veber's avatar
Philippe Veber committed
73
    let r = test d in
74 75 76 77 78 79
    r._T_, r.pvalue
  in
  let n = Alignment.ncols alignment in
  let res = List.init n ~f:site in
  let res_lines =
    List.mapi res ~f:(fun i (lr, pval) ->
Philippe Veber's avatar
Philippe Veber committed
80
        sprintf "%d\t%f\t%f" (i + 1) (1. -. pval) lr
81 82 83 84
      )
  in
  let header = "Sites\t1MinusLRT\tLikelihoodRatio" in
  Out_channel.write_lines [%dest] (header :: res_lines)
Philippe Veber's avatar
Philippe Veber committed
85 86 87 88 89

let multinomial_asymptotic_lrt ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Asymptotic_LRT ~tree_sc ~faa
let multinomial_simulation_lrt ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Simulation_LRT ~tree_sc ~faa
let multinomial_asymptotic_sparse ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Asymptotic_sparse ~tree_sc ~faa
let multinomial_simulation_sparse ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Simulation_sparse ~tree_sc ~faa