multinomial.ml 3 KB
Newer Older
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2 3
open Bistro
open Bistro.Shell_dsl
4 5
open File_formats

6
let multinomial ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) : text_file pworkflow =
Philippe Veber's avatar
Philippe Veber committed
7 8 9
  let img = Env.env_py in
  Workflow.shell ~descr:("calc_multinomial") [
    cmd "python" ~img [
10 11 12
      file_dump (string Scripts.calc_multinomial) ;
      opt "-t" dep tree_sc;
      opt "-a" dep faa;
13
      opt "-o"  ident dest ;
14 15
    ]
  ]
16

Philippe Veber's avatar
Philippe Veber committed
17
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) (* : text_file pworkflow *) =
18 19
  let open Phylogenetics in
  let open Convdet in
Philippe Veber's avatar
Philippe Veber committed
20 21 22 23 24 25 26 27
  let module MT = Multinomial_test in
  let meth = [%param meth] in
  let test = match meth with
    | `Asymptotic_LRT -> MT.LRT.asymptotic_test
    | `Simulation_LRT -> MT.LRT.simulation_test ~sample_size:10_000
    | `Asymptotic_sparse -> MT.Sparse.asymptotic_test
    | `Simulation_sparse -> MT.Sparse.simulation_test ~sample_size:10_000
  in
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
  let fold_leaves (root : _ Tree.t) ~init ~f =
    let open Tree in
    let rec node acc branch_data n =
      match n.branches with
      | [] -> f acc branch_data n.node_data
      | xs ->
        List.fold xs ~init:acc ~f:branch

    and branch acc b = node acc b.branch_data b.tip in
    List.fold root.branches ~init ~f:branch
  in
  let alignment =
    Alignment.from_fasta [%path faa]
    |> Rresult.R.get_ok
  in
  let tree = Simulator.tree_from_file [%path tree_sc] in
  let leaves = fold_leaves tree ~init:[] ~f:(fun acc (_, cond) ni ->
      match ni.name with
      | None -> failwith "Leaves of the tree should be named"
      | Some n -> (
          match Alignment.find_sequence alignment n with
          | None -> failwithf "Could not find %s in alignment" n ()
          | Some seq -> (seq, cond) :: acc
        )
    )
  in
  let seqs0, seqs1 = List.partition_map leaves ~f:(function
      | (aa, 0) -> `Fst aa
      | (aa, 1) -> `Snd aa
      | _ -> assert false
    )
  in
  let counts seqs i =
    Amino_acid.vector (fun aa ->
        let aa = Amino_acid.to_char aa in
        List.count seqs ~f:(fun s -> Char.equal s.[i] aa)
      )
  in
  let site i =
    let c0 = (counts seqs0 i :> int array) in
    let c1 = (counts seqs1 i :> int array) in
    let d = MT.data ~x1:c0 ~x2:c1 in
Philippe Veber's avatar
Philippe Veber committed
70
    let r = test d in
71 72 73 74 75 76 77 78 79 80 81
    r._T_, r.pvalue
  in
  let n = Alignment.ncols alignment in
  let res = List.init n ~f:site in
  let res_lines =
    List.mapi res ~f:(fun i (lr, pval) ->
        sprintf "%d\t%f\t%f" i (1. -. pval) lr
      )
  in
  let header = "Sites\t1MinusLRT\tLikelihoodRatio" in
  Out_channel.write_lines [%dest] (header :: res_lines)
Philippe Veber's avatar
Philippe Veber committed
82 83 84 85 86

let multinomial_asymptotic_lrt ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Asymptotic_LRT ~tree_sc ~faa
let multinomial_simulation_lrt ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Simulation_LRT ~tree_sc ~faa
let multinomial_asymptotic_sparse ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Asymptotic_sparse ~tree_sc ~faa
let multinomial_simulation_sparse ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Simulation_sparse ~tree_sc ~faa