multinomial.ml 2.24 KB
Newer Older
1
open Core
Philippe Veber's avatar
Philippe Veber committed
2 3
open Bistro
open Bistro.Shell_dsl
4 5
open File_formats

6
let multinomial ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) : text_file pworkflow =
Philippe Veber's avatar
Philippe Veber committed
7 8 9
  let img = Env.env_py in
  Workflow.shell ~descr:("calc_multinomial") [
    cmd "python" ~img [
10 11 12
      file_dump (string Scripts.calc_multinomial) ;
      opt "-t" dep tree_sc;
      opt "-a" dep faa;
13
      opt "-o"  ident dest ;
14 15
    ]
  ]
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75

let%pworkflow multinomial_asymptotic_lrt ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) (* : text_file pworkflow *) =
  let open Phylogenetics in
  let open Convdet in
  let fold_leaves (root : _ Tree.t) ~init ~f =
    let open Tree in
    let rec node acc branch_data n =
      match n.branches with
      | [] -> f acc branch_data n.node_data
      | xs ->
        List.fold xs ~init:acc ~f:branch

    and branch acc b = node acc b.branch_data b.tip in
    List.fold root.branches ~init ~f:branch
  in
  let alignment =
    Alignment.from_fasta [%path faa]
    |> Rresult.R.get_ok
  in
  let tree = Simulator.tree_from_file [%path tree_sc] in
  let leaves = fold_leaves tree ~init:[] ~f:(fun acc (_, cond) ni ->
      match ni.name with
      | None -> failwith "Leaves of the tree should be named"
      | Some n -> (
          match Alignment.find_sequence alignment n with
          | None -> failwithf "Could not find %s in alignment" n ()
          | Some seq -> (seq, cond) :: acc
        )
    )
  in
  let seqs0, seqs1 = List.partition_map leaves ~f:(function
      | (aa, 0) -> `Fst aa
      | (aa, 1) -> `Snd aa
      | _ -> assert false
    )
  in
  let counts seqs i =
    Amino_acid.vector (fun aa ->
        let aa = Amino_acid.to_char aa in
        List.count seqs ~f:(fun s -> Char.equal s.[i] aa)
      )
  in
  let site i =
    let module MT = Multinomial_test in

    let c0 = (counts seqs0 i :> int array) in
    let c1 = (counts seqs1 i :> int array) in
    let d = MT.data ~x1:c0 ~x2:c1 in
    let r = MT.LRT.asymptotic_test d in
    r._T_, r.pvalue
  in
  let n = Alignment.ncols alignment in
  let res = List.init n ~f:site in
  let res_lines =
    List.mapi res ~f:(fun i (lr, pval) ->
        sprintf "%d\t%f\t%f" i (1. -. pval) lr
      )
  in
  let header = "Sites\t1MinusLRT\tLikelihoodRatio" in
  Out_channel.write_lines [%dest] (header :: res_lines)