open Core open Bistro open Bistro.Shell_dsl open File_formats let multinomial ?(descr="") ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) : text file = let img = Env.env_py in Workflow.shell ~descr:("calc_multinomial."^descr) [ mkdir_p dest; cmd "python" ~img [ file_dump (string Scripts.calc_multinomial) ; opt "-t" dep tree_sc; opt "-a" dep faa; opt "-o" ident dest ; ] ] let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) (* : text file *) = let open Phylogenetics in let open Phylogenetics_convergence in let module MT = Multinomial_test in let meth = [%param meth] in let test = match meth with | `Asymptotic_LRT -> MT.LRT.asymptotic_test | `Simulation_LRT -> MT.LRT.simulation_test ~sample_size:10_000 | `Asymptotic_sparse -> MT.Sparse.asymptotic_test | `Simulation_sparse -> MT.Sparse.simulation_test ~sample_size:10_000 in let fold_leaves (root : _ Tree.t) ~init ~f = let open Tree in let rec node acc branch_data = function | Leaf d -> f acc branch_data d | Node n -> Non_empty_list.fold n.branches ~init:acc ~f:branch and branch acc (Branch b) = node acc b.data b.tip in match root with | Leaf _ -> init | Node n -> Non_empty_list.fold n.branches ~init ~f:branch in let alignment = Alignment.from_fasta [%path faa] |> Rresult.R.get_ok in let tree = Reviewphiltrans_toolbox.Utils.tree_from_file [%path tree_sc] in let leaves = fold_leaves tree ~init:[] ~f:(fun acc (_, cond) ni -> match ni.name with | None -> failwith "Leaves of the tree should be named" | Some n -> ( match Alignment.find_sequence alignment n with | None -> failwithf "Could not find %s in alignment" n () | Some seq -> (seq, cond) :: acc ) ) in let seqs0, seqs1 = List.partition_map leaves ~f:(function | (aa, 0) -> `Fst aa | (aa, 1) -> `Snd aa | _ -> assert false ) in let counts seqs i = Amino_acid.Table.init (fun aa -> let aa = Amino_acid.to_char aa in List.count seqs ~f:(fun s -> Char.equal s.[i] aa) ) in let site i = let c0 = (counts seqs0 i :> int array) in let c1 = (counts seqs1 i :> int array) in let d = MT.data ~x1:c0 ~x2:c1 in let r = test d in r._T_, r.pvalue in let n = Alignment.ncols alignment in let res = List.init n ~f:site in let res_lines = List.mapi res ~f:(fun i (lr, pval) -> sprintf "%d\t%f\t%f" (i + 1) (1. -. pval) lr ) in let header = "Sites\t1MinusLRT\tLikelihoodRatio" in Out_channel.write_lines [%dest] (header :: res_lines) let multinomial_asymptotic_lrt ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Asymptotic_LRT ~tree_sc ~faa let multinomial_simulation_lrt ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Simulation_LRT ~tree_sc ~faa let multinomial_asymptotic_sparse ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Asymptotic_sparse ~tree_sc ~faa let multinomial_simulation_sparse ~tree_sc ~faa = multinomial_ocaml_implementation ~meth:`Simulation_sparse ~tree_sc ~faa