Commit a3df223f authored by Philippe Veber's avatar Philippe Veber
Browse files

update wrt phylogenetics and bistro

parent e5695953
open Core
open Convdet
open Phylogenetics
let ok_exn err = function
| Ok x -> x
......@@ -45,7 +45,7 @@ let nucleotide_fasta_gc ?pos fa =
seq_gc ?pos seqs
let nucleotide_fasta_gc_ac ?pos tree fa =
let tree = Convdet.Simulator.tree_from_file tree in
let tree = Reviewphiltrans_toolbox.Utils.tree_from_file tree in
let seqs = strings_from_fasta fa in
let leaf_state =
Phylogenetics.Tree.map_leaves tree ~root:(0., 0) ~f:(fun _ b -> snd b = 0)
......@@ -76,7 +76,7 @@ let command =
main ~alignment
]
let%pworkflow histogram (fa : #Bistro_bioinfo.fasta Bistro.pworkflow) =
let%pworkflow histogram (fa : #Bistro.fasta Bistro.pworkflow) =
let al = ok_exn Alignment.show_parsing_error @@ Alignment.from_fasta [%path fa] in
let float_array_of_int_list x =
Array.of_list x
......
(library
(name reviewphiltrans)
(libraries bistro.bioinfo bistro.utils convdet gzt ocaml-r.graphics ocaml-r.grDevices reviewphiltrans_toolbox )
(libraries bistro.bioinfo bistro.utils gzt ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence reviewphiltrans_toolbox )
(preprocess
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
......
open Bistro
open Bistro_bioinfo
class type nhx = object
inherit text_file
......
......@@ -17,7 +17,7 @@ let multinomial ?(descr="") ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkfl
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) (* : text_file pworkflow *) =
let open Phylogenetics in
let open Convdet in
let open Phylogenetics_convergence in
let module MT = Multinomial_test in
let meth = [%param meth] in
let test = match meth with
......@@ -28,20 +28,22 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa
in
let fold_leaves (root : _ Tree.t) ~init ~f =
let open Tree in
let rec node acc branch_data n =
match n.branches with
| [] -> f acc branch_data n.node_data
| xs ->
List.fold xs ~init:acc ~f:branch
let rec node acc branch_data = function
| Leaf d ->
f acc branch_data d
| Node n ->
Non_empty_list.fold n.branches ~init:acc ~f:branch
and branch acc b = node acc b.branch_data b.tip in
List.fold root.branches ~init ~f:branch
and branch acc (Branch b) = node acc b.data b.tip in
match root with
| Leaf _ -> init
| Node n -> Non_empty_list.fold n.branches ~init ~f:branch
in
let alignment =
Alignment.from_fasta [%path faa]
|> Rresult.R.get_ok
in
let tree = Simulator.tree_from_file [%path tree_sc] in
let tree = Reviewphiltrans_toolbox.Utils.tree_from_file [%path tree_sc] in
let leaves = fold_leaves tree ~init:[] ~f:(fun acc (_, cond) ni ->
match ni.name with
| None -> failwith "Leaves of the tree should be named"
......@@ -59,7 +61,7 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa
)
in
let counts seqs i =
Amino_acid.vector (fun aa ->
Amino_acid.Table.init (fun aa ->
let aa = Amino_acid.to_char aa in
List.count seqs ~f:(fun s -> Char.equal s.[i] aa)
)
......
open Core_kernel
let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~gBGC:(gBGC0, gBGC1) ~tree ~fitness_profiles () =
let open Phylogenetics in
let () = Option.iter ~f:Random.init [%param seed] in
let n_h0 = [%param n_h0] in
let n_ha = [%param n_ha] in
......@@ -9,17 +10,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
let gBGC0 = [%param gBGC0] in
let gBGC1 = [%param gBGC1] in
let branch_factor = [%param branch_factor] in
let tree = Convdet.Simulator.tree_from_file ?alpha:branch_factor [%path tree] in
let fitness_profiles = Convdet.Profile_tsv.(read [%path fitness_profiles] |> to_fitness) in
let rescale_fitness beta = Convdet.Amino_acid.vector_map ~f:(( *. ) beta) in
let tree = Reviewphiltrans_toolbox.Utils.tree_from_file ?alpha:branch_factor [%path tree] in
let fitness_profiles = Phylogenetics_convergence.Profile_tsv.(read [%path fitness_profiles] |> to_fitness) in
let rescale_fitness beta = Amino_acid.Vector.map ~f:(( *. ) beta) in
let base_param =
let p = Convdet.Mutsel.random_param ~alpha_nucleotide:10. ~alpha_fitness:0.1 in
let p = Mutsel.random_param ~alpha_nucleotide:10. ~alpha_fitness:0.1 in
{ p with omega = 1. }
in
let random_profile beta =
Random.int (Array.length fitness_profiles)
|> Array.get fitness_profiles
|> Convdet.Amino_acid.vector_of_array_exn
|> Amino_acid.Vector.of_array_exn
|> rescale_fitness beta
in
let h0_params = Array.init n_h0 ~f:(fun _ ->
......@@ -29,8 +30,9 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
(p, q)
)
in
let most_probable_aa (pref : float Convdet.Amino_acid.vector) =
let arr = Array.mapi (pref :> float array) ~f:(fun i x -> x, i) in
let most_probable_aa (pref : Amino_acid.vector) =
let pref = Amino_acid.Vector.to_array pref in
let arr = Array.mapi pref ~f:(fun i x -> x, i) in
match Array.max_elt ~compare:Poly.compare arr with
| None -> assert false
| Some (_, i) -> i
......@@ -58,13 +60,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
| 1 -> q
| _ -> assert false
in
let root_condition = Convdet.Simulator.Mutsel.root_condition tree in
let root_condition =
Option.value_exn (Phylogenetics_convergence.Simulator.root_condition tree)
in
let root_dists = Array.init (n_h0 + n_ha) ~f:(fun i ->
Convdet.Mutsel.stationary_distribution (params i root_condition)
Mutsel.stationary_distribution (params i root_condition)
|> Mutsel.NSCodon.Vector.to_array
|> Mutsel.NSCodon.Table.of_array_exn
)
in
let root = Convdet.Simulator.Mutsel.hmm0 ~len:(n_h0 + n_ha) ~dist:(Array.get root_dists) in
let ali = Convdet.Simulator.Mutsel.alignment tree ~root params in
let root = Phylogenetics_convergence.Simulator.Mutsel.hmm0 ~len:(n_h0 + n_ha) ~dist:(Array.get root_dists) in
let ali = Phylogenetics_convergence.Simulator.Mutsel.alignment tree ~root params in
let species_name =
Phylogenetics.Tree.leaves tree
|> List.map ~f:(fun { name } -> Option.value_exn name) in
......@@ -78,7 +84,7 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
let save_fitness_histogram dest =
let data =
Array.fold (Array.append h0_params ha_params) ~init:[] ~f:(fun acc (p,q) ->
(p.scaled_fitness :> float array) :: (q.scaled_fitness :> float array) :: acc
(Amino_acid.Vector.to_array p.scaled_fitness) :: (Amino_acid.Vector.to_array q.scaled_fitness) :: acc
)
|> Array.concat
in
......@@ -98,29 +104,20 @@ let simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s ~gBGC ~tree ~fitness_profil
let%pworkflow pair_tree ~branch_length1 ~branch_length2 ~npairs =
let open Phylogenetics in
let branch_length1, branch_length2, npairs = [%param branch_length1, branch_length2, npairs] in
let tree ?name branches = {
Tree.node_data = { Newick_ast.name } ;
branches ;
}
in
let leaf name = Tree.leaf { Newick_ast.name = Some name } in
let branch ~length ~condition tip =
let tags = match condition with
| `Ancestral -> ["Condition", "0"]
| `Convergent -> ["Condition", "1" ; "Transition", "1"]
in
{
Tree.branch_data = { Newick_ast.length = Some length ; tags } ;
tip ;
}
in
Tree.branch { Newick_ast.length = Some length ; tags } tip in
let make_pair i =
tree [
branch ~length:branch_length2 ~condition:`Ancestral (tree ~name:(sprintf "A%d" i) []) ;
branch ~length:branch_length2 ~condition:`Convergent (tree ~name:(sprintf "C%d" i) []) ;
]
Tree.binary_node { Newick_ast.name = None }
(branch ~length:branch_length2 ~condition:`Ancestral (leaf (sprintf "A%d" i)))
(branch ~length:branch_length2 ~condition:`Convergent (leaf (sprintf "C%d" i)))
|> branch ~length:branch_length1 ~condition:`Ancestral
in
let tree =
Newick.Tree (tree (List.init npairs ~f:make_pair))
Newick.Tree (Tree.node { Newick_ast.name = None } (Non_empty_list.init npairs ~f:make_pair))
in
Newick.to_file tree [%dest]
(library
(name reviewphiltrans_toolbox)
(libraries biocaml.ez convdet gzt ocaml-r.graphics ocaml-r.grDevices)
(libraries biocaml.ez gzt ocaml-r.graphics ocaml-r.grDevices phylogenetics)
(preprocess (pps ppx_jane)))
open Core_kernel
let tree_from_file ?(alpha = 1.) fn =
match Phylogenetics.Newick.from_file fn with
| Branch (Branch { tip = t ; _ })
| Tree t ->
Phylogenetics.Tree.map t ~node:Fn.id ~leaf:Fn.id ~branch:Phylogenetics.Newick.(fun b ->
let length = Option.value_exn b.length *. alpha in
let condition =
match List.Assoc.find ~equal:String.equal b.tags "Condition" with
| Some s -> Int.of_string s
| None -> failwith "Missing Condition tag"
in
length, condition
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment