Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit a3df223f authored by Philippe Veber's avatar Philippe Veber
Browse files

update wrt phylogenetics and bistro

parent e5695953
open Core
open Convdet
open Phylogenetics
let ok_exn err = function
| Ok x -> x
......@@ -45,7 +45,7 @@ let nucleotide_fasta_gc ?pos fa =
seq_gc ?pos seqs
let nucleotide_fasta_gc_ac ?pos tree fa =
let tree = Convdet.Simulator.tree_from_file tree in
let tree = Reviewphiltrans_toolbox.Utils.tree_from_file tree in
let seqs = strings_from_fasta fa in
let leaf_state =
Phylogenetics.Tree.map_leaves tree ~root:(0., 0) ~f:(fun _ b -> snd b = 0)
......@@ -76,7 +76,7 @@ let command =
main ~alignment
]
let%pworkflow histogram (fa : #Bistro_bioinfo.fasta Bistro.pworkflow) =
let%pworkflow histogram (fa : #Bistro.fasta Bistro.pworkflow) =
let al = ok_exn Alignment.show_parsing_error @@ Alignment.from_fasta [%path fa] in
let float_array_of_int_list x =
Array.of_list x
......
(library
(name reviewphiltrans)
(libraries bistro.bioinfo bistro.utils convdet gzt ocaml-r.graphics ocaml-r.grDevices reviewphiltrans_toolbox )
(libraries bistro.bioinfo bistro.utils gzt ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence reviewphiltrans_toolbox )
(preprocess
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
......
open Bistro
open Bistro_bioinfo
class type nhx = object
inherit text_file
......
......@@ -17,7 +17,7 @@ let multinomial ?(descr="") ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkfl
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) (* : text_file pworkflow *) =
let open Phylogenetics in
let open Convdet in
let open Phylogenetics_convergence in
let module MT = Multinomial_test in
let meth = [%param meth] in
let test = match meth with
......@@ -28,20 +28,22 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa
in
let fold_leaves (root : _ Tree.t) ~init ~f =
let open Tree in
let rec node acc branch_data n =
match n.branches with
| [] -> f acc branch_data n.node_data
| xs ->
List.fold xs ~init:acc ~f:branch
let rec node acc branch_data = function
| Leaf d ->
f acc branch_data d
| Node n ->
Non_empty_list.fold n.branches ~init:acc ~f:branch
and branch acc b = node acc b.branch_data b.tip in
List.fold root.branches ~init ~f:branch
and branch acc (Branch b) = node acc b.data b.tip in
match root with
| Leaf _ -> init
| Node n -> Non_empty_list.fold n.branches ~init ~f:branch
in
let alignment =
Alignment.from_fasta [%path faa]
|> Rresult.R.get_ok
in
let tree = Simulator.tree_from_file [%path tree_sc] in
let tree = Reviewphiltrans_toolbox.Utils.tree_from_file [%path tree_sc] in
let leaves = fold_leaves tree ~init:[] ~f:(fun acc (_, cond) ni ->
match ni.name with
| None -> failwith "Leaves of the tree should be named"
......@@ -59,7 +61,7 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa
)
in
let counts seqs i =
Amino_acid.vector (fun aa ->
Amino_acid.Table.init (fun aa ->
let aa = Amino_acid.to_char aa in
List.count seqs ~f:(fun s -> Char.equal s.[i] aa)
)
......
open Core_kernel
let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~gBGC:(gBGC0, gBGC1) ~tree ~fitness_profiles () =
let open Phylogenetics in
let () = Option.iter ~f:Random.init [%param seed] in
let n_h0 = [%param n_h0] in
let n_ha = [%param n_ha] in
......@@ -9,17 +10,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
let gBGC0 = [%param gBGC0] in
let gBGC1 = [%param gBGC1] in
let branch_factor = [%param branch_factor] in
let tree = Convdet.Simulator.tree_from_file ?alpha:branch_factor [%path tree] in
let fitness_profiles = Convdet.Profile_tsv.(read [%path fitness_profiles] |> to_fitness) in
let rescale_fitness beta = Convdet.Amino_acid.vector_map ~f:(( *. ) beta) in
let tree = Reviewphiltrans_toolbox.Utils.tree_from_file ?alpha:branch_factor [%path tree] in
let fitness_profiles = Phylogenetics_convergence.Profile_tsv.(read [%path fitness_profiles] |> to_fitness) in
let rescale_fitness beta = Amino_acid.Vector.map ~f:(( *. ) beta) in
let base_param =
let p = Convdet.Mutsel.random_param ~alpha_nucleotide:10. ~alpha_fitness:0.1 in
let p = Mutsel.random_param ~alpha_nucleotide:10. ~alpha_fitness:0.1 in
{ p with omega = 1. }
in
let random_profile beta =
Random.int (Array.length fitness_profiles)
|> Array.get fitness_profiles
|> Convdet.Amino_acid.vector_of_array_exn
|> Amino_acid.Vector.of_array_exn
|> rescale_fitness beta
in
let h0_params = Array.init n_h0 ~f:(fun _ ->
......@@ -29,8 +30,9 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
(p, q)
)
in
let most_probable_aa (pref : float Convdet.Amino_acid.vector) =
let arr = Array.mapi (pref :> float array) ~f:(fun i x -> x, i) in
let most_probable_aa (pref : Amino_acid.vector) =
let pref = Amino_acid.Vector.to_array pref in
let arr = Array.mapi pref ~f:(fun i x -> x, i) in
match Array.max_elt ~compare:Poly.compare arr with
| None -> assert false
| Some (_, i) -> i
......@@ -58,13 +60,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
| 1 -> q
| _ -> assert false
in
let root_condition = Convdet.Simulator.Mutsel.root_condition tree in
let root_condition =
Option.value_exn (Phylogenetics_convergence.Simulator.root_condition tree)
in
let root_dists = Array.init (n_h0 + n_ha) ~f:(fun i ->
Convdet.Mutsel.stationary_distribution (params i root_condition)
Mutsel.stationary_distribution (params i root_condition)
|> Mutsel.NSCodon.Vector.to_array
|> Mutsel.NSCodon.Table.of_array_exn
)
in
let root = Convdet.Simulator.Mutsel.hmm0 ~len:(n_h0 + n_ha) ~dist:(Array.get root_dists) in
let ali = Convdet.Simulator.Mutsel.alignment tree ~root params in
let root = Phylogenetics_convergence.Simulator.Mutsel.hmm0 ~len:(n_h0 + n_ha) ~dist:(Array.get root_dists) in
let ali = Phylogenetics_convergence.Simulator.Mutsel.alignment tree ~root params in
let species_name =
Phylogenetics.Tree.leaves tree
|> List.map ~f:(fun { name } -> Option.value_exn name) in
......@@ -78,7 +84,7 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
let save_fitness_histogram dest =
let data =
Array.fold (Array.append h0_params ha_params) ~init:[] ~f:(fun acc (p,q) ->
(p.scaled_fitness :> float array) :: (q.scaled_fitness :> float array) :: acc
(Amino_acid.Vector.to_array p.scaled_fitness) :: (Amino_acid.Vector.to_array q.scaled_fitness) :: acc
)
|> Array.concat
in
......@@ -98,29 +104,20 @@ let simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s ~gBGC ~tree ~fitness_profil
let%pworkflow pair_tree ~branch_length1 ~branch_length2 ~npairs =
let open Phylogenetics in
let branch_length1, branch_length2, npairs = [%param branch_length1, branch_length2, npairs] in
let tree ?name branches = {
Tree.node_data = { Newick_ast.name } ;
branches ;
}
in
let leaf name = Tree.leaf { Newick_ast.name = Some name } in
let branch ~length ~condition tip =
let tags = match condition with
| `Ancestral -> ["Condition", "0"]
| `Convergent -> ["Condition", "1" ; "Transition", "1"]
in
{
Tree.branch_data = { Newick_ast.length = Some length ; tags } ;
tip ;
}
in
Tree.branch { Newick_ast.length = Some length ; tags } tip in
let make_pair i =
tree [
branch ~length:branch_length2 ~condition:`Ancestral (tree ~name:(sprintf "A%d" i) []) ;
branch ~length:branch_length2 ~condition:`Convergent (tree ~name:(sprintf "C%d" i) []) ;
]
Tree.binary_node { Newick_ast.name = None }
(branch ~length:branch_length2 ~condition:`Ancestral (leaf (sprintf "A%d" i)))
(branch ~length:branch_length2 ~condition:`Convergent (leaf (sprintf "C%d" i)))
|> branch ~length:branch_length1 ~condition:`Ancestral
in
let tree =
Newick.Tree (tree (List.init npairs ~f:make_pair))
Newick.Tree (Tree.node { Newick_ast.name = None } (Non_empty_list.init npairs ~f:make_pair))
in
Newick.to_file tree [%dest]
(library
(name reviewphiltrans_toolbox)
(libraries biocaml.ez convdet gzt ocaml-r.graphics ocaml-r.grDevices)
(libraries biocaml.ez gzt ocaml-r.graphics ocaml-r.grDevices phylogenetics)
(preprocess (pps ppx_jane)))
open Core_kernel
let tree_from_file ?(alpha = 1.) fn =
match Phylogenetics.Newick.from_file fn with
| Branch (Branch { tip = t ; _ })
| Tree t ->
Phylogenetics.Tree.map t ~node:Fn.id ~leaf:Fn.id ~branch:Phylogenetics.Newick.(fun b ->
let length = Option.value_exn b.length *. alpha in
let condition =
match List.Assoc.find ~equal:String.equal b.tags "Condition" with
| Some s -> Int.of_string s
| None -> failwith "Missing Condition tag"
in
length, condition
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment