Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit a3df223f authored by Philippe Veber's avatar Philippe Veber
Browse files

update wrt phylogenetics and bistro

parent e5695953
open Core open Core
open Convdet open Phylogenetics
let ok_exn err = function let ok_exn err = function
| Ok x -> x | Ok x -> x
...@@ -45,7 +45,7 @@ let nucleotide_fasta_gc ?pos fa = ...@@ -45,7 +45,7 @@ let nucleotide_fasta_gc ?pos fa =
seq_gc ?pos seqs seq_gc ?pos seqs
let nucleotide_fasta_gc_ac ?pos tree fa = let nucleotide_fasta_gc_ac ?pos tree fa =
let tree = Convdet.Simulator.tree_from_file tree in let tree = Reviewphiltrans_toolbox.Utils.tree_from_file tree in
let seqs = strings_from_fasta fa in let seqs = strings_from_fasta fa in
let leaf_state = let leaf_state =
Phylogenetics.Tree.map_leaves tree ~root:(0., 0) ~f:(fun _ b -> snd b = 0) Phylogenetics.Tree.map_leaves tree ~root:(0., 0) ~f:(fun _ b -> snd b = 0)
...@@ -76,7 +76,7 @@ let command = ...@@ -76,7 +76,7 @@ let command =
main ~alignment main ~alignment
] ]
let%pworkflow histogram (fa : #Bistro_bioinfo.fasta Bistro.pworkflow) = let%pworkflow histogram (fa : #Bistro.fasta Bistro.pworkflow) =
let al = ok_exn Alignment.show_parsing_error @@ Alignment.from_fasta [%path fa] in let al = ok_exn Alignment.show_parsing_error @@ Alignment.from_fasta [%path fa] in
let float_array_of_int_list x = let float_array_of_int_list x =
Array.of_list x Array.of_list x
......
(library (library
(name reviewphiltrans) (name reviewphiltrans)
(libraries bistro.bioinfo bistro.utils convdet gzt ocaml-r.graphics ocaml-r.grDevices reviewphiltrans_toolbox ) (libraries bistro.bioinfo bistro.utils gzt ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence reviewphiltrans_toolbox )
(preprocess (preprocess
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here))) (pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
......
open Bistro open Bistro
open Bistro_bioinfo
class type nhx = object class type nhx = object
inherit text_file inherit text_file
......
...@@ -17,7 +17,7 @@ let multinomial ?(descr="") ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkfl ...@@ -17,7 +17,7 @@ let multinomial ?(descr="") ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkfl
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) (* : text_file pworkflow *) = let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) (* : text_file pworkflow *) =
let open Phylogenetics in let open Phylogenetics in
let open Convdet in let open Phylogenetics_convergence in
let module MT = Multinomial_test in let module MT = Multinomial_test in
let meth = [%param meth] in let meth = [%param meth] in
let test = match meth with let test = match meth with
...@@ -28,20 +28,22 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa ...@@ -28,20 +28,22 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa
in in
let fold_leaves (root : _ Tree.t) ~init ~f = let fold_leaves (root : _ Tree.t) ~init ~f =
let open Tree in let open Tree in
let rec node acc branch_data n = let rec node acc branch_data = function
match n.branches with | Leaf d ->
| [] -> f acc branch_data n.node_data f acc branch_data d
| xs -> | Node n ->
List.fold xs ~init:acc ~f:branch Non_empty_list.fold n.branches ~init:acc ~f:branch
and branch acc b = node acc b.branch_data b.tip in and branch acc (Branch b) = node acc b.data b.tip in
List.fold root.branches ~init ~f:branch match root with
| Leaf _ -> init
| Node n -> Non_empty_list.fold n.branches ~init ~f:branch
in in
let alignment = let alignment =
Alignment.from_fasta [%path faa] Alignment.from_fasta [%path faa]
|> Rresult.R.get_ok |> Rresult.R.get_ok
in in
let tree = Simulator.tree_from_file [%path tree_sc] in let tree = Reviewphiltrans_toolbox.Utils.tree_from_file [%path tree_sc] in
let leaves = fold_leaves tree ~init:[] ~f:(fun acc (_, cond) ni -> let leaves = fold_leaves tree ~init:[] ~f:(fun acc (_, cond) ni ->
match ni.name with match ni.name with
| None -> failwith "Leaves of the tree should be named" | None -> failwith "Leaves of the tree should be named"
...@@ -59,7 +61,7 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa ...@@ -59,7 +61,7 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(fa
) )
in in
let counts seqs i = let counts seqs i =
Amino_acid.vector (fun aa -> Amino_acid.Table.init (fun aa ->
let aa = Amino_acid.to_char aa in let aa = Amino_acid.to_char aa in
List.count seqs ~f:(fun s -> Char.equal s.[i] aa) List.count seqs ~f:(fun s -> Char.equal s.[i] aa)
) )
......
open Core_kernel open Core_kernel
let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~gBGC:(gBGC0, gBGC1) ~tree ~fitness_profiles () = let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~gBGC:(gBGC0, gBGC1) ~tree ~fitness_profiles () =
let open Phylogenetics in
let () = Option.iter ~f:Random.init [%param seed] in let () = Option.iter ~f:Random.init [%param seed] in
let n_h0 = [%param n_h0] in let n_h0 = [%param n_h0] in
let n_ha = [%param n_ha] in let n_ha = [%param n_ha] in
...@@ -9,17 +10,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g ...@@ -9,17 +10,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
let gBGC0 = [%param gBGC0] in let gBGC0 = [%param gBGC0] in
let gBGC1 = [%param gBGC1] in let gBGC1 = [%param gBGC1] in
let branch_factor = [%param branch_factor] in let branch_factor = [%param branch_factor] in
let tree = Convdet.Simulator.tree_from_file ?alpha:branch_factor [%path tree] in let tree = Reviewphiltrans_toolbox.Utils.tree_from_file ?alpha:branch_factor [%path tree] in
let fitness_profiles = Convdet.Profile_tsv.(read [%path fitness_profiles] |> to_fitness) in let fitness_profiles = Phylogenetics_convergence.Profile_tsv.(read [%path fitness_profiles] |> to_fitness) in
let rescale_fitness beta = Convdet.Amino_acid.vector_map ~f:(( *. ) beta) in let rescale_fitness beta = Amino_acid.Vector.map ~f:(( *. ) beta) in
let base_param = let base_param =
let p = Convdet.Mutsel.random_param ~alpha_nucleotide:10. ~alpha_fitness:0.1 in let p = Mutsel.random_param ~alpha_nucleotide:10. ~alpha_fitness:0.1 in
{ p with omega = 1. } { p with omega = 1. }
in in
let random_profile beta = let random_profile beta =
Random.int (Array.length fitness_profiles) Random.int (Array.length fitness_profiles)
|> Array.get fitness_profiles |> Array.get fitness_profiles
|> Convdet.Amino_acid.vector_of_array_exn |> Amino_acid.Vector.of_array_exn
|> rescale_fitness beta |> rescale_fitness beta
in in
let h0_params = Array.init n_h0 ~f:(fun _ -> let h0_params = Array.init n_h0 ~f:(fun _ ->
...@@ -29,8 +30,9 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g ...@@ -29,8 +30,9 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
(p, q) (p, q)
) )
in in
let most_probable_aa (pref : float Convdet.Amino_acid.vector) = let most_probable_aa (pref : Amino_acid.vector) =
let arr = Array.mapi (pref :> float array) ~f:(fun i x -> x, i) in let pref = Amino_acid.Vector.to_array pref in
let arr = Array.mapi pref ~f:(fun i x -> x, i) in
match Array.max_elt ~compare:Poly.compare arr with match Array.max_elt ~compare:Poly.compare arr with
| None -> assert false | None -> assert false
| Some (_, i) -> i | Some (_, i) -> i
...@@ -58,13 +60,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g ...@@ -58,13 +60,17 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
| 1 -> q | 1 -> q
| _ -> assert false | _ -> assert false
in in
let root_condition = Convdet.Simulator.Mutsel.root_condition tree in let root_condition =
Option.value_exn (Phylogenetics_convergence.Simulator.root_condition tree)
in
let root_dists = Array.init (n_h0 + n_ha) ~f:(fun i -> let root_dists = Array.init (n_h0 + n_ha) ~f:(fun i ->
Convdet.Mutsel.stationary_distribution (params i root_condition) Mutsel.stationary_distribution (params i root_condition)
|> Mutsel.NSCodon.Vector.to_array
|> Mutsel.NSCodon.Table.of_array_exn
) )
in in
let root = Convdet.Simulator.Mutsel.hmm0 ~len:(n_h0 + n_ha) ~dist:(Array.get root_dists) in let root = Phylogenetics_convergence.Simulator.Mutsel.hmm0 ~len:(n_h0 + n_ha) ~dist:(Array.get root_dists) in
let ali = Convdet.Simulator.Mutsel.alignment tree ~root params in let ali = Phylogenetics_convergence.Simulator.Mutsel.alignment tree ~root params in
let species_name = let species_name =
Phylogenetics.Tree.leaves tree Phylogenetics.Tree.leaves tree
|> List.map ~f:(fun { name } -> Option.value_exn name) in |> List.map ~f:(fun { name } -> Option.value_exn name) in
...@@ -78,7 +84,7 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g ...@@ -78,7 +84,7 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
let save_fitness_histogram dest = let save_fitness_histogram dest =
let data = let data =
Array.fold (Array.append h0_params ha_params) ~init:[] ~f:(fun acc (p,q) -> Array.fold (Array.append h0_params ha_params) ~init:[] ~f:(fun acc (p,q) ->
(p.scaled_fitness :> float array) :: (q.scaled_fitness :> float array) :: acc (Amino_acid.Vector.to_array p.scaled_fitness) :: (Amino_acid.Vector.to_array q.scaled_fitness) :: acc
) )
|> Array.concat |> Array.concat
in in
...@@ -98,29 +104,20 @@ let simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s ~gBGC ~tree ~fitness_profil ...@@ -98,29 +104,20 @@ let simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s ~gBGC ~tree ~fitness_profil
let%pworkflow pair_tree ~branch_length1 ~branch_length2 ~npairs = let%pworkflow pair_tree ~branch_length1 ~branch_length2 ~npairs =
let open Phylogenetics in let open Phylogenetics in
let branch_length1, branch_length2, npairs = [%param branch_length1, branch_length2, npairs] in let branch_length1, branch_length2, npairs = [%param branch_length1, branch_length2, npairs] in
let tree ?name branches = { let leaf name = Tree.leaf { Newick_ast.name = Some name } in
Tree.node_data = { Newick_ast.name } ;
branches ;
}
in
let branch ~length ~condition tip = let branch ~length ~condition tip =
let tags = match condition with let tags = match condition with
| `Ancestral -> ["Condition", "0"] | `Ancestral -> ["Condition", "0"]
| `Convergent -> ["Condition", "1" ; "Transition", "1"] | `Convergent -> ["Condition", "1" ; "Transition", "1"]
in in
{ Tree.branch { Newick_ast.length = Some length ; tags } tip in
Tree.branch_data = { Newick_ast.length = Some length ; tags } ;
tip ;
}
in
let make_pair i = let make_pair i =
tree [ Tree.binary_node { Newick_ast.name = None }
branch ~length:branch_length2 ~condition:`Ancestral (tree ~name:(sprintf "A%d" i) []) ; (branch ~length:branch_length2 ~condition:`Ancestral (leaf (sprintf "A%d" i)))
branch ~length:branch_length2 ~condition:`Convergent (tree ~name:(sprintf "C%d" i) []) ; (branch ~length:branch_length2 ~condition:`Convergent (leaf (sprintf "C%d" i)))
]
|> branch ~length:branch_length1 ~condition:`Ancestral |> branch ~length:branch_length1 ~condition:`Ancestral
in in
let tree = let tree =
Newick.Tree (tree (List.init npairs ~f:make_pair)) Newick.Tree (Tree.node { Newick_ast.name = None } (Non_empty_list.init npairs ~f:make_pair))
in in
Newick.to_file tree [%dest] Newick.to_file tree [%dest]
(library (library
(name reviewphiltrans_toolbox) (name reviewphiltrans_toolbox)
(libraries biocaml.ez convdet gzt ocaml-r.graphics ocaml-r.grDevices) (libraries biocaml.ez gzt ocaml-r.graphics ocaml-r.grDevices phylogenetics)
(preprocess (pps ppx_jane))) (preprocess (pps ppx_jane)))
open Core_kernel
let tree_from_file ?(alpha = 1.) fn =
match Phylogenetics.Newick.from_file fn with
| Branch (Branch { tip = t ; _ })
| Tree t ->
Phylogenetics.Tree.map t ~node:Fn.id ~leaf:Fn.id ~branch:Phylogenetics.Newick.(fun b ->
let length = Option.value_exn b.length *. alpha in
let condition =
match List.Assoc.find ~equal:String.equal b.tags "Condition" with
| Some s -> Int.of_string s
| None -> failwith "Missing Condition tag"
in
length, condition
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment