Commit c33c07df authored by Philippe Veber
Browse files

refactoring

parent 631e6a6a
......@@ -6,6 +6,11 @@ class type nhx = object
method format : [`nhx]
end
(** Type tag for text files whose [format] is [`diffsel_tree]. It inherits
    [text_file] and is used as the parameter of [workflow] (see the
    [diffsel_tree workflow] result type further down this listing). *)
class type diffsel_tree = object
inherit text_file
method format : [`diffsel_tree]
end
class type nucleotide_fasta = object
inherit fasta
method alphabet : [`Nucleotide]
......@@ -33,7 +38,7 @@ type raw_dataset = {
(** A dataset together with the workflows derived from its input tree and
    sequences.
    NOTE(review): both [parsed_tree] and [tree_dataset] are listed here, while
    the record literals elsewhere in this listing set only one of the two;
    this looks like the before/after lines of a change shown together.
    Confirm which field the current revision keeps. *)
type ready_dataset = {
(* Input tree in NHX format. *)
input_tree: nhx workflow ;
(* Directory produced by [parse_input_tree]. *)
parsed_tree : parsed_input_tree directory workflow ;
(* Directory produced by [Tree_dataset.prepare]. *)
tree_dataset : [`tree_dataset] directory workflow ;
(* Nucleotide sequences (FASTA). *)
fna: nucleotide_fasta workflow ;
(* Amino-acid sequences (FASTA). *)
faa: aminoacid_fasta workflow ;
}
......@@ -5,38 +5,21 @@ open Bistro.Std
open File_formats
open Models_def
(** [parse_input_tree ~tree] is the workflow that dumps the embedded
    [Scripts.parse_input_tree] script to a file and runs it with [python],
    passing [tree] via [-t] and the workflow destination via [-o]. *)
let parse_input_tree ~tree : parsed_input_tree directory workflow =
  (* The docker environment (carinerey/ete3:3.0.0b35) is currently disabled:
     let env = docker_image ~account:"carinerey" ~name:"ete3:3.0.0b35" () in *)
  let script = file_dump (string Scripts.parse_input_tree) in
  let arguments = [
    script ;
    opt "-t" dep tree ;
    opt "-o" ident dest ;
  ]
  in
  let python_call = cmd "python" (*~env*) arguments in
  workflow ~descr:"utils.parse_input_tree" [ python_call ]
(** [select_out parsed_tree t] picks, inside the [parsed_tree] directory, the
    file that corresponds to the requested output kind [t]. *)
let select_out parsed_tree t =
  (* Resolve the file name first so the selector is applied in one place. *)
  let file_name =
    match t with
    | Nodes_H0 -> "tree.H0.node_ids"
    | Nodes_Ha -> "tree.Ha.node_ids"
    | Tree4detect -> "tree.only_convergent_tags.nhx"
    | Tree4simu -> "tree.only_node_ids.nhx"
    | Tree_diffsel -> "tree.diffsel"
  in
  parsed_tree / selector [ file_name ]
(* Builds a [ready_dataset] from [raw_dataset]: the input tree and the
   nucleotide FASTA are carried over unchanged, and the amino-acid FASTA is
   obtained with [Bppsuite.fna2faa].
   NOTE(review): the tree-derived value is computed twice (via
   [parse_input_tree] and via [Tree_dataset.prepare]) and two record literals
   follow each other; this looks like before/after lines of a change shown
   together. Confirm which pair is current. *)
let ready_dataset_of_raw_dataset raw_dataset =
let input_tree = raw_dataset.input_tree in
let fna = raw_dataset.fna in
let parsed_tree = parse_input_tree ~tree:input_tree in
let tree_dataset = Tree_dataset.prepare input_tree in
let faa = Bppsuite.fna2faa ~fna in
{ input_tree; parsed_tree; fna; faa}
{ input_tree; tree_dataset ; fna; faa}
(* Lists the contents of [ready_dataset] as [Repo] items, all placed under the
   "ready_dataset/" path: the input tree, five files selected from the
   tree-derived directory, and the nucleotide and amino-acid sequence files.
   NOTE(review): the five tree-derived items appear twice with identical
   destination paths, once selected from [parsed_tree] and once from
   [tree_dataset]; this looks like before/after lines of a change shown
   together. Confirm which set is current. *)
let repo_of_ready_dataset (ready_dataset:ready_dataset) =
Repo.[
item ["ready_dataset/input_tree.nhx"] ready_dataset.input_tree ;
item ["ready_dataset/tree.H0.node_ids" ] (ready_dataset.parsed_tree / selector [ "tree.H0.node_ids" ]) ;
item ["ready_dataset/tree.Ha.node_ids" ] (ready_dataset.parsed_tree / selector [ "tree.Ha.node_ids" ]) ;
item ["ready_dataset/tree.only_convergent_tags.nhx" ] (ready_dataset.parsed_tree / selector [ "tree.only_convergent_tags.nhx" ]) ;
item ["ready_dataset/tree.only_node_ids.nhx" ] (ready_dataset.parsed_tree / selector [ "tree.only_node_ids.nhx" ]) ;
item ["ready_dataset/tree.diffsel" ] (ready_dataset.parsed_tree / selector [ "tree.diffsel" ]) ;
item ["ready_dataset/tree.H0.node_ids" ] (ready_dataset.tree_dataset / selector [ "tree.H0.node_ids" ]) ;
item ["ready_dataset/tree.Ha.node_ids" ] (ready_dataset.tree_dataset / selector [ "tree.Ha.node_ids" ]) ;
item ["ready_dataset/tree.only_convergent_tags.nhx" ] (ready_dataset.tree_dataset / selector [ "tree.only_convergent_tags.nhx" ]) ;
item ["ready_dataset/tree.only_node_ids.nhx" ] (ready_dataset.tree_dataset / selector [ "tree.only_node_ids.nhx" ]) ;
item ["ready_dataset/tree.diffsel" ] (ready_dataset.tree_dataset / selector [ "tree.diffsel" ]) ;
item ["ready_dataset/simulated_sequences.fna"] ready_dataset.fna ;
item ["ready_dataset/simulated_sequences.faa"] ready_dataset.faa ;
]
......@@ -48,13 +31,14 @@ let repo_of_raw_dataset (raw_dataset:raw_dataset) =
]
let derive_from_model ~model ~tree ~parsed_tree ~preview =
let derive_from_model ~model ~tree ~tree_dataset ~preview =
let nb_sites = if preview then 10 else 100 in
let config = define_bpp_config_of_model model in
let fna = Bppsuite.bppseqgen ~nb_sites ~tree:(select_out parsed_tree Tree4simu) ~config in
let tree = Tree_dataset.tree tree_dataset `Simulation in
let fna = Bppsuite.bppseqgen ~nb_sites ~tree ~config in
let faa = Bppsuite.fna2faa ~fna in
let raw_dataset = { input_tree = tree ; fna} in
let ready_dataset = { input_tree = tree ; parsed_tree; fna; faa} in
let ready_dataset = { input_tree = tree ; tree_dataset ; fna; faa} in
if preview then
repo_of_ready_dataset ready_dataset
else
......@@ -63,10 +47,12 @@ let derive_from_model ~model ~tree ~parsed_tree ~preview =
(* For the tree file [tree] located in [tree_dir], derives the repository
   items of every model in [H0; Ha] with [derive_from_model], shifts each
   model's items under [string_of_model model] with [Repo.shift], and
   concatenates the per-model lists.
   NOTE(review): the tree-derived value is computed twice ([parse_input_tree]
   and [Tree_dataset.prepare]) and the closure body contains two forms of the
   [derive_from_model] call; this looks like before/after lines of a change
   shown together. Confirm which form is current. *)
let derive_from_tree ~tree_dir ~tree ~preview =
(* Shadow the file name with the input workflow for that path. *)
let tree = input (Filename.concat tree_dir tree) in
let parsed_tree = parse_input_tree ~tree in
let tree_dataset = Tree_dataset.prepare tree in
let models = [H0; Ha] in
List.map models ~f:(fun model ->
Repo.shift (string_of_model model) (derive_from_model ~model ~tree ~parsed_tree ~preview))
derive_from_model ~model ~tree ~tree_dataset ~preview
|> Repo.shift (string_of_model model)
)
|> List.concat
let derive ~tree_dir ~trees ~preview =
......
open Core
open Bistro_utils
open Bistro.EDSL
open Bistro.Std
open File_formats
open Models_def
(** [parse_input_tree ~tree] is the workflow that writes the embedded
    [Scripts.parse_input_tree] script to a file and executes it with [python];
    [tree] is passed with [-t] and the workflow destination with [-o]. *)
let parse_input_tree ~tree : parsed_input_tree directory workflow =
  (* Docker environment kept for reference but not used:
     let env = docker_image ~account:"carinerey" ~name:"ete3:3.0.0b35" () in *)
  let script_file = file_dump (string Scripts.parse_input_tree) in
  let tree_arg = opt "-t" dep tree in
  let output_arg = opt "-o" ident dest in
  let run_script = cmd "python" (*~env*) [ script_file ; tree_arg ; output_arg ] in
  workflow ~descr:"utils.parse_input_tree" [ run_script ]
open Core
open Bistro.EDSL
open Bistro.Std
open File_formats
open Models_def
(* Docker image (account "carinerey", name "ete3:3.0.0b35") passed as [~env]
   to the python command in [prepare]. *)
let env = docker_image ~account:"carinerey" ~name:"ete3:3.0.0b35" ()
(** [prepare tree] is the workflow that dumps the embedded
    [Scripts.parse_input_tree] script to a file and runs it with [python]
    inside [env], passing [tree] via [-t] and the workflow destination via
    [-o]. *)
let prepare tree =
  let script = file_dump (string Scripts.parse_input_tree) in
  let arguments = [
    script ;
    opt "-t" dep tree ;
    opt "-o" ident dest ;
  ]
  in
  let python_call = cmd "python" ~env arguments in
  workflow ~descr:"utils.parse_input_tree" [ python_call ]
(** [nodes dataset model] selects, inside [dataset], the node-id file that
    belongs to [model]. *)
let nodes dataset model =
  let file_name =
    match model with
    | Models_def.H0 -> "tree.H0.node_ids"
    | Models_def.Ha -> "tree.Ha.node_ids"
  in
  dataset / selector [ file_name ]
(** [tree dataset mode] selects, inside [dataset], the NHX tree file that
    corresponds to [mode]. *)
let tree dataset mode =
  let file_name =
    match mode with
    | `Detection -> "tree.only_convergent_tags.nhx"
    | `Simulation -> "tree.only_node_ids.nhx"
  in
  dataset / selector [ file_name ]
(** [diffsel_tree dataset] selects the "tree.diffsel" file inside
    [dataset]. *)
let diffsel_tree dataset =
  let path = [ "tree.diffsel" ] in
  dataset / selector path
open Bistro.Std
open File_formats
(** [prepare tree] is the directory workflow obtained by running the
    [Scripts.parse_input_tree] Python script on the NHX tree [tree]. *)
val prepare : nhx workflow -> [`tree_dataset] directory workflow
(** [nodes dataset model] selects from [dataset] the node-id file of [model]:
    ["tree.H0.node_ids"] for [H0] and ["tree.Ha.node_ids"] for [Ha]. *)
val nodes :
[`tree_dataset] directory workflow ->
Models_def.model ->
text_file workflow
(** [tree dataset mode] selects from [dataset] the NHX tree for [mode]:
    ["tree.only_convergent_tags.nhx"] for [`Detection] and
    ["tree.only_node_ids.nhx"] for [`Simulation]. *)
val tree :
[`tree_dataset] directory workflow ->
[`Detection | `Simulation] ->
nhx workflow
(** [diffsel_tree dataset] selects the ["tree.diffsel"] file from
    [dataset]. *)
val diffsel_tree :
[`tree_dataset] directory workflow ->
diffsel_tree workflow
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment