Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 2ac8dfe8 authored by Philippe Veber
Browse files

put each dataset variant in its own module

parent 186f64bc
......@@ -13,7 +13,7 @@ type det_out =
| Diffsel_out
type det_result = {
dataset : dataset ;
dataset : Dataset.t ;
det_meth : det_meth ;
det_result : det_out directory workflow ;
}
......
......@@ -13,7 +13,7 @@ type det_out =
| Diffsel_out
type det_result = {
dataset : dataset ;
dataset : Dataset.t ;
det_meth : det_meth ;
det_result : det_out directory workflow ;
}
......
open Core
open Bistro_utils
(** A ready dataset bundled with the two string prefixes that [repo]
    hands to [Repo.shift] when laying the dataset out. *)
type t = {
(* Prefix applied first by [repo] (inner [Repo.shift]). *)
model_prefix: string ;
(* Prefix applied second by [repo] (outer [Repo.shift]). *)
tree_prefix : string ;
(* The dataset payload, in its [Ready_dataset.t] form. *)
dataset : Ready_dataset.t ;
}
(** [repo ~preview dataset_l] builds the repo items for every dataset of
    [dataset_l] and concatenates them into a single list.

    For each dataset, the items come from [Ready_dataset.repo] when
    [preview] is true, and from [Raw_dataset.repo] applied to the raw
    projection of the dataset otherwise. They are then shifted by
    [model_prefix] first and [tree_prefix] second. *)
let repo ~preview dataset_l =
  let repo_of_entry { model_prefix ; tree_prefix ; dataset } =
    let contents =
      if preview then Ready_dataset.repo dataset
      else Raw_dataset.repo (Ready_dataset.to_raw dataset)
    in
    contents
    |> Repo.shift model_prefix
    |> Repo.shift tree_prefix
  in
  dataset_l
  |> List.map ~f:repo_of_entry
  |> List.concat
......@@ -10,29 +10,6 @@ type output_parse_input_tree =
| Tree4simu
| Tree_diffsel
(** Minimal form of a dataset: an input tree and nucleotide sequences,
    both held as workflows. *)
type raw_dataset = {
(* Workflow for the input tree ([nhx]). *)
input_tree: nhx workflow ;
(* Workflow for the nucleotide FASTA file. *)
fna: nucleotide_fasta workflow ;
}
(** Form of a dataset that carries, in addition to the fields of
    [raw_dataset], a tree-dataset directory and an amino-acid FASTA. *)
type ready_dataset = {
(* Workflow for the input tree ([nhx]). *)
input_tree: nhx workflow ;
(* Workflow for the directory tagged [`tree_dataset]. *)
tree_dataset : [`tree_dataset] directory workflow ;
(* Workflow for the nucleotide FASTA file. *)
fna: nucleotide_fasta workflow ;
(* Workflow for the amino-acid FASTA file. *)
faa: aminoacid_fasta workflow ;
}
(** Two-case sum used to carry either form of a dataset; ['a] is the
    payload of the raw case and ['b] the payload of the ready case. *)
type ('a,'b) w_dataset =
| Raw_dataset of 'a
| Ready_dataset of 'b
(** A ready dataset together with two string prefixes (model and tree). *)
type dataset = {
(* Prefix naming the model. *)
model_prefix: string ;
(* Prefix naming the tree. *)
tree_prefix : string ;
(* The dataset payload. *)
ready_dataset : ready_dataset
}
type det_meth =
| Pcoc
| Pcoc_gamma
......
......@@ -6,55 +6,6 @@ open File_formats
open Defs
open Convergence_detection
(** Builds a four-field dataset record from [raw_dataset]: [input_tree]
    and [fna] are copied over, [tree_dataset] comes from
    [Tree_dataset.prepare] on the input tree, and [faa] from
    [Bppsuite.fna2faa] on the nucleotide sequences. *)
let ready_dataset_of_raw_dataset raw_dataset =
  let tree = raw_dataset.input_tree in
  let nucleotides = raw_dataset.fna in
  (* Derived workflows are bound with [let] to keep a fixed call order. *)
  let prepared = Tree_dataset.prepare tree in
  let proteins = Bppsuite.fna2faa ~fna:nucleotides in
  { input_tree = tree ;
    tree_dataset = prepared ;
    fna = nucleotides ;
    faa = proteins }
(** Keeps only the [input_tree] and [fna] fields of [ready_dataset],
    returning them as a two-field record. *)
let raw_dataset_of_ready_dataset ready_dataset =
  { input_tree = ready_dataset.input_tree ;
    fna = ready_dataset.fna }
(** Lists the repo items of a ready dataset (input tree, node-id files
    for [H0] and [Ha], detection / simulation / diffsel trees, and both
    sequence files) and shifts them under ["ready_dataset"]. *)
let repo_of_ready_dataset (rd : ready_dataset) =
  let tree_ds = rd.tree_dataset in
  Repo.(
    shift "ready_dataset" [
      item ["input_tree.nhx"] rd.input_tree ;
      item ["tree.H0.node_ids"] (Tree_dataset.nodes tree_ds H0) ;
      item ["tree.Ha.node_ids"] (Tree_dataset.nodes tree_ds Ha) ;
      item ["tree.only_convergent_tags.nhx"] (Tree_dataset.tree tree_ds `Detection) ;
      item ["tree.only_node_ids.nhx"] (Tree_dataset.tree tree_ds `Simulation) ;
      item ["tree.diffsel"] (Tree_dataset.diffsel_tree tree_ds) ;
      item ["simulated_sequences.fna"] rd.fna ;
      item ["simulated_sequences.faa"] rd.faa ;
    ]
  )
(** Lists the repo items of a raw dataset (input tree and nucleotide
    sequences) and shifts them under ["raw_dataset"]. *)
let repo_of_raw_dataset (raw_dataset:raw_dataset) =
  Repo.(
    shift "raw_dataset" [
      item ["input_tree.nhx"] raw_dataset.input_tree ;
      item ["simulated_sequences.fna"] raw_dataset.fna ;
    ]
  )
(** [repo_of_dataset_l ~preview dataset_l] builds the repo items of each
    dataset and concatenates them.

    With [preview] set, the ready form of each dataset is exported;
    otherwise only its raw projection is. Each dataset's items are
    shifted by its [model_prefix] first, then by its [tree_prefix]. *)
let repo_of_dataset_l ~preview dataset_l =
  let repo_of_one (d : Defs.dataset) =
    let contents =
      if preview then repo_of_ready_dataset d.ready_dataset
      else repo_of_raw_dataset (raw_dataset_of_ready_dataset d.ready_dataset)
    in
    contents
    |> Repo.shift d.model_prefix
    |> Repo.shift d.tree_prefix
  in
  dataset_l
  |> List.map ~f:repo_of_one
  |> List.concat
let derive_from_model ~model ~tree ~tree_dataset ~tree_prefix ~profile_f ~preview =
let model_prefix = Convergence_hypothesis.string_of_model model in
let nb_sites = if preview then 20 else 1000 in
......@@ -70,8 +21,8 @@ let derive_from_model ~model ~tree ~tree_dataset ~tree_prefix ~profile_f ~previe
let fna = Bppsuite.bppseqgen_multi_profiles ~descr ~nb_sites ~tree ~config:config_p ~profile_f in
let faa = Bppsuite.fna2faa ~fna in
let ready_dataset = { input_tree = tree ; tree_dataset ; fna; faa} in
{ model_prefix; tree_prefix; ready_dataset }
let ready_dataset = { Ready_dataset.input_tree = tree ; tree_dataset ; fna; faa} in
{ Dataset.model_prefix; tree_prefix; dataset = ready_dataset }
let derive_from_tree ~tree_dir ~tree ~profile_f ~preview =
let tree_prefix = Filename.chop_extension tree in
......@@ -126,12 +77,12 @@ let repo_of_dataset_results_l ~dataset_results_l =
)
|> List.concat
let derive_from_det_meth ~det_meth ~dataset ~preview =
let faa = dataset.ready_dataset.faa in
let fna = dataset.ready_dataset.fna in
let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
let faa = dataset.dataset.faa in
let fna = dataset.dataset.fna in
let phy_n = Bppsuite.fa2phy ~fna in
let pcoc_tree = Tree_dataset.tree dataset.ready_dataset.tree_dataset `Detection in
let diffsel_tree = Tree_dataset.diffsel_tree dataset.ready_dataset.tree_dataset in
let pcoc_tree = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
let diffsel_tree = Tree_dataset.diffsel_tree dataset.dataset.tree_dataset in
let w_every = if preview then 1 else 10 in
let n_cycles = if preview then 100 else 1000 in
let det_result = match det_meth with
......@@ -151,8 +102,8 @@ let derive_from_dataset ~dataset ~preview =
) in
let merged_results = merge_results ~res_by_tools in
let tsv = merged_results in
let faa = dataset.ready_dataset.faa in
let tree = Tree_dataset.tree dataset.ready_dataset.tree_dataset `Detection in
let faa = dataset.dataset.faa in
let tree = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
let plot_merged_results = plot_merge_results ~res_by_tools ~tsv ~faa ~tree in
let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in
......@@ -172,7 +123,7 @@ let main ~outdir ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview () =
let dataset_l = derive_sim ~tree_dir ~trees ~profile_fn ~preview in
let dataset_results_l = derive_det ~dataset_l ~profile_fn ~preview in
let repo = [
repo_of_dataset_l dataset_l ~preview ;
Dataset.repo dataset_l ~preview ;
repo_of_dataset_results_l ~dataset_results_l;
]
|> List.concat
......
open Bistro.Std
open File_formats
open Bistro_utils
(** Minimal form of a dataset: an input tree and nucleotide sequences,
    both held as workflows. *)
type t = {
(* Workflow for the input tree ([nhx]). *)
input_tree: nhx workflow ;
(* Workflow for the nucleotide FASTA file. *)
fna: nucleotide_fasta workflow ;
}
(** Lists the repo items of a raw dataset (input tree and nucleotide
    sequences) and shifts them under ["raw_dataset"]. *)
let repo rd =
  Repo.(
    shift "raw_dataset" [
      item ["input_tree.nhx"] rd.input_tree ;
      item ["simulated_sequences.fna"] rd.fna ;
    ]
  )
open Bistro.Std
open Bistro_utils
open File_formats
(** Form of a dataset that carries, in addition to the fields of
    [Raw_dataset.t], a tree-dataset directory and an amino-acid FASTA. *)
type t = {
(* Workflow for the input tree ([nhx]). *)
input_tree: nhx workflow ;
(* Workflow for the directory tagged [`tree_dataset]. *)
tree_dataset : [`tree_dataset] directory workflow ;
(* Workflow for the nucleotide FASTA file. *)
fna: nucleotide_fasta workflow ;
(* Workflow for the amino-acid FASTA file. *)
faa: aminoacid_fasta workflow ;
}
(** Builds a ready dataset from a raw one: [input_tree] and [fna] are
    copied over, [tree_dataset] comes from [Tree_dataset.prepare] on the
    input tree, and [faa] from [Bppsuite.fna2faa] on the nucleotide
    sequences. *)
let of_raw (raw_dataset : Raw_dataset.t) =
  let tree = raw_dataset.input_tree in
  let nucleotides = raw_dataset.fna in
  (* Derived workflows are bound with [let] to keep a fixed call order. *)
  let prepared = Tree_dataset.prepare tree in
  let proteins = Bppsuite.fna2faa ~fna:nucleotides in
  { input_tree = tree ;
    tree_dataset = prepared ;
    fna = nucleotides ;
    faa = proteins }
(** Lists the repo items of a ready dataset (input tree, node-id files
    for [H0] and [Ha], detection / simulation / diffsel trees, and both
    sequence files) and shifts them under ["ready_dataset"]. *)
let repo rd =
  let tree_ds = rd.tree_dataset in
  Repo.(
    shift "ready_dataset" [
      item ["input_tree.nhx"] rd.input_tree ;
      item ["tree.H0.node_ids"] (Tree_dataset.nodes tree_ds H0) ;
      item ["tree.Ha.node_ids"] (Tree_dataset.nodes tree_ds Ha) ;
      item ["tree.only_convergent_tags.nhx"] (Tree_dataset.tree tree_ds `Detection) ;
      item ["tree.only_node_ids.nhx"] (Tree_dataset.tree tree_ds `Simulation) ;
      item ["tree.diffsel"] (Tree_dataset.diffsel_tree tree_ds) ;
      item ["simulated_sequences.fna"] rd.fna ;
      item ["simulated_sequences.faa"] rd.faa ;
    ]
  )
(** Projects a ready dataset onto its raw form, keeping only the
    [input_tree] and [fna] fields.

    The trailing [_] in the pattern states that [tree_dataset] and [faa]
    are dropped on purpose; without it the compiler reports warning 9
    (labels not bound in record pattern). *)
let to_raw { input_tree ; fna ; _ } =
  { Raw_dataset.input_tree ; fna }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment