Commit 2ac8dfe8 authored by Philippe Veber
Browse files

put each dataset variant in its own module

parent 186f64bc
......@@ -13,7 +13,7 @@ type det_out =
| Diffsel_out
(* Record tying a dataset and a detection method to the workflow that
   yields that method's output directory ([det_out directory workflow]).
   NOTE(review): the two [dataset] lines below appear to be the removed and
   the added side of this diff hunk ([dataset] -> [Dataset.t]); presumably
   only one of them exists in the actual source file -- confirm. *)
type det_result = {
dataset : dataset ;
dataset : Dataset.t ;
det_meth : det_meth ;
det_result : det_out directory workflow ;
}
......
......@@ -13,7 +13,7 @@ type det_out =
| Diffsel_out
(* Record tying a dataset and a detection method to the workflow that
   yields that method's output directory ([det_out directory workflow]).
   NOTE(review): the two [dataset] lines below appear to be the removed and
   the added side of this diff hunk ([dataset] -> [Dataset.t]); presumably
   only one of them exists in the actual source file -- confirm. *)
type det_result = {
dataset : dataset ;
dataset : Dataset.t ;
det_meth : det_meth ;
det_result : det_out directory workflow ;
}
......
open Core
open Bistro_utils
(** A ready dataset tagged with a model prefix and a tree prefix. *)
type t =
  { model_prefix : string  (** passed to [Repo.shift] first by [repo] *)
  ; tree_prefix : string  (** passed to [Repo.shift] second by [repo] *)
  ; dataset : Ready_dataset.t  (** the underlying ready dataset *)
  }
(** [repo ~preview dataset_l] builds one repo per element of [dataset_l] and
    concatenates them in order.

    When [preview] is [true] the repo comes from [Ready_dataset.repo];
    otherwise the dataset is first reduced with [Ready_dataset.to_raw] and
    the repo comes from [Raw_dataset.repo]. In both cases the result is
    shifted by [model_prefix] and then by [tree_prefix]. *)
let repo ~preview dataset_l =
  let repo_of_one { model_prefix; tree_prefix; dataset } =
    let items =
      match preview with
      | true -> Ready_dataset.repo dataset
      | false -> Raw_dataset.repo (Ready_dataset.to_raw dataset)
    in
    (* Innermost shift is the model prefix, outermost is the tree prefix. *)
    items |> Repo.shift model_prefix |> Repo.shift tree_prefix
  in
  List.concat_map dataset_l ~f:repo_of_one
......@@ -10,29 +10,6 @@ type output_parse_input_tree =
| Tree4simu
| Tree_diffsel
(** Minimal form of a dataset: a tree workflow and a nucleotide FASTA
    workflow. *)
type raw_dataset =
  { input_tree : nhx workflow  (** tree in NHX format *)
  ; fna : nucleotide_fasta workflow  (** nucleotide sequences *)
  }
(** A dataset carrying, besides the tree and nucleotide sequences, a
    [`tree_dataset] directory workflow and an amino-acid FASTA workflow. *)
type ready_dataset =
  { input_tree : nhx workflow  (** tree in NHX format *)
  ; tree_dataset : [ `tree_dataset ] directory workflow
        (** directory queried through the [Tree_dataset] functions *)
  ; fna : nucleotide_fasta workflow  (** nucleotide sequences *)
  ; faa : aminoacid_fasta workflow  (** amino-acid sequences *)
  }
(** Two-way tag distinguishing a raw payload (['a]) from a ready payload
    (['b]). *)
type ('a, 'b) w_dataset =
  | Raw_dataset of 'a  (** payload in raw form *)
  | Ready_dataset of 'b  (** payload in ready form *)
(** A ready dataset tagged with a model prefix and a tree prefix. *)
type dataset =
  { model_prefix : string  (** label for the model *)
  ; tree_prefix : string  (** label for the tree *)
  ; ready_dataset : ready_dataset  (** the underlying ready dataset *)
  }
type det_meth =
| Pcoc
| Pcoc_gamma
......
......@@ -6,55 +6,6 @@ open File_formats
open Defs
open Convergence_detection
(** [ready_dataset_of_raw_dataset raw_dataset] completes a raw dataset into a
    ready one by adding [Tree_dataset.prepare] applied to the input tree and
    [Bppsuite.fna2faa] applied to the nucleotide sequences.

    The parameter is annotated on purpose: [raw_dataset] and [ready_dataset]
    both declare [input_tree] and [fna], so without the annotation the field
    accesses below may resolve to the most recently declared record
    ([ready_dataset]) and the function would not accept a [raw_dataset]. *)
let ready_dataset_of_raw_dataset (raw_dataset : raw_dataset) : ready_dataset =
  let input_tree = raw_dataset.input_tree in
  let fna = raw_dataset.fna in
  let tree_dataset = Tree_dataset.prepare input_tree in
  let faa = Bppsuite.fna2faa ~fna in
  { input_tree; tree_dataset; fna; faa }
(** [raw_dataset_of_ready_dataset rd] keeps only the [input_tree] and [fna]
    fields of [rd], dropping the other two. *)
let raw_dataset_of_ready_dataset
    ({ input_tree; fna; _ } : ready_dataset) : raw_dataset =
  { input_tree; fna }
(** [repo_of_ready_dataset rd] lists the repo items of a ready dataset: the
    input tree, the H0/Ha node-id files, the three trees taken from
    [rd.tree_dataset], and both sequence files. The whole list is shifted
    under ["ready_dataset"]. *)
let repo_of_ready_dataset (rd : ready_dataset) =
  let { input_tree; tree_dataset; fna; faa } = rd in
  Repo.shift "ready_dataset"
    [ Repo.item [ "input_tree.nhx" ] input_tree
    ; Repo.item [ "tree.H0.node_ids" ] (Tree_dataset.nodes tree_dataset H0)
    ; Repo.item [ "tree.Ha.node_ids" ] (Tree_dataset.nodes tree_dataset Ha)
    ; Repo.item
        [ "tree.only_convergent_tags.nhx" ]
        (Tree_dataset.tree tree_dataset `Detection)
    ; Repo.item
        [ "tree.only_node_ids.nhx" ]
        (Tree_dataset.tree tree_dataset `Simulation)
    ; Repo.item [ "tree.diffsel" ] (Tree_dataset.diffsel_tree tree_dataset)
    ; Repo.item [ "simulated_sequences.fna" ] fna
    ; Repo.item [ "simulated_sequences.faa" ] faa
    ]
(** [repo_of_raw_dataset raw_dataset] lists the two repo items of a raw
    dataset (input tree and nucleotide sequences), shifted under
    ["raw_dataset"]. *)
let repo_of_raw_dataset (raw_dataset : raw_dataset) =
  let { input_tree; fna } = raw_dataset in
  Repo.shift "raw_dataset"
    [ Repo.item [ "input_tree.nhx" ] input_tree
    ; Repo.item [ "simulated_sequences.fna" ] fna
    ]
(** [repo_of_dataset_l ~preview dataset_l] builds one repo per dataset and
    concatenates them in order.

    With [preview] set, the repo of the ready dataset is used; otherwise the
    ready dataset is reduced with [raw_dataset_of_ready_dataset] and the raw
    repo is used. Each repo is shifted by [model_prefix], then by
    [tree_prefix]. *)
let repo_of_dataset_l ~preview dataset_l =
  let repo_of_one (d : Defs.dataset) =
    let inner =
      if preview then repo_of_ready_dataset d.ready_dataset
      else repo_of_raw_dataset (raw_dataset_of_ready_dataset d.ready_dataset)
    in
    inner |> Repo.shift d.model_prefix |> Repo.shift d.tree_prefix
  in
  List.concat (List.map dataset_l ~f:repo_of_one)
let derive_from_model ~model ~tree ~tree_dataset ~tree_prefix ~profile_f ~preview =
let model_prefix = Convergence_hypothesis.string_of_model model in
let nb_sites = if preview then 20 else 1000 in
......@@ -70,8 +21,8 @@ let derive_from_model ~model ~tree ~tree_dataset ~tree_prefix ~profile_f ~previe
let fna = Bppsuite.bppseqgen_multi_profiles ~descr ~nb_sites ~tree ~config:config_p ~profile_f in
let faa = Bppsuite.fna2faa ~fna in
let ready_dataset = { input_tree = tree ; tree_dataset ; fna; faa} in
{ model_prefix; tree_prefix; ready_dataset }
let ready_dataset = { Ready_dataset.input_tree = tree ; tree_dataset ; fna; faa} in
{ Dataset.model_prefix; tree_prefix; dataset = ready_dataset }
let derive_from_tree ~tree_dir ~tree ~profile_f ~preview =
let tree_prefix = Filename.chop_extension tree in
......@@ -126,12 +77,12 @@ let repo_of_dataset_results_l ~dataset_results_l =
)
|> List.concat
let derive_from_det_meth ~det_meth ~dataset ~preview =
let faa = dataset.ready_dataset.faa in
let fna = dataset.ready_dataset.fna in
let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
let faa = dataset.dataset.faa in
let fna = dataset.dataset.fna in
let phy_n = Bppsuite.fa2phy ~fna in
let pcoc_tree = Tree_dataset.tree dataset.ready_dataset.tree_dataset `Detection in
let diffsel_tree = Tree_dataset.diffsel_tree dataset.ready_dataset.tree_dataset in
let pcoc_tree = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
let diffsel_tree = Tree_dataset.diffsel_tree dataset.dataset.tree_dataset in
let w_every = if preview then 1 else 10 in
let n_cycles = if preview then 100 else 1000 in
let det_result = match det_meth with
......@@ -151,8 +102,8 @@ let derive_from_dataset ~dataset ~preview =
) in
let merged_results = merge_results ~res_by_tools in
let tsv = merged_results in
let faa = dataset.ready_dataset.faa in
let tree = Tree_dataset.tree dataset.ready_dataset.tree_dataset `Detection in
let faa = dataset.dataset.faa in
let tree = Tree_dataset.tree dataset.dataset.tree_dataset `Detection in
let plot_merged_results = plot_merge_results ~res_by_tools ~tsv ~faa ~tree in
let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in
......@@ -172,7 +123,7 @@ let main ~outdir ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview () =
let dataset_l = derive_sim ~tree_dir ~trees ~profile_fn ~preview in
let dataset_results_l = derive_det ~dataset_l ~profile_fn ~preview in
let repo = [
repo_of_dataset_l dataset_l ~preview ;
Dataset.repo dataset_l ~preview ;
repo_of_dataset_results_l ~dataset_results_l;
]
|> List.concat
......
open Bistro.Std
open File_formats
open Bistro_utils
(** Minimal form of a dataset: a tree workflow and a nucleotide FASTA
    workflow. *)
type t =
  { input_tree : nhx workflow  (** tree in NHX format *)
  ; fna : nucleotide_fasta workflow  (** nucleotide sequences *)
  }
(** [repo rd] lists the two repo items of a raw dataset (input tree and
    nucleotide sequences), shifted under ["raw_dataset"]. *)
let repo ({ input_tree; fna } : t) =
  Repo.shift "raw_dataset"
    [ Repo.item [ "input_tree.nhx" ] input_tree
    ; Repo.item [ "simulated_sequences.fna" ] fna
    ]
open Bistro.Std
open Bistro_utils
open File_formats
(** A dataset carrying, besides the tree and nucleotide sequences, a
    [`tree_dataset] directory workflow and an amino-acid FASTA workflow. *)
type t =
  { input_tree : nhx workflow  (** tree in NHX format *)
  ; tree_dataset : [ `tree_dataset ] directory workflow
        (** built by [Tree_dataset.prepare] in [of_raw] *)
  ; fna : nucleotide_fasta workflow  (** nucleotide sequences *)
  ; faa : aminoacid_fasta workflow
        (** built by [Bppsuite.fna2faa] in [of_raw] *)
  }
(** [of_raw raw_dataset] completes a raw dataset into a ready one: the tree
    dataset comes from [Tree_dataset.prepare] on the input tree, and the
    amino-acid sequences from [Bppsuite.fna2faa] on the nucleotide ones. *)
let of_raw ({ Raw_dataset.input_tree; fna } : Raw_dataset.t) =
  let tree_dataset = Tree_dataset.prepare input_tree in
  let faa = Bppsuite.fna2faa ~fna in
  { input_tree; tree_dataset; fna; faa }
(** [repo rd] lists the repo items of a ready dataset: the input tree, the
    H0/Ha node-id files, the three trees taken from [rd.tree_dataset], and
    both sequence files. The whole list is shifted under ["ready_dataset"]. *)
let repo ({ input_tree; tree_dataset; fna; faa } : t) =
  Repo.shift "ready_dataset"
    [ Repo.item [ "input_tree.nhx" ] input_tree
    ; Repo.item [ "tree.H0.node_ids" ] (Tree_dataset.nodes tree_dataset H0)
    ; Repo.item [ "tree.Ha.node_ids" ] (Tree_dataset.nodes tree_dataset Ha)
    ; Repo.item
        [ "tree.only_convergent_tags.nhx" ]
        (Tree_dataset.tree tree_dataset `Detection)
    ; Repo.item
        [ "tree.only_node_ids.nhx" ]
        (Tree_dataset.tree tree_dataset `Simulation)
    ; Repo.item [ "tree.diffsel" ] (Tree_dataset.diffsel_tree tree_dataset)
    ; Repo.item [ "simulated_sequences.fna" ] fna
    ; Repo.item [ "simulated_sequences.faa" ] faa
    ]
(** [to_raw rd] keeps only the [input_tree] and [fna] fields of a ready
    dataset and returns them as a [Raw_dataset.t].

    The pattern ends with [; _] because [tree_dataset] and [faa] are dropped
    on purpose; this also avoids the missing-fields warning on partial
    record patterns. *)
let to_raw ({ input_tree; fna; _ } : t) : Raw_dataset.t =
  { Raw_dataset.input_tree; fna }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment