Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 35a594d0, authored by Carine Rey and committed by Philippe Veber
Browse files

change realdata inputdir tree

parent cb149119
......@@ -39,6 +39,16 @@ test:
mv dag.dot dagtest_val.dot && \
dot -Tsvg dagtest_val.dot -o dagtest_val.svg
# -----------------------------------------------------------------------
# Test
# -----------------------------------------------------------------------
# realdata_test: from the example/ directory, run `reviewphiltrans realdata`
# with real_data as input directory and outdir_realdata_test as output
# directory, using --np 4 and a fixed --seed value.
.PHONY: realdata_test
realdata_test:
cd example && \
reviewphiltrans realdata --outdir outdir_realdata_test --indir real_data --np 4 --seed 4256073781403810077
# -----------------------------------------------------------------------
# big experiments
# -----------------------------------------------------------------------
......@@ -61,6 +71,7 @@ clean:
clean-test:
rm -rf example/_bistro
rm -rf example/outdir_test
rm -rf example/outdir_realdata_test
rm -rf example/report.log
rm -rf example/dot.dag
......
This diff is collapsed.
......@@ -9,22 +9,35 @@ type t = {
seed : int ;
}
(** [repo dataset_l] builds one repo fragment per dataset of [dataset_l] and
    flattens the fragments into a single list with [List.concat].

    In the [if] expression below, a real dataset ([dataset.is_real]) is
    exported through [Raw_dataset.repo_realdata] and wrapped with
    [Repo.shift "Dataset"] then [Repo.shift tree_prefix]; any other dataset
    gets a "minimal" and a "debug" sub-repo, wrapped with
    [Repo.shift "dataset"], [Repo.shift model_prefix] and
    [Repo.shift "Results_per_hypothesis"]. *)
let repo dataset_l =
List.map dataset_l ~f:(fun dataset ->
let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in
(* NOTE(review): the two bindings and the pipeline that follow repeat the
   [else] branch further down verbatim, and the pipeline runs straight into
   the [if] with no separator. They look like the pre-change body kept by
   the diff view -- confirm against the real file before relying on them. *)
let repo_ready_data = Ready_dataset.repo dataset.dataset in
let repo_raw_data = Raw_dataset.repo ~prefix:model_prefix (Ready_dataset.to_raw dataset.dataset) in
List.concat [
Repo.shift "minimal" (Repo.shift (tree_prefix ^ "_" ^ model_prefix) repo_raw_data);
Repo.shift "debug" repo_ready_data;
]
|> Repo.shift "dataset"
|> Repo.shift model_prefix
|> Repo.shift "Results_per_hypothesis"
if dataset.is_real then
(* Real data: the alignment is named after [model_prefix] and the tree after
   [tree_prefix] (see the labelled arguments passed just below). *)
let repo_realdata = Raw_dataset.repo_realdata ~ali_prefix:model_prefix ~tree_prefix (Ready_dataset.to_raw dataset.dataset) in
repo_realdata
|> Repo.shift "Dataset"
|> Repo.shift tree_prefix
else
(* Non-real data: export both the ready dataset and its raw form. *)
let repo_ready_data = Ready_dataset.repo dataset.dataset in
let repo_raw_data = Raw_dataset.repo ~prefix:model_prefix (Ready_dataset.to_raw dataset.dataset) in
List.concat [
Repo.shift "minimal" (Repo.shift (tree_prefix ^ "_" ^ model_prefix) repo_raw_data);
Repo.shift "debug" repo_ready_data;
]
|> Repo.shift "dataset"
|> Repo.shift model_prefix
|> Repo.shift "Results_per_hypothesis"
)
|> List.concat
let add_indels_to_dataset d ~seed =
let p = 0.33 in
let model_prefix = sprintf "%s_0.33_i" d.model_prefix in
......
......@@ -6,8 +6,13 @@ open Convergence_hypothesis
open Profile
let parse_input_data ~seed indir =
let error_message = {|
I need a file "tree.nhx" containing the annotated tree and
a directory "Alignments" containing fasta alignments (in nt)
with the format "gene1.fna", "gene2.fna",... |} in
let datasets = Array.to_list @@ Sys.readdir indir in
List.map datasets ~f:(fun dataset_prefix ->
printf "Real dataset:\n\tTree: %s\n" dataset_prefix;
let files = Array.to_list @@ Sys.readdir (Filename.concat indir dataset_prefix ) in
if List.length files = 2 then
let h_file = List.hd files in
......@@ -22,25 +27,34 @@ let parse_input_data ~seed indir =
| None -> ""
in
let t_file_ext = Filename.split_extension t_file in
let fna, input_tree = match (h_file_ext, t_file_ext) with
| ( _ , Some "fna") , ( _ , Some "nhx") -> h_file, t_file
| ( _ , Some "nhx"), ( _ , Some "fna") -> t_file, h_file
| _ -> failwith ({|Syntax error: extension errors in |} ^ (Filename.concat indir dataset_prefix ) ^ " nhx: " ^ (h_file) ^ " fna: " ^ (t_file))
let fna_dir, input_tree = match (h_file_ext, t_file_ext, h_file, t_file) with
| _ , ( _ , Some "nhx"), "Alignments" , _ -> h_file, t_file
| ( _ , Some "nhx"), _, _, "Alignments" -> t_file, h_file
| _, _, _, "Alignments" -> failwith ({|Syntax error: Naming errors in |})
| _,_,_,_ -> failwith ({|Syntax error: Naming errors in |} ^ (Filename.concat indir dataset_prefix ) ^ "
1st file: " ^ (h_file) ^ "
2nd file: " ^ (t_file) ^ error_message)
in
let tree_prefix = Filename.chop_extension input_tree in
let input_tree = Workflow.input (Filename.concat indir (Filename.concat dataset_prefix input_tree)) in
let fna = Workflow.input (Filename.concat indir (Filename.concat dataset_prefix fna)) in
let fna_infos = None in
let raw_dataset = Raw_dataset.{input_tree; fna; fna_infos} in
let dataset = {Dataset.model_prefix = tree_prefix;
is_real = true;
tree_prefix = dataset_prefix;
dataset = Ready_dataset.of_raw ~descr:("real_data." ^ tree_prefix) raw_dataset;
seed;
} in
[dataset]
let fna_l = Array.to_list @@ Sys.readdir (Filename.concat indir (dataset_prefix ^ "/" ^ fna_dir)) in
printf "%i files detected in %s\n" (List.length fna_l) fna_dir;
List.map fna_l ~f:(function fna ->
let fna_prefix = Filename.chop_extension fna in
printf "%s: %s\n" fna_prefix (Filename.concat indir (Filename.concat (dataset_prefix ^ "/" ^ fna_dir) fna));
let fna = Workflow.input (Filename.concat indir (Filename.concat (dataset_prefix ^ "/" ^ fna_dir) fna)) in
let fna_infos = None in
let raw_dataset = Raw_dataset.{input_tree; fna; fna_infos} in
let dataset = {Dataset.model_prefix = fna_prefix;
is_real = true;
tree_prefix = tree_prefix;
dataset = Ready_dataset.of_raw ~descr:("real_data." ^ tree_prefix) raw_dataset;
seed;
} in
dataset
)
else
failwith ({|More than 2 files in |} ^ (Filename.concat indir dataset_prefix ))
failwith ({|More than 2 files in |} ^ (Filename.concat indir dataset_prefix) ^ error_message)
)
|> List.concat
......@@ -131,7 +145,6 @@ let repo_of_detection_result res =
| _ -> []
] |> List.concat
|> Repo.shift det_meth_prefix
|> Repo.shift "Detection_tools"
let repo_of_dataset_results_l ~dataset_results_l =
List.map dataset_results_l ~f:(fun dataset_results ->
......@@ -145,13 +158,35 @@ let repo_of_dataset_results_l ~dataset_results_l =
let repo =
merged_results_item ::
plot_merged_results_item ::
(List.map det_results_l ~f:repo_of_detection_result |> List.concat)
(List.map det_results_l ~f:repo_of_detection_result |> List.concat |> Repo.shift "Detection_tools" )
in
repo
|> Repo.shift dataset_results.model_prefix
)
|> List.concat
(** [repo_of_real_dataset_results_l ~dataset_results_l] builds the repo
    entries for every element of [dataset_results_l] and returns them as one
    flat list.

    For each element, two groups are produced and then wrapped together with
    [Repo.shift tree_prefix]:
    - the merged results table and its plot, named
      [<tree_prefix>.<model_prefix>.merged_results.tsv] and
      [<tree_prefix>.<model_prefix>.plot_merged_results.svg], wrapped with
      [Repo.shift "Merged_Results"];
    - the output of [repo_of_detection_result] for each entry of
      [res_by_tools], wrapped with [Repo.shift model_prefix] and then
      [Repo.shift "Results_per_Detection_tool"]. *)
let repo_of_real_dataset_results_l ~dataset_results_l =
  let repo_of_one results =
    let tools = results.res_by_tools in
    let tree_prefix = results.tree_prefix in
    let model_prefix = results.model_prefix in
    (* Both merged files share the "<tree_prefix>.<model_prefix>" stem. *)
    let merged_name suffix = tree_prefix ^ "." ^ model_prefix ^ suffix in
    let merged_repo =
      Repo.shift "Merged_Results" [
        Repo.item [merged_name ".merged_results.tsv"] results.merged_results ;
        Repo.item [merged_name ".plot_merged_results.svg"] results.plot_merged_results ;
      ]
    in
    let per_tool_repo =
      List.concat (List.map tools ~f:repo_of_detection_result)
      |> Repo.shift model_prefix
      |> Repo.shift "Results_per_Detection_tool"
    in
    Repo.shift tree_prefix (merged_repo @ per_tool_repo)
  in
  List.concat (List.map dataset_results_l ~f:repo_of_one)
let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in
......@@ -302,7 +337,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
in
let repo_real_trees = [
Dataset.repo dataset_l ;
repo_of_dataset_results_l ~dataset_results_l ;
repo_of_real_dataset_results_l ~dataset_results_l ;
] |> List.concat
in
let repo = (Repo.shift "Simulated_datasets" sim_repo_l#repo) @ (Repo.shift "Real_datasets" repo_real_trees) in
......@@ -358,7 +393,7 @@ let realdata_main ~outdir ~indir ~preview ~use_diffsel ~use_c60 ?(np = 2) ?(mem
let dataset_results_l = derive_det ~dataset_l ~preview ~use_diffsel ~use_c60 in
let repo_real_trees = [
Dataset.repo dataset_l ;
repo_of_dataset_results_l ~dataset_results_l ;
repo_of_real_dataset_results_l ~dataset_results_l ;
] |> List.concat (* list of repos *)
in
let repo = Repo.shift "Real_datasets" repo_real_trees in
......
......@@ -13,3 +13,13 @@ let repo ~prefix rd =
item [prefix ^ ".nhx"] rd.input_tree ;
item [prefix ^ ".fna"] rd.fna ;
]
(** [repo_realdata ~tree_prefix ~ali_prefix rd] lists the repo items of the
    raw dataset [rd]: first its input tree as [<tree_prefix>.nhx], then its
    alignment as [<ali_prefix>.fna] wrapped with [Repo.shift "Alignments"]. *)
let repo_realdata ~tree_prefix ~ali_prefix rd =
  let tree_item = Repo.item [tree_prefix ^ ".nhx"] rd.input_tree in
  let alignment_item = Repo.item [ali_prefix ^ ".fna"] rd.fna in
  tree_item :: Repo.shift "Alignments" [alignment_item]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment