Commit 35a594d0 authored by Carine Rey, committed by Philippe Veber
Browse files

change realdata inputdir tree

parent cb149119
......@@ -39,6 +39,16 @@ test:
mv dag.dot dagtest_val.dot && \
dot -Tsvg dagtest_val.dot -o dagtest_val.svg
# -----------------------------------------------------------------------
# Real data test
# -----------------------------------------------------------------------
# realdata_test: run the `reviewphiltrans realdata` sub-command from inside
# the example/ directory, reading its inputs from example/real_data and
# writing its results to example/outdir_realdata_test.
# --np 4 presumably sets the number of parallel processes (confirm against the
# CLI help); the seed is passed explicitly, presumably so runs are reproducible.
.PHONY: realdata_test
realdata_test:
cd example && \
reviewphiltrans realdata --outdir outdir_realdata_test --indir real_data --np 4 --seed 4256073781403810077
# -----------------------------------------------------------------------
# big experiments
# -----------------------------------------------------------------------
......@@ -61,6 +71,7 @@ clean:
clean-test:
rm -rf example/_bistro
rm -rf example/outdir_test
rm -rf example/outdir_realdata_test
rm -rf example/report.log
rm -rf example/dot.dag
......
This diff is collapsed.
......@@ -9,10 +9,19 @@ type t = {
seed : int ;
}
let repo dataset_l =
List.map dataset_l ~f:(fun dataset ->
let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in
if dataset.is_real then
let repo_realdata = Raw_dataset.repo_realdata ~ali_prefix:model_prefix ~tree_prefix (Ready_dataset.to_raw dataset.dataset) in
repo_realdata
|> Repo.shift "Dataset"
|> Repo.shift tree_prefix
else
let repo_ready_data = Ready_dataset.repo dataset.dataset in
let repo_raw_data = Raw_dataset.repo ~prefix:model_prefix (Ready_dataset.to_raw dataset.dataset) in
List.concat [
......@@ -25,6 +34,10 @@ let repo dataset_l =
)
|> List.concat
let add_indels_to_dataset d ~seed =
let p = 0.33 in
let model_prefix = sprintf "%s_0.33_i" d.model_prefix in
......
......@@ -6,8 +6,13 @@ open Convergence_hypothesis
open Profile
let parse_input_data ~seed indir =
let error_message = {|
I need a file "tree.nhx" containing the annotated tree and
a directory "Alignments" containing fasta alignments (in nt)
with the format "gene1.fna", "gene2.fna",... |} in
let datasets = Array.to_list @@ Sys.readdir indir in
List.map datasets ~f:(fun dataset_prefix ->
printf "Real dataset:\n\tTree: %s\n" dataset_prefix;
let files = Array.to_list @@ Sys.readdir (Filename.concat indir dataset_prefix ) in
if List.length files = 2 then
let h_file = List.hd files in
......@@ -22,25 +27,34 @@ let parse_input_data ~seed indir =
| None -> ""
in
let t_file_ext = Filename.split_extension t_file in
let fna, input_tree = match (h_file_ext, t_file_ext) with
| ( _ , Some "fna") , ( _ , Some "nhx") -> h_file, t_file
| ( _ , Some "nhx"), ( _ , Some "fna") -> t_file, h_file
| _ -> failwith ({|Syntax error: extension errors in |} ^ (Filename.concat indir dataset_prefix ) ^ " nhx: " ^ (h_file) ^ " fna: " ^ (t_file))
let fna_dir, input_tree = match (h_file_ext, t_file_ext, h_file, t_file) with
| _ , ( _ , Some "nhx"), "Alignments" , _ -> h_file, t_file
| ( _ , Some "nhx"), _, _, "Alignments" -> t_file, h_file
| _, _, _, "Alignments" -> failwith ({|Syntax error: Naming errors in |})
| _,_,_,_ -> failwith ({|Syntax error: Naming errors in |} ^ (Filename.concat indir dataset_prefix ) ^ "
1st file: " ^ (h_file) ^ "
2nd file: " ^ (t_file) ^ error_message)
in
let tree_prefix = Filename.chop_extension input_tree in
let input_tree = Workflow.input (Filename.concat indir (Filename.concat dataset_prefix input_tree)) in
let fna = Workflow.input (Filename.concat indir (Filename.concat dataset_prefix fna)) in
let fna_l = Array.to_list @@ Sys.readdir (Filename.concat indir (dataset_prefix ^ "/" ^ fna_dir)) in
printf "%i files detected in %s\n" (List.length fna_l) fna_dir;
List.map fna_l ~f:(function fna ->
let fna_prefix = Filename.chop_extension fna in
printf "%s: %s\n" fna_prefix (Filename.concat indir (Filename.concat (dataset_prefix ^ "/" ^ fna_dir) fna));
let fna = Workflow.input (Filename.concat indir (Filename.concat (dataset_prefix ^ "/" ^ fna_dir) fna)) in
let fna_infos = None in
let raw_dataset = Raw_dataset.{input_tree; fna; fna_infos} in
let dataset = {Dataset.model_prefix = tree_prefix;
let dataset = {Dataset.model_prefix = fna_prefix;
is_real = true;
tree_prefix = dataset_prefix;
tree_prefix = tree_prefix;
dataset = Ready_dataset.of_raw ~descr:("real_data." ^ tree_prefix) raw_dataset;
seed;
} in
[dataset]
dataset
)
else
failwith ({|More than 2 files in |} ^ (Filename.concat indir dataset_prefix ))
failwith ({|More than 2 files in |} ^ (Filename.concat indir dataset_prefix) ^ error_message)
)
|> List.concat
......@@ -131,7 +145,6 @@ let repo_of_detection_result res =
| _ -> []
] |> List.concat
|> Repo.shift det_meth_prefix
|> Repo.shift "Detection_tools"
let repo_of_dataset_results_l ~dataset_results_l =
List.map dataset_results_l ~f:(fun dataset_results ->
......@@ -145,13 +158,35 @@ let repo_of_dataset_results_l ~dataset_results_l =
let repo =
merged_results_item ::
plot_merged_results_item ::
(List.map det_results_l ~f:repo_of_detection_result |> List.concat)
(List.map det_results_l ~f:repo_of_detection_result |> List.concat |> Repo.shift "Detection_tools" )
in
repo
|> Repo.shift dataset_results.model_prefix
)
|> List.concat
(* Build the repository entries for the results of real datasets.

   For every element of [dataset_results_l], the produced items are all
   shifted under the dataset's [tree_prefix] and consist of:
   - the merged results table and its plot, shifted under "Merged_Results"
     and named "<tree_prefix>.<model_prefix>.merged_results.tsv" and
     "<tree_prefix>.<model_prefix>.plot_merged_results.svg";
   - the items returned by [repo_of_detection_result] for each entry of
     [res_by_tools], shifted under [model_prefix] and then under
     "Results_per_Detection_tool".

   The per-dataset lists are flattened into a single list.
   NOTE(review): [Repo.shift] presumably nests items under the given
   directory name -- confirm against the Repo module. *)
let repo_of_real_dataset_results_l ~dataset_results_l =
  let repo_of_one res =
    let tool_results = res.res_by_tools in
    let tree = res.tree_prefix in
    let model = res.model_prefix in
    (* Shared "<tree>.<model>" stem of the two merged-result file names. *)
    let stem = tree ^ "." ^ model in
    let merged_part =
      Repo.shift "Merged_Results" [
        Repo.item [stem ^ ".merged_results.tsv"] res.merged_results ;
        Repo.item [stem ^ ".plot_merged_results.svg"] res.plot_merged_results ;
      ]
    in
    let per_tool_part =
      List.map tool_results ~f:repo_of_detection_result
      |> List.concat
      |> Repo.shift model
      |> Repo.shift "Results_per_Detection_tool"
    in
    Repo.shift tree (merged_part @ per_tool_part)
  in
  List.concat (List.map dataset_results_l ~f:repo_of_one)
let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in
......@@ -302,7 +337,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
in
let repo_real_trees = [
Dataset.repo dataset_l ;
repo_of_dataset_results_l ~dataset_results_l ;
repo_of_real_dataset_results_l ~dataset_results_l ;
] |> List.concat
in
let repo = (Repo.shift "Simulated_datasets" sim_repo_l#repo) @ (Repo.shift "Real_datasets" repo_real_trees) in
......@@ -358,7 +393,7 @@ let realdata_main ~outdir ~indir ~preview ~use_diffsel ~use_c60 ?(np = 2) ?(mem
let dataset_results_l = derive_det ~dataset_l ~preview ~use_diffsel ~use_c60 in
let repo_real_trees = [
Dataset.repo dataset_l ;
repo_of_dataset_results_l ~dataset_results_l ;
repo_of_real_dataset_results_l ~dataset_results_l ;
] |> List.concat (* list of repos *)
in
let repo = Repo.shift "Real_datasets" repo_real_trees in
......
......@@ -13,3 +13,13 @@ let repo ~prefix rd =
item [prefix ^ ".nhx"] rd.input_tree ;
item [prefix ^ ".fna"] rd.fna ;
]
(* Repository entries for one real dataset: the input tree as
   "<tree_prefix>.nhx", followed by the alignment as "<ali_prefix>.fna"
   shifted under "Alignments". *)
let repo_realdata ~tree_prefix ~ali_prefix rd =
  let tree_item = Repo.item [tree_prefix ^ ".nhx"] rd.input_tree in
  let alignment_items =
    Repo.shift "Alignments" [ Repo.item [ali_prefix ^ ".fna"] rd.fna ]
  in
  tree_item :: alignment_items
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment