Commit 35a594d0 authored by Carine Rey, committed by Philippe Veber
Browse files

change realdata inputdir tree

parent cb149119
...@@ -39,6 +39,16 @@ test: ...@@ -39,6 +39,16 @@ test:
mv dag.dot dagtest_val.dot && \ mv dag.dot dagtest_val.dot && \
dot -Tsvg dagtest_val.dot -o dagtest_val.svg dot -Tsvg dagtest_val.dot -o dagtest_val.svg
# -----------------------------------------------------------------------
# Test on real data
# -----------------------------------------------------------------------
# Runs the `realdata` subcommand of reviewphiltrans from the example/
# directory, with `real_data` as --indir and `outdir_realdata_test` as
# --outdir. The --seed value is hard-coded (presumably so that runs are
# reproducible -- confirm).
.PHONY: realdata_test
realdata_test:
cd example && \
reviewphiltrans realdata --outdir outdir_realdata_test --indir real_data --np 4 --seed 4256073781403810077
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
# big experiments # big experiments
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
...@@ -61,6 +71,7 @@ clean: ...@@ -61,6 +71,7 @@ clean:
clean-test: clean-test:
rm -rf example/_bistro rm -rf example/_bistro
rm -rf example/outdir_test rm -rf example/outdir_test
rm -rf example/outdir_realdata_test
rm -rf example/report.log rm -rf example/report.log
rm -rf example/dot.dag rm -rf example/dot.dag
......
This diff is collapsed.
...@@ -9,22 +9,35 @@ type t = { ...@@ -9,22 +9,35 @@ type t = {
seed : int ; seed : int ;
} }
let repo dataset_l = let repo dataset_l =
List.map dataset_l ~f:(fun dataset -> List.map dataset_l ~f:(fun dataset ->
let model_prefix = dataset.model_prefix in let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in let tree_prefix = dataset.tree_prefix in
let repo_ready_data = Ready_dataset.repo dataset.dataset in if dataset.is_real then
let repo_raw_data = Raw_dataset.repo ~prefix:model_prefix (Ready_dataset.to_raw dataset.dataset) in let repo_realdata = Raw_dataset.repo_realdata ~ali_prefix:model_prefix ~tree_prefix (Ready_dataset.to_raw dataset.dataset) in
List.concat [ repo_realdata
Repo.shift "minimal" (Repo.shift (tree_prefix ^ "_" ^ model_prefix) repo_raw_data); |> Repo.shift "Dataset"
Repo.shift "debug" repo_ready_data; |> Repo.shift tree_prefix
] else
|> Repo.shift "dataset" let repo_ready_data = Ready_dataset.repo dataset.dataset in
|> Repo.shift model_prefix let repo_raw_data = Raw_dataset.repo ~prefix:model_prefix (Ready_dataset.to_raw dataset.dataset) in
|> Repo.shift "Results_per_hypothesis" List.concat [
Repo.shift "minimal" (Repo.shift (tree_prefix ^ "_" ^ model_prefix) repo_raw_data);
Repo.shift "debug" repo_ready_data;
]
|> Repo.shift "dataset"
|> Repo.shift model_prefix
|> Repo.shift "Results_per_hypothesis"
) )
|> List.concat |> List.concat
let add_indels_to_dataset d ~seed = let add_indels_to_dataset d ~seed =
let p = 0.33 in let p = 0.33 in
let model_prefix = sprintf "%s_0.33_i" d.model_prefix in let model_prefix = sprintf "%s_0.33_i" d.model_prefix in
......
...@@ -6,8 +6,13 @@ open Convergence_hypothesis ...@@ -6,8 +6,13 @@ open Convergence_hypothesis
open Profile open Profile
let parse_input_data ~seed indir = let parse_input_data ~seed indir =
let error_message = {|
I need a file "tree.nhx" containing the annotated tree and
a directory "Alignments" containing fasta alignments (in nt)
with the format "gene1.fna", "gene2.fna",... |} in
let datasets = Array.to_list @@ Sys.readdir indir in let datasets = Array.to_list @@ Sys.readdir indir in
List.map datasets ~f:(fun dataset_prefix -> List.map datasets ~f:(fun dataset_prefix ->
printf "Real dataset:\n\tTree: %s\n" dataset_prefix;
let files = Array.to_list @@ Sys.readdir (Filename.concat indir dataset_prefix ) in let files = Array.to_list @@ Sys.readdir (Filename.concat indir dataset_prefix ) in
if List.length files = 2 then if List.length files = 2 then
let h_file = List.hd files in let h_file = List.hd files in
...@@ -22,25 +27,34 @@ let parse_input_data ~seed indir = ...@@ -22,25 +27,34 @@ let parse_input_data ~seed indir =
| None -> "" | None -> ""
in in
let t_file_ext = Filename.split_extension t_file in let t_file_ext = Filename.split_extension t_file in
let fna, input_tree = match (h_file_ext, t_file_ext) with let fna_dir, input_tree = match (h_file_ext, t_file_ext, h_file, t_file) with
| ( _ , Some "fna") , ( _ , Some "nhx") -> h_file, t_file | _ , ( _ , Some "nhx"), "Alignments" , _ -> h_file, t_file
| ( _ , Some "nhx"), ( _ , Some "fna") -> t_file, h_file | ( _ , Some "nhx"), _, _, "Alignments" -> t_file, h_file
| _ -> failwith ({|Syntax error: extension errors in |} ^ (Filename.concat indir dataset_prefix ) ^ " nhx: " ^ (h_file) ^ " fna: " ^ (t_file)) | _, _, _, "Alignments" -> failwith ({|Syntax error: Naming errors in |})
| _,_,_,_ -> failwith ({|Syntax error: Naming errors in |} ^ (Filename.concat indir dataset_prefix ) ^ "
1st file: " ^ (h_file) ^ "
2nd file: " ^ (t_file) ^ error_message)
in in
let tree_prefix = Filename.chop_extension input_tree in let tree_prefix = Filename.chop_extension input_tree in
let input_tree = Workflow.input (Filename.concat indir (Filename.concat dataset_prefix input_tree)) in let input_tree = Workflow.input (Filename.concat indir (Filename.concat dataset_prefix input_tree)) in
let fna = Workflow.input (Filename.concat indir (Filename.concat dataset_prefix fna)) in let fna_l = Array.to_list @@ Sys.readdir (Filename.concat indir (dataset_prefix ^ "/" ^ fna_dir)) in
let fna_infos = None in printf "%i files detected in %s\n" (List.length fna_l) fna_dir;
let raw_dataset = Raw_dataset.{input_tree; fna; fna_infos} in List.map fna_l ~f:(function fna ->
let dataset = {Dataset.model_prefix = tree_prefix; let fna_prefix = Filename.chop_extension fna in
is_real = true; printf "%s: %s\n" fna_prefix (Filename.concat indir (Filename.concat (dataset_prefix ^ "/" ^ fna_dir) fna));
tree_prefix = dataset_prefix; let fna = Workflow.input (Filename.concat indir (Filename.concat (dataset_prefix ^ "/" ^ fna_dir) fna)) in
dataset = Ready_dataset.of_raw ~descr:("real_data." ^ tree_prefix) raw_dataset; let fna_infos = None in
seed; let raw_dataset = Raw_dataset.{input_tree; fna; fna_infos} in
} in let dataset = {Dataset.model_prefix = fna_prefix;
[dataset] is_real = true;
tree_prefix = tree_prefix;
dataset = Ready_dataset.of_raw ~descr:("real_data." ^ tree_prefix) raw_dataset;
seed;
} in
dataset
)
else else
failwith ({|More than 2 files in |} ^ (Filename.concat indir dataset_prefix )) failwith ({|More than 2 files in |} ^ (Filename.concat indir dataset_prefix) ^ error_message)
) )
|> List.concat |> List.concat
...@@ -131,7 +145,6 @@ let repo_of_detection_result res = ...@@ -131,7 +145,6 @@ let repo_of_detection_result res =
| _ -> [] | _ -> []
] |> List.concat ] |> List.concat
|> Repo.shift det_meth_prefix |> Repo.shift det_meth_prefix
|> Repo.shift "Detection_tools"
let repo_of_dataset_results_l ~dataset_results_l = let repo_of_dataset_results_l ~dataset_results_l =
List.map dataset_results_l ~f:(fun dataset_results -> List.map dataset_results_l ~f:(fun dataset_results ->
...@@ -145,13 +158,35 @@ let repo_of_dataset_results_l ~dataset_results_l = ...@@ -145,13 +158,35 @@ let repo_of_dataset_results_l ~dataset_results_l =
let repo = let repo =
merged_results_item :: merged_results_item ::
plot_merged_results_item :: plot_merged_results_item ::
(List.map det_results_l ~f:repo_of_detection_result |> List.concat) (List.map det_results_l ~f:repo_of_detection_result |> List.concat |> Repo.shift "Detection_tools" )
in in
repo repo
|> Repo.shift dataset_results.model_prefix |> Repo.shift dataset_results.model_prefix
) )
|> List.concat |> List.concat
(* Build the repository items for results computed on real datasets.

   For every element of [dataset_results_l]:
   - the merged table and its plot are named
     "<tree_prefix>.<model_prefix>.merged_results.tsv" and
     "<tree_prefix>.<model_prefix>.plot_merged_results.svg", and are passed
     through [Repo.shift "Merged_Results"];
   - the items produced by [repo_of_detection_result] for each entry of
     [res_by_tools] are shifted by [model_prefix], then by
     "Results_per_Detection_tool";
   - both groups are finally shifted by [tree_prefix].

   The per-dataset lists are flattened into a single list.
   NOTE(review): [Repo.shift] presumably prepends a directory component to
   each item's path -- confirm against the Repo module. *)
let repo_of_real_dataset_results_l ~dataset_results_l =
  let repo_of_one res =
    let per_tool_items =
      List.map res.res_by_tools ~f:repo_of_detection_result
      |> List.concat
      |> Repo.shift res.model_prefix
      |> Repo.shift "Results_per_Detection_tool"
    in
    (* Both merged outputs share the "<tree>.<model>" file-name stem. *)
    let file_name suffix = res.tree_prefix ^ "." ^ res.model_prefix ^ suffix in
    let merged_items =
      Repo.shift "Merged_Results" [
        Repo.item [file_name ".merged_results.tsv"] res.merged_results ;
        Repo.item [file_name ".plot_merged_results.svg"] res.plot_merged_results ;
      ]
    in
    Repo.shift res.tree_prefix (List.concat [ merged_items ; per_tool_items ])
  in
  List.map dataset_results_l ~f:repo_of_one
  |> List.concat
let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview = let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
let model_prefix = dataset.model_prefix in let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in let tree_prefix = dataset.tree_prefix in
...@@ -302,7 +337,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe ...@@ -302,7 +337,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
in in
let repo_real_trees = [ let repo_real_trees = [
Dataset.repo dataset_l ; Dataset.repo dataset_l ;
repo_of_dataset_results_l ~dataset_results_l ; repo_of_real_dataset_results_l ~dataset_results_l ;
] |> List.concat ] |> List.concat
in in
let repo = (Repo.shift "Simulated_datasets" sim_repo_l#repo) @ (Repo.shift "Real_datasets" repo_real_trees) in let repo = (Repo.shift "Simulated_datasets" sim_repo_l#repo) @ (Repo.shift "Real_datasets" repo_real_trees) in
...@@ -358,7 +393,7 @@ let realdata_main ~outdir ~indir ~preview ~use_diffsel ~use_c60 ?(np = 2) ?(mem ...@@ -358,7 +393,7 @@ let realdata_main ~outdir ~indir ~preview ~use_diffsel ~use_c60 ?(np = 2) ?(mem
let dataset_results_l = derive_det ~dataset_l ~preview ~use_diffsel ~use_c60 in let dataset_results_l = derive_det ~dataset_l ~preview ~use_diffsel ~use_c60 in
let repo_real_trees = [ let repo_real_trees = [
Dataset.repo dataset_l ; Dataset.repo dataset_l ;
repo_of_dataset_results_l ~dataset_results_l ; repo_of_real_dataset_results_l ~dataset_results_l ;
] |> List.concat (* list of repos *) ] |> List.concat (* list of repos *)
in in
let repo = Repo.shift "Real_datasets" repo_real_trees in let repo = Repo.shift "Real_datasets" repo_real_trees in
......
...@@ -13,3 +13,13 @@ let repo ~prefix rd = ...@@ -13,3 +13,13 @@ let repo ~prefix rd =
item [prefix ^ ".nhx"] rd.input_tree ; item [prefix ^ ".nhx"] rd.input_tree ;
item [prefix ^ ".fna"] rd.fna ; item [prefix ^ ".fna"] rd.fna ;
] ]
(* Repository items for a real dataset [rd]: its tree, named
   "<tree_prefix>.nhx", followed by its alignment, named "<ali_prefix>.fna".
   Only the alignment item is passed through [Repo.shift "Alignments"]
   (presumably placing it in an "Alignments" subdirectory -- confirm against
   the Repo module). *)
let repo_realdata ~tree_prefix ~ali_prefix rd =
  let tree_item = Repo.item [tree_prefix ^ ".nhx"] rd.input_tree in
  let alignment_item = Repo.item [ali_prefix ^ ".fna"] rd.fna in
  tree_item :: Repo.shift "Alignments" [alignment_item]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment