Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 57e06a9a authored by Carine Rey
Browse files

update post analyses

parent a4fc5571
......@@ -257,27 +257,36 @@ let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn
let repo = Dataset.repo dataset_l ~preview in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~no_Ne ~no_HaPC ~tree_dir ~profile_fn ~use_concat () =
let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~preview ~fast_mode ~no_Ne ~no_HaPC ~tree_dir ~profile_fn ~use_concat ~only_simu () =
let trees = Array.to_list @@ Sys.readdir tree_dir in
let repo = List.map trees ~f:(fun tree ->
let simu_dataset_l = derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC in
let post_analyses_simu = Post_analyses.post_analyses_simu_of_simu_dataset_l ~simu_dataset_l in
let repo_of_post_analyses_simu = Post_analyses.repo_of_post_analyses_simu ~post_analyses_simu in
let repo_per_tree = List.map trees ~f:(fun tree ->
let trees = [tree] in
let tree_prefix = Filename.chop_extension tree in
let indir_dataset_l = if indir = "" then [] else parse_input_data indir in
let dataset_l =
derive_sim ~tree_dir ~trees ~profile_fn ~preview ~use_concat ~ns ~no_Ne ~no_HaPC
@ indir_dataset_l in
let dataset_results_l = derive_det ~dataset_l ~preview ~fast_mode in
let post_analyses = Post_analyses.post_analyses_of_dataset_results_l ~dataset_results_l in
let dataset_results_l =
if only_simu then
[]
else
derive_det ~dataset_l ~preview ~fast_mode
in
let post_analyses_res = Post_analyses.post_analyses_res_of_dataset_results_l ~dataset_results_l in
let repo_per_tree = [
Dataset.repo dataset_l ~preview ;
repo_of_dataset_results_l ~dataset_results_l ;
Repo.shift tree_prefix (Post_analyses.repo_of_post_analyses ~prefix:tree_prefix ~post_analyses);
Repo.shift tree_prefix (Post_analyses.repo_of_post_analyses_res ~prefix:tree_prefix ~post_analyses_res);
] |> List.concat
in
repo_per_tree
)
|> List.concat
|> List.concat
in
let repo = repo_of_post_analyses_simu @ repo_per_tree in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let simulation_command =
......@@ -347,6 +356,8 @@ let validation_command =
flag "--no-ne" no_arg ~doc:" mode without hypothesis including different Ne"
and no_HaPC =
flag "--no-hapc" no_arg ~doc:" mode without ~HaPC hypothesis"
and only_simu =
flag "--only-simu" no_arg ~doc:" mode only simulation"
and use_concat =
flag "--use-concat" no_arg ~doc:" Use concatenation H0+Ha_pcoc"
and ns =
......@@ -360,5 +371,5 @@ let validation_command =
and profile_fn =
flag "--profile-fn" (required string) ~doc:"PATH Path to profile file"
in
validation_main ~outdir ?indir ?ns ?np ?mem ~preview ~fast_mode ~no_Ne ~no_HaPC ~tree_dir ~profile_fn ~use_concat
validation_main ~outdir ?indir ?ns ?np ?mem ~preview ~fast_mode ~no_Ne ~no_HaPC ~tree_dir ~profile_fn ~use_concat ~only_simu
]
......@@ -20,8 +20,10 @@ type simu_infos = {
tree_prefix: string ;
}
type post_analyses = {
type post_analyses_res = {
(* Result of [get_t_choices] on the list of dataset results; [None] when
   that function produces no threshold-choice outputs. *)
t_choices : t_choices option;
}
(** Post-analysis products derived from the simulated datasets alone
    (built by [post_analyses_simu_of_simu_dataset_l]). *)
type post_analyses_simu = {
(* One [simu_infos] record per simulated dataset: its optional info file
   together with the tree prefix and model prefix it belongs to. *)
simu_infos_l : simu_infos list;
(* The ["out.pdf"] file selected from the directory produced by
   [group_simu_infos].
   NOTE(review): typed as [text_file] although the selected file is a PDF —
   confirm this is intended. *)
simu_infos_plot : text_file workflow ;
}
......@@ -63,7 +65,7 @@ let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
let env = docker_image ~account:"carinerey" ~name:"r_basics" ~tag:"07162018" () in
let cmd_cp_l = List.map simu_infos_l ~f:(fun s ->
match s.simu_infos with
| Some w -> [cmd "cp" [dep w ; tmp // (s.tree_prefix ^"."^ s.model_prefix ^ ".tsv")]]
| Some w -> [cmd "cp" [dep w ; tmp // (s.tree_prefix ^"@"^ s.model_prefix ^ ".tsv")]]
| None -> []
) |> List.concat
in
......@@ -98,9 +100,9 @@ let get_t_choices ~(dataset_results_l: dataset_res list) : t_choices option =
Some {t_choices_max; t_choices_complete ; t_choices_plot}
| _ -> None
let get_simu_infos ~dataset_results =
let model_prefix = dataset_results.dataset.model_prefix in
let ready_dataset = dataset_results.dataset.dataset in
let get_simu_infos ~(dataset:Dataset.t) =
let model_prefix = dataset.model_prefix in
let ready_dataset = dataset.dataset in
let faa = ready_dataset.faa in
let tree_sc = Tree_dataset.tree ready_dataset.tree_dataset `Detection in
let fna_infos = ready_dataset.fna_infos in
......@@ -109,39 +111,44 @@ let get_simu_infos ~dataset_results =
| None -> None (*make_simu_infos ~faa ~tree_sc*)
let post_analyses_of_dataset_results_l ~dataset_results_l =
let post_analyses_res_of_dataset_results_l ~dataset_results_l =
let t_choices = get_t_choices ~dataset_results_l in
let simu_infos_l = List.map dataset_results_l ~f:(fun dataset_results ->
{simu_infos = (get_simu_infos ~dataset_results);
tree_prefix =dataset_results.tree_prefix ;
model_prefix = dataset_results.model_prefix
{t_choices}
let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l =
let simu_infos_l = List.map simu_dataset_l ~f:(fun dataset ->
{simu_infos = (get_simu_infos ~dataset);
tree_prefix = dataset.tree_prefix ;
model_prefix = dataset.model_prefix
}
) in
let simu_infos_plot = group_simu_infos ~simu_infos_l / selector ["out.pdf"] in
{t_choices ; simu_infos_l; simu_infos_plot}
{simu_infos_l; simu_infos_plot}
let repo_of_post_analyses ~prefix ~post_analyses =
let repo_of_post_analyses_simu ~post_analyses_simu =
[
Repo.[
item [prefix ^ ".pdf"] post_analyses.simu_infos_plot
item ["hypothesis_validation.pdf"] post_analyses_simu.simu_infos_plot
] |> Repo.shift "simu_infos"
;
(match post_analyses.t_choices with
| None -> []
| Some w ->
Repo.[
item [prefix ^ ".t_choices.max_mcc_per_meth.tsv"] w.t_choices_max ;
item [prefix ^ ".t_choices.complete.tsv"] w.t_choices_complete ;
item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
] |> Repo.shift "t_choices"
);
(List.map post_analyses.simu_infos_l ~f:(fun simu_infos ->
(List.map post_analyses_simu.simu_infos_l ~f:(fun simu_infos ->
match simu_infos.simu_infos with
| None -> []
| Some w ->
Repo.[
item [prefix ^ "." ^ simu_infos.model_prefix ^ ".tsv"] w
item [simu_infos.tree_prefix ^ "@" ^ simu_infos.model_prefix ^ ".tsv"] w
] |> Repo.shift "simu_infos"
) |> List.concat
);
] |> List.concat
(** [repo_of_post_analyses_res ~prefix ~post_analyses_res] builds the
    repository entries for the threshold-choice outputs.

    When [post_analyses_res.t_choices] is [None] the result is the empty
    list. Otherwise three items are produced, each named
    [prefix ^ ".t_choices.<suffix>"], and all of them are shifted under the
    ["t_choices"] directory. *)
let repo_of_post_analyses_res ~prefix ~post_analyses_res =
  (* Every threshold-choice output lives in the same sub-directory. *)
  let under_t_choices items = Repo.shift "t_choices" items in
  match post_analyses_res.t_choices with
  | Some { t_choices_max; t_choices_complete; t_choices_plot } ->
    under_t_choices
      Repo.[
        item [prefix ^ ".t_choices.max_mcc_per_meth.tsv"] t_choices_max ;
        item [prefix ^ ".t_choices.complete.tsv"] t_choices_complete ;
        item [prefix ^ ".t_choices.pdf"] t_choices_plot ;
      ]
  | None -> []
......@@ -177,11 +177,11 @@ OnlyNonConv_ConvAA = [0] * n_sites #B = nb AA only in (NonConvLeaves AND ConvLea
for i in range(n_sites):
logger.info(i)
ConvAA = set([aa for aa, v in Conv_AA_per_site_l[i].items() if v > 0])
logger.info( Conv_AA_per_site_l[i])
logger.info(ConvAA)
logger.debug( Conv_AA_per_site_l[i])
logger.debug(ConvAA)
NonConvAA = set([aa for aa, v in NonConv_AA_per_site_l[i].items() if v > 0])
logger.info(NonConv_AA_per_site_l[i])
logger.info(NonConvAA)
logger.debug(NonConv_AA_per_site_l[i])
logger.debug(NonConvAA)
OnlyConvAA[i] = len(ConvAA - NonConvAA)
OnlyNonConvAA[i] = len(NonConvAA - ConvAA)
......@@ -203,7 +203,11 @@ if AliInfoFile:
except Exception as exc:
logger.error(str(exc))
sys.exit(1)
logger.info(profil_df.shape)
logger.info(len(Sites))
logger.info(profil_df.shape[0]-n_sites)
profil_df = profil_df[profil_df.shape[0]-n_sites:]
logger.info(profil_df.shape)
profil_df["Sites"] = Sites
......
......@@ -6,7 +6,7 @@ library("ggplot2")
library("cowplot")
option_list = list(
make_option(c("--input_dir"), type="character", default=NULL,
make_option(c("-i","--input_dir"), type="character", default=NULL,
help="Input dir", metavar="character"),
make_option(c("-o","--out"), type="character", default="out",
help="output prefix [default= %default]", metavar="character")
......@@ -27,7 +27,7 @@ input_dir = opt$input_dir
## program...
files = paste0(list.files(input_dir))
files_split = strsplit(files, ".", fixed = T)
files_split = strsplit(files, "@", fixed = T)
files_df = as.data.frame(do.call(rbind, files_split))
files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = files_df$V1, hyp = files_df$V2)
......@@ -35,7 +35,7 @@ files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = files_df$V1,
read_dir = function(x) {
file = x["files"]
tree = x["tree"]
hyp = x["hyp"]
hyp = gsub(".tsv","",x["hyp"])
df = read.csv(file, sep = "\t", header = T)
df$tree = tree
df$hyp = hyp
......@@ -57,7 +57,7 @@ plot = plot + facet_grid(hyp ~ tree)
output_pdf = paste0(opt$out,".pdf")
save_plot(output_pdf,
plot,
ncol = 0.4 * length(unique(df$tree)),
ncol = 0.8 * length(unique(df$tree)),
nrow = 0.35 * length(unique(df$hyp)),
base_aspect_ratio = 2,
limitsize = FALSE
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment