Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit fb684a8d authored by Carine Rey's avatar Carine Rey
Browse files

Add a post-analysis: plot of the simulation hypothesis validation

parent 9e609b48
......@@ -17,11 +17,13 @@ type t_choices = {
(** Simulation table of one dataset result, together with the two prefixes
    used to name it. *)
type simu_infos = {
(* Workflow producing the simulation table, when there is one. Entries
   holding [None] are skipped by the code that copies or exports tables. *)
simu_infos: text_file workflow option ;
(* Model label; second component of the "<tree_prefix>.<model_prefix>.tsv"
   file name built by [group_simu_infos]. *)
model_prefix: string ;
(* Tree label; first component of that same file name. *)
tree_prefix: string ;
}
(** Everything computed by the post-analysis step for a list of dataset
    results. *)
type post_analyses = {
(* Result of [get_t_choices]; [None] when it returned nothing. *)
t_choices : t_choices option;
(* One [simu_infos] record per dataset result. *)
simu_infos_l : simu_infos list;
(* The "out.pdf" file selected from the [group_simu_infos] workflow. *)
simu_infos_plot : text_file workflow ;
}
......@@ -57,6 +59,31 @@ let make_simu_infos ?(descr="") ?(fna_infos) ~faa ~tree_sc : text_file workflow
];
]
(** [group_simu_infos ~simu_infos_l] builds the
    ["post_analyses.plot_simu_infos"] workflow.

    Every entry of [simu_infos_l] that carries a table
    ([simu_infos = Some w]) is copied into the workflow's temporary directory
    as [<tree_prefix>.<model_prefix>.tsv]; entries holding [None] contribute
    no command. The [Scripts.plot_hyp_simu_validation] R script is then run
    with [--input_dir] set to that directory and [--out] set to the prefix
    [dest/out]. *)
let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"r_basics" ~tag:"07162018" () in
  (* Zero or one [cp] command per entry, depending on whether a table exists. *)
  let cp_cmd_of s =
    match s.simu_infos with
    | Some w ->
      [ cmd "cp" [ dep w ; tmp // (s.tree_prefix ^ "." ^ s.model_prefix ^ ".tsv") ] ]
    | None -> []
  in
  let cmd_cp_l = List.map simu_infos_l ~f:cp_cmd_of |> List.concat in
  let out = dest // "out" in
  (* The flag is spelled "--out" without a trailing blank, like
     "--input_dir": [opt] is expected to insert the separator between the
     flag and its value itself. *)
  let plot_cmd =
    cmd "Rscript" [
      file_dump (string Scripts.plot_hyp_simu_validation) ;
      opt "--input_dir" ident tmp ;
      opt "--out" ident out ;
    ]
  in
  workflow ~descr:"post_analyses.plot_simu_infos" [
    docker env (
      and_list (
        [
          [ mkdir_p dest ] ;
          [ mkdir_p tmp ] ;
          cmd_cp_l ;
          [ plot_cmd ] ;
        ]
        |> List.concat
      )
    )
  ]
let get_t_choices ~(dataset_results_l: dataset_res list) : t_choices option =
let h0_res = List.find dataset_results_l (is_hyp ~hyp: "H0") in
let ha_res = List.find dataset_results_l (is_hyp ~hyp: "HaPCOC") in
......@@ -85,11 +112,20 @@ let get_simu_infos ~dataset_results =
(** [post_analyses_of_dataset_results_l ~dataset_results_l] assembles the
    [post_analyses] record of a list of dataset results:
    - [t_choices] comes from [get_t_choices];
    - [simu_infos_l] holds one record per dataset result, pairing the value
      of [get_simu_infos] with the result's tree and model prefixes;
    - [simu_infos_plot] is the ["out.pdf"] file of the [group_simu_infos]
      workflow built from [simu_infos_l]. *)
let post_analyses_of_dataset_results_l ~dataset_results_l =
  let t_choices = get_t_choices ~dataset_results_l in
  let simu_infos_l =
    List.map dataset_results_l ~f:(fun dataset_results ->
        {
          simu_infos = get_simu_infos ~dataset_results ;
          tree_prefix = dataset_results.tree_prefix ;
          model_prefix = dataset_results.model_prefix ;
        })
  in
  let simu_infos_plot = (group_simu_infos ~simu_infos_l) / selector ["out.pdf"] in
  { t_choices ; simu_infos_l ; simu_infos_plot }
let repo_of_post_analyses ~prefix ~post_analyses =
[
Repo.[
item [prefix ^ ".pdf"] post_analyses.simu_infos_plot
] |> Repo.shift "simu_infos"
;
(match post_analyses.t_choices with
| None -> []
| Some w ->
......@@ -99,7 +135,7 @@ let repo_of_post_analyses ~prefix ~post_analyses =
item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
] |> Repo.shift "t_choices"
);
List.map post_analyses.simu_infos_l ~f:(fun simu_infos ->
(List.map post_analyses.simu_infos_l ~f:(fun simu_infos ->
match simu_infos.simu_infos with
| None -> []
| Some w ->
......@@ -107,4 +143,5 @@ let repo_of_post_analyses ~prefix ~post_analyses =
item [prefix ^ "." ^ simu_infos.model_prefix ^ ".tsv"] w
] |> Repo.shift "simu_infos"
) |> List.concat
);
] |> List.concat
#!/usr/bin/env Rscript
# Plots, for every (tree, hypothesis) pair found in an input directory, the
# distribution of the rate of amino acids common to both groups of leaves.
#
# Input:
#   --input_dir  directory of tab-separated tables with a header line. Each
#                file name must start with "<tree>.<hyp>." and each table
#                must provide the columns NbCommonAA, NbOnlyConvAA and
#                NbOnlyNonConvAA.
#   -o / --out   output prefix (default "out").
# Output:
#   <out>.pdf    histograms of CommonRate, one panel per hyp (rows) and
#                tree (columns);
#   <out>.tsv    all tables merged, with tree, hyp and CommonRate columns added.

library("optparse")
library("reshape2")
library("ggplot2")
library("cowplot")

option_list = list(
  make_option(c("--input_dir"), type="character", default=NULL,
              help="Input dir", metavar="character"),
  make_option(c("-o","--out"), type="character", default="out",
              help="output prefix [default= %default]", metavar="character")
);

opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);

if (is.null(opt$input_dir)){
  print_help(opt_parser)
  stop("At least one argument must be supplied (input_dir)", call.=FALSE)
}

input_dir = opt$input_dir

## fun...

# Reads one tab-separated table and labels every row with its tree and
# hypothesis. rep() keeps the assignment valid for a table without any row.
read_dir = function(file, tree, hyp) {
  df = read.csv(file, sep = "\t", header = TRUE)
  df$tree = rep(tree, nrow(df))
  df$hyp = rep(hyp, nrow(df))
  return(df)
}

## program...

files = list.files(input_dir)
if (length(files) == 0) {
  stop(paste0("No input file found in: ", input_dir), call.=FALSE)
}

# File names follow "<tree>.<hyp>.<...>": the first two "."-separated fields
# are the labels. Names with fewer than two fields are rejected explicitly
# instead of letting rbind() recycle fields silently.
files_split = strsplit(files, ".", fixed = TRUE)
n_fields = vapply(files_split, length, integer(1))
if (any(n_fields < 2)) {
  stop(paste0("File name(s) not of the form <tree>.<hyp>.*: ",
              paste(files[n_fields < 2], collapse = ", ")),
       call.=FALSE)
}

files_df_ok = data.frame(
  files = file.path(input_dir, files),
  tree = vapply(files_split, function(fields) fields[1], character(1)),
  hyp = vapply(files_split, function(fields) fields[2], character(1)),
  stringsAsFactors = FALSE
)

tables = lapply(seq_len(nrow(files_df_ok)), function(i) {
  read_dir(files_df_ok$files[i], files_df_ok$tree[i], files_df_ok$hyp[i])
})
df = do.call(rbind, tables)

# Rows whose three counts are all 0 get NaN here (0 / 0).
df$CommonRate = df$NbCommonAA / (df$NbCommonAA+df$NbOnlyConvAA+df$NbOnlyNonConvAA)

x_labs = "# of common AA between Conv and not Conv Leaves / # of AA"
y_labs = "# of sites"

p = ggplot(df, aes(x=CommonRate, fill=hyp)) + theme_bw() + labs(x=x_labs, y=y_labs)
p = p + geom_histogram(binwidth = 0.05)
p = p + facet_grid(hyp ~ tree)

# The figure size grows with the number of facet columns (trees) and facet
# rows (hypotheses).
output_pdf = paste0(opt$out,".pdf")
save_plot(output_pdf,
          p,
          ncol = 0.4 * length(unique(df$tree)),
          nrow = 0.35 * length(unique(df$hyp)),
          base_aspect_ratio = 2,
          limitsize = FALSE
)

output_tsv = paste0(opt$out,".tsv")
write.table(df, file=output_tsv, row.names=FALSE, quote=FALSE, sep = "\t")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment