Commit 73471609 authored by Carine Rey's avatar Carine Rey
Browse files

forgotten script

parent 7d889316
#!/usr/bin/env Rscript
library("optparse")
library("reshape2")
library("ggplot2")
library("cowplot")
library("plyr")
option_list = list(
make_option(c("-i","--input_dir"), type="character", default=NULL,
help="Input dir", metavar="character"),
make_option(c("-I","--input_dir2"), type="character", default=NULL,
help="Input dir2", metavar="character"),
make_option(c("-o","--out"), type="character", default="out",
help="output prefix [default= %default]", metavar="character"),
make_option(c("-p","--profil"), type="character", default="None",
help="Profil name [default= %default]", metavar="character")
);
opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);
date = format(Sys.time(), format="%Y-%m-%d %X")
if (is.null(opt$input_dir)){
print_help(opt_parser)
stop("At least one argument must be supplied (input_dir)", call.=FALSE)
}
input_dir = opt$input_dir
input_dir2 = opt$input_dir2
## fun...
## program... max_t_per_tree
files = paste0(list.files(input_dir))
print(files)
files = files[grep("tsv", files)]
files_split = strsplit(files, ".", fixed = T)
files_df = as.data.frame(do.call(rbind, files_split))
print(files_df)
files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = files_df$V1, bl = files_df$V1, profil = opt$profil)
read_dir = function(x) {
file = x["files"]
tree = x["tree"]
profil = x["profil"]
bl = x["bl"]
df = read.csv(file, sep = "\t", header = T)
df$tree = tree
df$profil = profil
df$bl = bl
return(df)
}
df = do.call(rbind, apply(files_df_ok, 1, read_dir))
alpha = 0.7
x_labs = ""
y_labs = "Threshold"
df_max_mcc_per_method = do.call(rbind, lapply(split(df,df$methode),
function(x) {return(x[which.max(x$threshold),c("methode", "threshold","tree","profil")])}))
df_max_mcc_per_method_2 = df_max_mcc_per_method
df_max_mcc_per_method_2 [ df_max_mcc_per_method_2$tree == unique(df_max_mcc_per_method_2$tree)[1],]
plot = ggplot(df, aes(x=tree, y=threshold, col=bl, shape = profil)) + theme_bw() + labs(x=x_labs, y=y_labs) + ylim(c(0,1.25))
plot = plot + theme(axis.text.x = element_text(angle = 0, hjust = 1))
plot = plot + geom_point()
plot = plot + geom_hline( data = df_max_mcc_per_method, aes(yintercept = threshold), col="red", alpha=alpha, show.legend = NA,linetype="dotted")
plot = plot + geom_label( data = df_max_mcc_per_method_2, nudge_y = 0.15, aes(label = threshold), col="red", alpha=alpha, show.legend = NA)
plot = plot + facet_grid(methode ~ .) + coord_flip()
plot = plot + ggtitle("t max per tree")
output_pdf = paste0(opt$out,".max_t_per_tree.pdf")
save_plot(output_pdf,
plot,
ncol = 1,
nrow = 0.3 * length(unique(df$methode)),
base_aspect_ratio = 1.5,
limitsize = FALSE
)
output_tsv = paste0(opt$out,".max_t_per_tree.tsv")
write.table(df, file=output_tsv, row.names=FALSE, quote=F, sep = "\t")
####################
# Spe en sens
####################
df_t = df
parse_file = function(df_m, df_t) {
df_m_melt = melt(df_m)
df_m_melt$t = mapvalues(df_m_melt$variable, from=df_t$methode, to=df_t$threshold)
df_m_melt$t = as.numeric(as.character(df_m_melt$t))
df_m_melt$is_P = df_m_melt$value > df_m_melt$t
P = tapply(df_m_melt$is_P, df_m_melt$variable , function(x) {sum(x, na.rm=T)})
n_sites = dim(df_m)[1]
df = data.frame(methode = names(P), P = P)
df =merge ( df, df_t, by="methode")
df$P = df$P / n_sites
return(df)
}
## program... sens_spe_plot
files = paste0(list.files(input_dir2))
files = files[grep("tsv", files)]
files_split = strsplit(files, "@", fixed = T)
files_df = as.data.frame(do.call(rbind, files_split))
print(files_df)
files_df_ok = data.frame(files= paste0(input_dir2,"/",files), tree = files_df$V1, hyp = gsub(".tsv","",files_df$V2), bl = files_df$V1, profil = opt$profil)
read_dir = function(x) {
file = x["files"]
tree = x["tree"]
profil = x["profil"]
bl = x["bl"]
hyp = x["hyp"]
df_m = read.csv(file, sep = "\t", header = T, na.strings=c("NA","NaN", " ", ""))
df = parse_file(df_m[,colnames(df_m) %in% df_t$methode], df_t[df_t$tree == tree, c("methode","threshold")])
df$tree = tree
df$profil = profil
df$bl = bl
df$hyp = hyp
return(df)
}
df = do.call(rbind, apply(files_df_ok, 1, read_dir))
rownames(df) <- NULL
hyps_from = unique(df$hyp)
hyps_to = rep("Specificity",length(hyps))
hyps_to[grep("Ha",hyps_from)] = "Sensitivity"
df$variable = mapvalues(df$hyp, from=hyps_from,
to = hyps_to)
df$P[df$variable == "Specificity"] = 1-df$P[df$variable == "Specificity"]
x_labs = ""
y_labs = "Value"
plot = ggplot(df, aes(x=methode, y=P, fill=hyp)) + theme_bw() + labs(x=x_labs, y=y_labs) + ylim(c(0,1.1))
plot = plot + theme(axis.text.x = element_text(angle = 30, hjust = 1))
plot = plot + geom_bar(stat="identity", position=position_dodge(0.9), color= "black", alpha = 0.7)
plot = plot + geom_text( aes(y = P + 0.05, label = round(P,2)), angle = 90, hjust = 0, position=position_dodge(0.9))
plot = plot + geom_text( aes(y = 0.01, label = paste0("t=",threshold)), size = 3, angle = 90, hjust = 0, position=position_dodge(0.9))
plot = plot + facet_grid(variable ~ tree)
plot = plot + ggtitle(paste0("Compa methodes - ",opt$profil, " - (", date , ")" ))
plot = plot + scale_color_manual(values=c('gray','black'), guide=FALSE)
plot = plot + scale_alpha(range=c(0.7,1), guide=FALSE)
output_pdf2 = paste0(opt$out,".sens_spe_auto_t.pdf")
save_plot(output_pdf2,
plot,
ncol = length(unique(df$hyp)) / 3 * length(unique(df$tree)),
nrow = 2,
base_aspect_ratio = 1.5,
limitsize = FALSE
)
output_tsv = paste0(opt$out,".sens_spe_auto_t.tsv")
write.table(df, file=output_tsv, row.names=FALSE, quote=F, sep = "\t")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment