Commit 82fd212c authored by Carine Rey's avatar Carine Rey
Browse files

plot also precistion with ratio 50/50

parent bfb4b028
......@@ -11,17 +11,17 @@ library("cowplot")
date = format(Sys.time(), format="%Y-%m-%d %X")
option_list = list(
make_option(c("--H0") , type="character", default=NULL, help="merged_results H0", metavar="character"),
make_option(c("--H0NeBig") , type="character", default=NULL, help="merged_results H0NeBig", metavar="character"),
make_option(c("--H0NeSmall") , type="character", default=NULL, help="merged_results H0NeSmall", metavar="character"),
make_option(c("--H0NeBigInSmall") , type="character", default=NULL, help="merged_results H0NeBigInSmall", metavar="character"),
make_option(c("--H0NeSmallInBig") , type="character", default=NULL, help="merged_results H0NeSmallInBig", metavar="character"),
make_option(c("--HaPCOC") , type="character", default=NULL, help="merged_results HaPCOC", metavar="character"),
make_option(c("--HaPC") , type="character", default=NULL, help="merged_results HaPC", metavar="character"),
make_option(c("--HaPCNeBig") , type="character", default=NULL, help="merged_results HaPCNeBig", metavar="character"),
make_option(c("--HaPCNeSmall") , type="character", default=NULL, help="merged_results HaPCNeSmall", metavar="character"),
make_option(c("--HaPCNeBigInSmall") , type="character", default=NULL, help="merged_results HaPCNeBigInSmall", metavar="character"),
make_option(c("--HaPCNeSmallInBig") , type="character", default=NULL, help="merged_results HaPCNeSmallInBig", metavar="character"),
make_option(c("--H0") , type="character", default=NA, help="merged_results H0", metavar="character"),
make_option(c("--H0NeBig") , type="character", default=NA, help="merged_results H0NeBig", metavar="character"),
make_option(c("--H0NeSmall") , type="character", default=NA, help="merged_results H0NeSmall", metavar="character"),
make_option(c("--H0NeBigInSmall") , type="character", default=NA, help="merged_results H0NeBigInSmall", metavar="character"),
make_option(c("--H0NeSmallInBig") , type="character", default=NA, help="merged_results H0NeSmallInBig", metavar="character"),
make_option(c("--HaPCOC") , type="character", default=NA, help="merged_results HaPCOC", metavar="character"),
make_option(c("--HaPC") , type="character", default=NA, help="merged_results HaPC", metavar="character"),
make_option(c("--HaPCNeBig") , type="character", default=NA, help="merged_results HaPCNeBig", metavar="character"),
make_option(c("--HaPCNeSmall") , type="character", default=NA, help="merged_results HaPCNeSmall", metavar="character"),
make_option(c("--HaPCNeBigInSmall") , type="character", default=NA, help="merged_results HaPCNeBigInSmall", metavar="character"),
make_option(c("--HaPCNeSmallInBig") , type="character", default=NA, help="merged_results HaPCNeSmallInBig", metavar="character"),
make_option(c("-o","--out"), type="character", default="out",
help="output prefix [default= %default]", metavar="character")
......@@ -30,6 +30,8 @@ option_list = list(
opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);
print(opt)
if (is.null(opt$H0)){
print_help(opt_parser)
stop("At least one argument must be supplied (H0 input file)", call.=FALSE)
......@@ -46,7 +48,8 @@ if (is.null(opt$HaPCOC)){
print("prep input files")
read_hyp = function(opt_name) {
if (! is.null(opt_name)){
print(opt_name)
if (! is.na(opt_name)){
df = read.table(opt_name, header=TRUE, sep = '\t',na.strings = "NA")
df = df[, ! colnames(df) %in% c("Indel_prop", "Indel_prop.ConvLeaves.")]
id_vars = if ("P_distance" %in% colnames(df)) {c("Sites", "P_distance")} else {c("Sites")}
......@@ -64,12 +67,17 @@ df_H0NeBigInSmall_melt = read_hyp(opt$H0NeBigInSmall)
df_H0NeSmallInBig_melt = read_hyp(opt$H0NeSmallInBig)
df_HaPCOC_melt = read_hyp(opt$HaPCOC)
print(df_HaPCOC_melt)
df_HaPC_melt = read_hyp(opt$HaPC)
print(df_HaPC_melt)
df_HaPCNeBig_melt = read_hyp(opt$HaPCNeBig)
df_HaPCNeSmall_melt = read_hyp(opt$HaPCNeSmall)
df_HaPCNeBigInSmall_melt = read_hyp(opt$HaPCNeBigInSmall)
df_HaPCNeSmallInBig_melt = read_hyp(opt$HaPCNeSmallInBig)
########################################################################
print("prep df_d")
......@@ -85,16 +93,15 @@ build_df_dist_couple = function (df_h0,df_ha,name) {
return(df_melt)
}
df_d_H0HaPCOC = build_df_dist_couple(df_H0_melt, df_HaPCOC_melt, "H0/HaPCOC")
df_d_H0HaPC = build_df_dist_couple(df_H0_melt, df_HaPC_melt, "H0/HaPC")
df_d_H0HaPC_NeBig = build_df_dist_couple(df_H0NeBig_melt, df_HaPCNeBig_melt, "H0/HaPC NeBig")
df_d_H0HaPC_NeSmall = build_df_dist_couple(df_H0NeSmall_melt, df_HaPCNeSmall_melt, "H0/HaPC NeSmall")
df_d_H0HaPC_NeBigInSmall = build_df_dist_couple(df_H0NeBigInSmall_melt, df_HaPCNeBigInSmall_melt, "H0/HaPC NeBigInSmall")
df_d_H0HaPC_NeSmallInBig = build_df_dist_couple(df_H0NeSmallInBig_melt, df_HaPCNeSmallInBig_melt, "H0/HaPC NeSmallInBig")
df_d_H0HaPCOC = build_df_dist_couple(df_H0_melt, df_HaPCOC_melt, "H0/HaPCOC")
df_d_H0NeBigHaPCOC = build_df_dist_couple(df_H0NeBig_melt, df_HaPCOC_melt, "H0NeBig/HaPCOC")
df_d = rbind.data.frame(df_d_H0HaPC, df_d_H0HaPCOC,df_d_H0HaPC_NeBig,df_d_H0HaPC_NeSmall,
df_d_H0HaPC_NeBigInSmall,df_d_H0HaPC_NeSmallInBig,df_d_H0NeBigHaPCOC)
df_d_H0HaPC_NeBigInSmall,df_d_H0HaPC_NeSmallInBig)
df_d = df_d[order(df_d$methode),]
......@@ -155,11 +162,11 @@ df_H0HaPC_NeSmall = build_df_couple(df_H0NeSmall_melt, df_HaPCNeSmall_melt, "H0/
df_H0HaPC_NeBigInSmall = build_df_couple(df_H0NeBigInSmall_melt, df_HaPCNeBigInSmall_melt, "H0/HaPC NeBigInSmall")
df_H0HaPC_NeSmallInBig = build_df_couple(df_H0NeSmallInBig_melt, df_HaPCNeSmallInBig_melt, "H0/HaPC NeSmallInBig")
df_H0HaPCOC = build_df_couple(df_H0_melt, df_HaPCOC_melt, "H0/HaPCOC")
df_H0NeBigHaPCOC = build_df_couple(df_H0NeBig_melt, df_HaPCOC_melt, "H0NeBig/HaPCOC")
df = rbind.data.frame(df_H0HaPC, df_H0HaPCOC,df_H0HaPC_NeBig,df_H0HaPC_NeSmall,
df_H0HaPC_NeBigInSmall,df_H0HaPC_NeSmallInBig,
df_H0NeBigHaPCOC)
df_H0HaPC_NeBigInSmall,df_H0HaPC_NeSmallInBig
)
print(head(df))
print(tail(df))
......@@ -188,14 +195,15 @@ df$mcc[is.na(df$mcc)] = 0
## Precision
df$precision = (df$sens * 0.02) / (df$sens * 0.02 + (1-df$spe) * 0.98)
df$precision98_02 = (df$sens * 0.02) / (df$sens * 0.02 + (1-df$spe) * 0.98)
df$precision50_50 = (df$sens * 0.5) / (df$sens * 0.5 + (1-df$spe) * 0.5)
print(head(df))
print(tail(df))
df_out = df[,c("Row.names", "t", "sens", "spe", "mcc","precision","couple")]
colnames(df_out) = c("methode", "threshold", "sensitivity", "specificity", "MCC", "precision","couple")
df_out = df[,c("Row.names", "t", "sens", "spe", "mcc", "precision98_02","precision50_50","couple")]
colnames(df_out) = c("methode", "threshold", "sensitivity", "specificity", "MCC", "precision98_02","precision50_50","couple")
df_out = df_out[order(df_out$methode),]
......@@ -206,7 +214,7 @@ print("prep plot max mcc")
df_max_mcc_per_method = do.call(rbind, lapply(split(df_out,paste0(df_out$methode,df_out$couple)),
function(x) {
return(x[which.max(x$MCC),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")])
return(x[which.max(x$MCC),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision98_02","precision50_50")])
}))
print(df_max_mcc_per_method)
......@@ -226,13 +234,13 @@ print(table(df_out$methode, df_out$couple))
df_recall_sup09_per_meth = do.call(rbind, lapply(split(df_out,paste0(df_out$methode," ", df_out$couple)),
function(x) {
x$precision[is.na(x$precision)] = 0
x2 = x[x$precision > 0.9,]
x$precision98_02[is.na(x$precision98_02)] = 0
x2 = x[x$precision98_02 > 0.9,]
if (nrow(x2) > 0) {
res = x2[which.max(x2$sensitivity),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")]
res = x2[which.max(x2$sensitivity),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision98_02","precision50_50")]
} else {
x = x[1,c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")]
x[,c("threshold", "MCC","sensitivity","specificity","precision")] = NA
x = x[1,c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision98_02","precision50_50")]
x[,c("threshold", "MCC","sensitivity","specificity","precision98_02","precision50_50")] = NA
res = x
}
return(res)
......@@ -273,14 +281,14 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
df_out_melt = melt(df_out, id.vars = c("couple", "methode","threshold"))
df_out_melt$methode = factor(df_out_melt$methode, levels = meths)
df_out_melt$variable = factor(df_out_melt$variable, levels = c("sensitivity", "specificity", "precision", "MCC"))
df_out_melt$variable = factor(df_out_melt$variable, levels = c("sensitivity", "specificity", "precision98_02","precision50_50", "MCC"))
df_out_melt$couple = factor(df_out_melt$couple, levels = couple_levels)
print("plot recall_precision_per_meth")
plot = ggplot(df_out, aes(x=sensitivity, y=precision, col = couple))
plot = ggplot(df_out, aes(x=sensitivity, y=precision98_02, col = couple))
plot = plot + theme_bw()
plot = plot + labs(x="Sensitivity (= Recall)", y="Precision")
plot = plot + labs(x="Sensitivity (= Recall)", y="Precision (98/2)")
plot = plot + theme(legend.position="top")
plot = plot + ylim(c(0,1)) + xlim(c(0,1))
plot = plot + guides(fill=FALSE) + guides(col=FALSE)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment