Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 3cc18d50 authored by Carine Rey's avatar Carine Rey
Browse files

new plots

parent f9e97ed4
......@@ -57,33 +57,33 @@ read_hyp = function(opt_name) {
if (! is.na(opt_name)){
df = read.table(opt_name, header=TRUE, sep = '\t',na.strings = "NA")
df = df[, ! colnames(df) %in% c("Indel_prop", "Indel_prop.ConvLeaves.")]
id_vars = if ("P_JSD" %in% colnames(df)) {c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2")} else {c("Sites")}
df_melt = melt(df, id.vars = id_vars, variable.name = "methode")
return(df_melt)
#id_vars = if ("P_JSD" %in% colnames(df)) {c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2")} else {c("Sites")}
#df_melt = melt(df, id.vars = id_vars, variable.name = "methode")
return(df)
} else {
return(NULL)
}
}
df_H0_NeG1_melt = read_hyp(opt$H0_NeG1 )
df_H0_NeG2_melt = read_hyp(opt$H0_NeG2 )
df_H0_NeG3_melt = read_hyp(opt$H0_NeG3 )
df_H0_NeG4_melt = read_hyp(opt$H0_NeG4 )
df_H0_NeG5_melt = read_hyp(opt$H0_NeG5 )
df_HaPC_NeG1_melt = read_hyp(opt$HaPC_NeG1 )
df_HaPC_NeG2_melt = read_hyp(opt$HaPC_NeG2 )
df_HaPC_NeG3_melt = read_hyp(opt$HaPC_NeG3 )
df_HaPC_NeG4_melt = read_hyp(opt$HaPC_NeG4 )
df_HaPC_NeG5_melt = read_hyp(opt$HaPC_NeG5 )
df_H0_NeG1 = read_hyp(opt$H0_NeG1 )
df_H0_NeG2 = read_hyp(opt$H0_NeG2 )
df_H0_NeG3 = read_hyp(opt$H0_NeG3 )
df_H0_NeG4 = read_hyp(opt$H0_NeG4 )
df_H0_NeG5 = read_hyp(opt$H0_NeG5 )
df_HaPC_NeG1 = read_hyp(opt$HaPC_NeG1 )
df_HaPC_NeG2 = read_hyp(opt$HaPC_NeG2 )
df_HaPC_NeG3 = read_hyp(opt$HaPC_NeG3 )
df_HaPC_NeG4 = read_hyp(opt$HaPC_NeG4 )
df_HaPC_NeG5 = read_hyp(opt$HaPC_NeG5 )
df_HaPC_NeG5_NeC_div2_melt = read_hyp(opt$HaPC_NeG5_NeC_div2)
df_HaPC_NeG5_NeC_x2_melt = read_hyp(opt$HaPC_NeG5_NeC_x2 )
df_H0_NeG5_NeC_div2_melt = read_hyp(opt$H0_NeG5_NeC_div2 )
df_H0_NeG5_NeC_x2_melt = read_hyp(opt$H0_NeG5_NeC_x2 )
df_HaPC_NeG5_NeC_div2 = read_hyp(opt$HaPC_NeG5_NeC_div2)
df_HaPC_NeG5_NeC_x2 = read_hyp(opt$HaPC_NeG5_NeC_x2 )
df_H0_NeG5_NeC_div2 = read_hyp(opt$H0_NeG5_NeC_div2 )
df_H0_NeG5_NeC_x2 = read_hyp(opt$H0_NeG5_NeC_x2 )
df_HaPCOC_melt = read_hyp(opt$HaPCOC)
df_HaPCOC = read_hyp(opt$HaPCOC)
......@@ -95,6 +95,9 @@ print("prep df_d")
build_df_dist_couple = function (df_h0,df_ha,name) {
if ((! is.null(df_h0)) & (! is.null(df_ha))) {
id_vars = if ("P_JSD" %in% colnames(df_h0)) {c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2")} else {c("Sites")}
df_h0 = melt(df_h0, id.vars = id_vars, variable.name = "methode")
df_ha = melt(df_ha, id.vars = id_vars, variable.name = "methode")
print(head(df_h0))
df = merge(df_h0, df_ha, by = c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2", "methode"), suffix = c("_H0","_Ha"))
df_melt = melt(df, id.vars = c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2", "methode"), variable.name = "val_H0Ha")
......@@ -105,15 +108,15 @@ build_df_dist_couple = function (df_h0,df_ha,name) {
return(df_melt)
}
df_d_H0HaPCOC_NeG1 = build_df_dist_couple(df_H0_NeG1_melt, df_HaPCOC_melt, "H0/HaPCOC NeG1")
df_d_H0HaPC_NeG1 = build_df_dist_couple(df_H0_NeG1_melt, df_HaPC_NeG1_melt, "H0/HaPC NeG1")
df_d_H0HaPC_NeG2 = build_df_dist_couple(df_H0_NeG2_melt, df_HaPC_NeG2_melt, "H0/HaPC NeG2")
df_d_H0HaPC_NeG3 = build_df_dist_couple(df_H0_NeG3_melt, df_HaPC_NeG3_melt, "H0/HaPC NeG3")
df_d_H0HaPC_NeG4 = build_df_dist_couple(df_H0_NeG4_melt, df_HaPC_NeG4_melt, "H0/HaPC NeG4")
df_d_H0HaPC_NeG5 = build_df_dist_couple(df_H0_NeG5_melt, df_HaPC_NeG5_melt, "H0/HaPC NeG5")
df_d_H0HaPCOC_NeG1 = build_df_dist_couple(df_H0_NeG1, df_HaPCOC, "H0/HaPCOC NeG1")
df_d_H0HaPC_NeG1 = build_df_dist_couple(df_H0_NeG1, df_HaPC_NeG1, "H0/HaPC NeG1")
df_d_H0HaPC_NeG2 = build_df_dist_couple(df_H0_NeG2, df_HaPC_NeG2, "H0/HaPC NeG2")
df_d_H0HaPC_NeG3 = build_df_dist_couple(df_H0_NeG3, df_HaPC_NeG3, "H0/HaPC NeG3")
df_d_H0HaPC_NeG4 = build_df_dist_couple(df_H0_NeG4, df_HaPC_NeG4, "H0/HaPC NeG4")
df_d_H0HaPC_NeG5 = build_df_dist_couple(df_H0_NeG5, df_HaPC_NeG5, "H0/HaPC NeG5")
df_d_H0HaPC_NeG5_NeC_div2 = build_df_dist_couple(df_H0_NeG5_NeC_div2_melt, df_HaPC_NeG5_NeC_div2_melt, "H0/HaPC NeG5_NeC_div2")
df_d_H0HaPC_NeG5_NeC_x2 = build_df_dist_couple(df_H0_NeG5_NeC_x2_melt, df_HaPC_NeG5_NeC_x2_melt, "H0/HaPC NeG5_NeC_x2")
df_d_H0HaPC_NeG5_NeC_div2 = build_df_dist_couple(df_H0_NeG5_NeC_div2, df_HaPC_NeG5_NeC_div2, "H0/HaPC NeG5_NeC_div2")
df_d_H0HaPC_NeG5_NeC_x2 = build_df_dist_couple(df_H0_NeG5_NeC_x2, df_HaPC_NeG5_NeC_x2, "H0/HaPC NeG5_NeC_x2")
df_d = rbind.data.frame(
df_d_H0HaPCOC_NeG1,
......@@ -136,7 +139,7 @@ print("prep df")
## fun...
calc_TN_FP = function(vals, t){
calc_TN_FP = function(t, vals){
TN = 0
FP = 0
vals = na.omit(vals)
......@@ -144,10 +147,10 @@ calc_TN_FP = function(vals, t){
TN = sum(vals < t)
FP = sum(vals >= t)
}
return(data.frame(TN = TN, FP = FP))
return(data.frame(t=t, TN = TN, FP = FP))
}
calc_TP_FN = function(vals, t){
calc_TP_FN = function(t, vals){
FN = 0
TP = 0
vals = na.omit(vals)
......@@ -155,15 +158,20 @@ calc_TP_FN = function(vals, t){
FN = sum(vals < t)
TP = sum(vals >= t)
}
return(data.frame(TP = TP, FN = FN))
return(data.frame(t=t, TP = TP, FN = FN))
}
calc_TN_FP_TP_FN = function(t, df_H0_melt, df_Ha_melt){
df_TN_FP = do.call(rbind.data.frame,tapply(df_H0_melt$value, df_H0_melt$methode, calc_TN_FP, t=t))
df_TP_FN = do.call(rbind.data.frame,tapply(df_Ha_melt$value, df_Ha_melt$methode, calc_TP_FN, t=t))
calc_TN_FP_TP_FN = function(meth, df_H0, df_Ha){
t_list =c(0,1,df_H0[,meth],df_Ha[,meth])
t_list = sort(t_list)
t_list = unique(t_list)
df_TN_FP = do.call(rbind.data.frame,lapply(t_list, calc_TN_FP, vals=df_H0[,meth]))
df_TP_FN = do.call(rbind.data.frame,lapply(t_list, calc_TP_FN, vals=df_Ha[,meth]))
df_TN_FP_TP_FN = merge(df_TN_FP, df_TP_FN, by="t")
df_TN_FP_TP_FN$methode = meth
df_TN_FP_TP_FN = merge(df_TN_FP, df_TP_FN, by=0)
df_TN_FP_TP_FN$t = t
return(df_TN_FP_TP_FN)
}
......@@ -171,7 +179,9 @@ calc_TN_FP_TP_FN = function(t, df_H0_melt, df_Ha_melt){
build_df_couple = function (df_h0,df_ha,name) {
print(name)
if ((! is.null(df_h0)) & (! is.null(df_ha))) {
df = do.call(rbind.data.frame,lapply(unique(c(0,1,df_h0$value, df_ha$value)) , calc_TN_FP_TP_FN, df_H0_melt = df_h0, df_Ha_melt = df_ha))
id_vars = if ("P_JSD" %in% colnames(df_h0)) {c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2")} else {c("Sites")}
meth_l = colnames(df_h0)[ ! colnames(df_h0) %in% id_vars]
df = do.call(rbind.data.frame,lapply(unique(meth_l) , calc_TN_FP_TP_FN, df_H0 = df_h0, df_Ha = df_ha))
df$couple = name
print(head(df))
} else {
......@@ -182,20 +192,20 @@ build_df_couple = function (df_h0,df_ha,name) {
return(df)
}
df_H0HaPCOC_NeG1 = build_df_couple(df_H0_NeG1_melt, df_HaPCOC_melt , "H0/HaPCOC NeG1")
df_H0HaPC_NeG1 = build_df_couple(df_H0_NeG1_melt, df_HaPC_NeG1_melt, "H0/HaPC NeG1")
df_H0HaPC_NeG2 = build_df_couple(df_H0_NeG2_melt, df_HaPC_NeG2_melt, "H0/HaPC NeG2")
df_H0HaPC_NeG3 = build_df_couple(df_H0_NeG3_melt, df_HaPC_NeG3_melt, "H0/HaPC NeG3")
df_H0HaPC_NeG4 = build_df_couple(df_H0_NeG4_melt, df_HaPC_NeG4_melt, "H0/HaPC NeG4")
df_H0HaPC_NeG5 = build_df_couple(df_H0_NeG5_melt, df_HaPC_NeG5_melt, "H0/HaPC NeG5")
df_H0HaPCOC_NeG1 = build_df_couple(df_H0_NeG1, df_HaPCOC , "H0/HaPCOC NeG1")
df_H0HaPC_NeG1 = build_df_couple(df_H0_NeG1, df_HaPC_NeG1, "H0/HaPC NeG1")
df_H0HaPC_NeG2 = build_df_couple(df_H0_NeG2, df_HaPC_NeG2, "H0/HaPC NeG2")
df_H0HaPC_NeG3 = build_df_couple(df_H0_NeG3, df_HaPC_NeG3, "H0/HaPC NeG3")
df_H0HaPC_NeG4 = build_df_couple(df_H0_NeG4, df_HaPC_NeG4, "H0/HaPC NeG4")
df_H0HaPC_NeG5 = build_df_couple(df_H0_NeG5, df_HaPC_NeG5, "H0/HaPC NeG5")
df_H0HaPC_NeG5_NeC_div2 = build_df_couple(df_H0_NeG5_NeC_div2_melt, df_HaPC_NeG5_NeC_div2_melt, "H0/HaPC NeG5_NeC_div2")
df_H0HaPC_NeG5_NeC_x2 = build_df_couple(df_H0_NeG5_NeC_x2_melt, df_HaPC_NeG5_NeC_x2_melt, "H0/HaPC NeG5_NeC_x2")
df_H0HaPC_NeG5_NeC_div2 = build_df_couple(df_H0_NeG5_NeC_div2, df_HaPC_NeG5_NeC_div2, "H0/HaPC NeG5_NeC_div2")
df_H0HaPC_NeG5_NeC_x2 = build_df_couple(df_H0_NeG5_NeC_x2, df_HaPC_NeG5_NeC_x2, "H0/HaPC NeG5_NeC_x2")
df_l = list(df_H0HaPCOC_NeG1,
df_H0HaPC_NeG1, df_H0HaPC_NeG2, df_H0HaPC_NeG3, df_H0HaPC_NeG4, df_H0HaPC_NeG5, df_H0HaPC_NeG5_NeC_div2,
df_H0HaPC_NeG5_NeC_x2)
df_l = list(df_H0HaPCOC_NeG1, df_H0HaPC_NeG1, df_H0HaPC_NeG2, df_H0HaPC_NeG3,
df_H0HaPC_NeG4, df_H0HaPC_NeG5, df_H0HaPC_NeG5_NeC_div2,
df_H0HaPC_NeG5_NeC_x2)
df_l = df_l[-which(sapply(df_l, is.null))]
df = do.call("rbind",df_l)
......@@ -233,7 +243,7 @@ print(head(df))
print(tail(df))
df_out = df[,c("Row.names", "t", "sens", "spe", "mcc", "precision98_02","precision50_50","couple")]
df_out = df[,c("methode", "t", "sens", "spe", "mcc", "precision98_02","precision50_50","couple")]
colnames(df_out) = c("methode", "threshold", "sensitivity", "specificity", "MCC", "precision98_02","precision50_50","couple")
df_out = df_out[order(df_out$methode),]
......@@ -258,11 +268,6 @@ df_max_mcc_per_method_2$variable="MCC"
print("prep plot recall_precision_per_meth")
print(df_out[is.na(df_out$methode),])
print(table(df_out$methode))
print(table(df_out$methode, df_out$couple))
df_recall_sup09_per_meth = do.call(rbind, lapply(split(df_out,paste0(df_out$methode," ", df_out$couple)),
function(x) {
x$precision98_02[is.na(x$precision98_02)] = 0
......@@ -296,7 +301,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
meths = sort(unique(df_out$methode))
}
alpha = 0.5
alpha = 0.7
df_out$methode = factor(df_out$methode, levels = meths)
......@@ -324,9 +329,9 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot = plot + ylim(c(0,1)) + xlim(c(0,1))
plot = plot + guides(fill=FALSE) + guides(col=FALSE)
plot = plot + scale_color_manual(values=colors)
plot = plot + geom_point(size=0.5, alpha=alpha)
plot = plot + geom_step(direction="vh")
plot = plot + geom_hline( aes(yintercept = 0.9), col="black" , size = 1, show.legend = NA,linetype="dashed")
#plot = plot + geom_point(size=1, alpha=alpha)
plot = plot + geom_step(direction="vh", size=1, alpha=alpha)
plot = plot + geom_hline( aes(yintercept = 0.9), col="black" , size = 0.5, show.legend = NA,linetype="dashed")
plot = plot + facet_grid(couple ~ methode)
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot = plot + ggtitle(paste0("Threshold definition such as precision > 0.9 - (", date , ")" ))
......@@ -340,6 +345,45 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
base_aspect_ratio = 1,
limitsize = FALSE
)
print("plot recall_precision")
plot = ggplot(df_out, aes(x=sensitivity, y=precision98_02, col = methode))
plot = plot + theme_bw()
plot = plot + labs(x="Sensitivity (= Recall)", y="Precision (98/2)")
plot = plot + theme(legend.position="top")
plot = plot + ylim(c(0,1)) + xlim(c(0,1))
plot = plot + guides(fill=FALSE)
plot = plot + scale_color_manual(values=colors)
#plot = plot + geom_point(size=1, alpha=alpha)
plot = plot + geom_step(direction="vh", size=0.5, alpha=alpha)
plot = plot + geom_hline( aes(yintercept = 0.9), col="black" , size = 0.5, show.legend = NA,linetype="dashed")
plot = plot + facet_grid(. ~ couple )
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
legend_PR = get_legend(plot + theme(legend.position="top",
legend.text = element_text(size=10),
legend.title = element_text(size=0),
plot.margin = unit(x = c(0, 0, 0, 0), units = "cm"),
legend.background = element_rect(fill="white", size=0.5, linetype="solid",
colour ="black")) +
guides( colour = guide_legend(override.aes = list(alpha = 1,size = 3), nrow=2))
)
plot_recall_precision_papier = plot_grid(legend_PR, plot + theme(legend.position="none"), ncol = 1, scale = 1,
labels = c("",""),
align = "h",
rel_heights = c( 0.3, 0.7),
hjust = 0, vjust = 1)
save_plot(paste0(opt$out,suffix,".recall_precision.pdf"),
plot_recall_precision_papier,
ncol = 0.6* length(unique(df_out_melt$couple)),
nrow = 1,
base_aspect_ratio = 1,
limitsize = FALSE
)
print("plot per indicator")
......@@ -347,13 +391,18 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
y_labs = ""
plot = ggplot(df_out_melt, aes(x=threshold, y=value, col=couple))
plot = plot + theme_bw()
plot = plot + theme(legend.position="top")
plot = plot + theme(legend.position="top",
plot.margin = unit(x = c(0, 0, 0, 0), units = "cm"),
legend.background = element_rect(fill="white", size=0.5, linetype="solid", colour ="black")) +
guides(colour = guide_legend(override.aes = list(alpha = 1), nrow=2))
plot = plot + guides(fill=FALSE) + theme(legend.position="top")
plot = plot + labs(x=x_labs, y=y_labs) #+ ylim(y_lim)
plot = plot + geom_point(size=0.5, alpha=alpha)
#plot = plot + geom_point(size=0.5, alpha=alpha)
plot = plot + geom_step(size=1, alpha=alpha)
plot = plot + scale_color_manual(values=colors)
#plot = plot + geom_hline( data = df_max_mcc_per_method_2, aes(yintercept = MCC), alpha=alpha, show.legend = NA)
plot = plot + geom_vline( data = df_recall_sup09_per_meth, aes(xintercept = threshold, col=couple), size = 1, show.legend = NA,linetype="dashed")
plot = plot + geom_vline( data = df_recall_sup09_per_meth, aes(xintercept = threshold, col=couple), size = 0.5, show.legend = NA,linetype="dashed")
plot = plot + facet_grid(variable ~ methode, scales="free")
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
......@@ -449,21 +498,29 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
limitsize = FALSE
)
plot = plot_grid(plot_recall_precision,plot_max_MCC,plot_value_JSD,plot_value_ED,plot_entropy_C2_C1,plot_entropy_C1,plot_entropy_C2,
labels = c("A", "B","C","D","E","F"),
rel_heights = c(length(unique(df_out$couple))*0.8,3,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8),
nrow=7
#plot = plot_grid(plot_recall_precision,plot_max_MCC,plot_value_JSD,plot_value_ED,plot_entropy_C2_C1,plot_entropy_C1,plot_entropy_C2,
# labels = c("A", "B","C","D","E","F"),
# rel_heights = c(length(unique(df_out$couple))*0.8,3,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8),
# nrow=7
# )
plot = plot_grid(plot_recall_precision,plot_recall_precision_papier,plot_max_MCC,
labels = c("A", "A'", "B"),
rel_heights = c(length(unique(df_out$couple))*0.8,2,4),
nrow=3
)
save_plot(paste0(opt$out,suffix,".pdf"),
plot,
ncol = 0.4* length(unique(df_out_melt$methode)),
nrow = length(unique(df_out$couple)) * 0.5 * 6 + 1 ,
nrow = length(unique(df_out$couple)) * 0.5 + 1 + 1 ,
base_aspect_ratio = 1,
limitsize = FALSE
)
}
plot_out(df_out, df_d, df_recall_sup09_per_meth, suffix="")
condensed_meths = c("PCOC","Diffsel_mean","Identical_LG08","Mutinomial_1MinusLRT","Tdg09_1MinusFDR","Msd_1MinusP","Topological_LG08")
plot_out(df_out, df_d, df_recall_sup09_per_meth, meths=condensed_meths, suffix="_condensed")
......
......@@ -68,7 +68,7 @@ y_labs = "Threshold"
print(head(df))
df_tmp = subset(df, couple == "H0/HaPCOC NeG1")
df_tmp = subset(df, couple == "H0/HaPC NeG5")
df_t_per_method = do.call(rbind, lapply(split(df_tmp, paste0(df_tmp$methode, df_tmp$tree)),
function(x) {return(x[which.max(x$threshold),c("methode", "threshold","tree","profil","couple")])}))
......@@ -78,7 +78,7 @@ df_t_per_method_2 [ df_t_per_method_2$tree == unique(df_t_per_method_2$tree)[1],
df2 = df
df2$retained_t = "no"
df2$retained_t[df2$couple == "H0/HaPCOC"] = "yes"
df2$retained_t[df2$couple == "H0/HaPC NeG5"] = "yes"
plot = ggplot(df2, aes(x=tree, y=threshold, col=couple, shape = retained_t)) + theme_bw() + labs(x=x_labs, y=y_labs) + ylim(c(0,1))
plot = plot + theme(axis.text.x = element_text(angle = 0, hjust = 1))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment