Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit e76c98eb authored by Carine Rey's avatar Carine Rey
Browse files

add plot to look to entropy

parent 030f93f8
......@@ -52,7 +52,7 @@ read_hyp = function(opt_name) {
if (! is.na(opt_name)){
df = read.table(opt_name, header=TRUE, sep = '\t',na.strings = "NA")
df = df[, ! colnames(df) %in% c("Indel_prop", "Indel_prop.ConvLeaves.")]
id_vars = if ("P_distance" %in% colnames(df)) {c("Sites", "P_distance")} else {c("Sites")}
id_vars = if ("P_distance" %in% colnames(df)) {c("Sites","P_distance", "C1", "C2", "entropy_C1","entropy_C2")} else {c("Sites")}
df_melt = melt(df, id.vars = id_vars, variable.name = "methode")
return(df_melt)
} else {
......@@ -84,8 +84,9 @@ print("prep df_d")
build_df_dist_couple = function (df_h0,df_ha,name) {
if ((! is.null(df_h0)) & (! is.null(df_ha))) {
df = merge(df_h0, df_ha, by = c("Sites","P_distance", "methode"), suffix = c("_H0","_Ha"))
df_melt = melt(df, id.vars = c("Sites","P_distance", "methode"), variable.name = "val_H0Ha")
print(head(df_h0))
df = merge(df_h0, df_ha, by = c("Sites","P_distance", "C1", "C2", "entropy_C1","entropy_C2", "methode"), suffix = c("_H0","_Ha"))
df_melt = melt(df, id.vars = c("Sites","P_distance", "C1", "C2", "entropy_C1","entropy_C2", "methode"), variable.name = "val_H0Ha")
df_melt$couple = name
} else {
df_melt = NULL
......@@ -354,6 +355,47 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_value_distance = plot
plot = ggplot(subset(df_d,val_H0Ha=="value_Ha"), aes(y=entropy_C2, x=value, shape = val_H0Ha, col = val_H0Ha))
plot = plot + theme_bw()
plot = plot + theme(legend.position="top")
plot = plot + labs(y="Entropy C2", x="Value")
plot = plot + xlim(c(0,1)) + ylim(c(0, max(max(df_d$entropy_C2), 1)))
plot = plot + guides(col=FALSE)
plot = plot + scale_color_manual(values=colors)
plot = plot + geom_vline( data = df_recall_sup09_per_meth, aes(xintercept = threshold, col=couple), size = 1, show.legend = NA, linetype="dashed")
plot = plot + geom_point(size=1.2, alpha=0.7, stroke=0.01)
plot = plot + scale_shape_manual(values = c(16,17))
plot = plot + facet_grid(couple ~ methode)
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_entropy_C2 = plot
plot = ggplot(df_d, aes(y=entropy_C1, x=value, shape = val_H0Ha, col = val_H0Ha))
plot = plot + theme_bw()
plot = plot + theme(legend.position="top")
plot = plot + labs(y="Entropy C1", x="Value")
plot = plot + xlim(c(0,1)) + ylim(c(0, max(max(df_d$entropy_C2), 1)))
plot = plot + guides(col=FALSE)
plot = plot + scale_color_manual(values=colors)
plot = plot + geom_vline( data = df_recall_sup09_per_meth, aes(xintercept = threshold, col=couple), size = 1, show.legend = NA, linetype="dashed")
plot = plot + geom_point(size=1.2, alpha=0.7, stroke=0.01)
plot = plot + scale_shape_manual(values = c(16,17))
plot = plot + facet_grid(couple ~ methode)
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_entropy_C1 = plot
plot = ggplot(subset(df_d,val_H0Ha=="value_Ha") , aes(y=entropy_C2, x=entropy_C1, shape = val_H0Ha, col = value))
plot = plot + theme_bw()
plot = plot + theme(legend.position="top")
plot = plot + labs(y="Entropy C2", x="Entropy C1")
plot = plot + xlim(c(0, max(max(df_d$entropy_C1), 1))) + ylim(c(0, max(max(df_d$entropy_C2), 1)))
plot = plot + geom_point(size=1.2, alpha=0.7, stroke=0.01)
plot = plot + scale_shape_manual(values = c(16,17))
plot = plot + scale_colour_gradientn(colours = c("red","yellow","green","lightblue","darkblue"),
values=c(1.0,0.8,0.6,0.4,0.2,0))
plot = plot + facet_grid(couple ~ methode)
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_entropy_C2_C1 = plot
save_plot(paste0(opt$out,suffix,".value_distance_per_meth.pdf"),
plot_value_distance,
......@@ -363,15 +405,15 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
limitsize = FALSE
)
plot = plot_grid(plot_recall_precision,plot_max_MCC,plot_value_distance,
labels = c("A", "B","C"),
rel_heights = c(length(unique(df_out$couple))*0.8,3,length(unique(df_out$couple))*0.8),
nrow=3)
plot = plot_grid(plot_recall_precision,plot_max_MCC,plot_value_distance,plot_entropy_C2_C1,plot_entropy_C1,plot_entropy_C2,
labels = c("A", "B","C","D","E","F"),
rel_heights = c(length(unique(df_out$couple))*0.8,3,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8),
nrow=6)
save_plot(paste0(opt$out,suffix,".pdf"),
plot,
ncol = 0.4* length(unique(df_out_melt$methode)),
nrow = length(unique(df_out$couple)) * 0.5 * 2 + 1,
nrow = length(unique(df_out$couple)) * 0.5 * 5 + 1 ,
base_aspect_ratio = 1,
limitsize = FALSE
)
......@@ -382,6 +424,7 @@ condensed_meths = c("PCOC","Diffsel_mean","Identical_LG08","Mutinomial_1MinusLRT
plot_out(df_out, df_d, df_recall_sup09_per_meth, meths=condensed_meths, suffix="_condensed")
write.table(df_d, file=paste0(opt$out,".complete_d.tsv"), row.names=FALSE, quote=F, sep = "\t")
write.table(df_out, file=paste0(opt$out,".complete.tsv"), row.names=FALSE, quote=F, sep = "\t")
write.table(df_max_mcc_per_method, file=paste0(opt$out,".max_MCC_per_meth.tsv"), row.names=FALSE, quote=F, sep = "\t")
write.table(df_recall_sup09_per_meth, file=paste0(opt$out,".recall09_per_meth.tsv"), row.names=FALSE, quote=F, sep = "\t")
......@@ -146,9 +146,13 @@ def jensen_shannon_divergence(p1, p2): # https://stackoverflow.com/questions/158
def jensen_shannon_distance(p1, p2):
return sqrt(jensen_shannon_divergence(p1, p2))
def entropy_p(p):
print(p)
return entropy(p)
MESSAGE("Preparing a dataframe for every bin...")
# columns = ["p1_" + str(i) for i in range(20)] + ["p2_" + str(i) for i in range(20)] + ["distance"]
columns = ["p1", "p2", "distance"]
columns = ["p1", "p2", "distance","entropy_p1", "entropy_p2"]
pair_bins = [pd.DataFrame(columns = columns) for x in range(nb_bins)]
MESSAGE("Picking profile pairs and computing distances...")
......@@ -168,7 +172,7 @@ while min(map(lambda b: b.shape[0], pair_bins)) < binsize:
for i in range(nb_bins):
if in_bin(dist, i) and pair_bins[i].shape[0] < binsize:
new_row = [p1["i"], p2["i"], dist]
new_row = [p1["i"], p2["i"], dist, entropy_p(p1["p"]), entropy_p(p2["p"])]
pair_bins[i].loc[len(pair_bins[i])] = new_row
nb_ok += 1
break
......
......@@ -154,9 +154,9 @@ if args.multinomial :
if args.fna_infos :
df_fna_infos = pd.read_csv(args.fna_infos, sep="\t", names = ["C1","C2","P_distance"])
df_fna_infos = pd.read_csv(args.fna_infos, sep="\t", names = ["C1","C2","P_distance","entropy_C1","entropy_C2"])
df_fna_infos["Sites"] = df_fna_infos.index + 1
df_fna_infos = df_fna_infos[['Sites','P_distance']]
#df_fna_infos = df_fna_infos[['Sites','P_distance']]
df_list = [df for df in [df_pcoc, df_pcoc_gamma, df_pcoc_C60,
df_diffsel, df_diffsel_bis,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment