Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit cbbc83e0 authored by Carine Rey's avatar Carine Rey
Browse files

update

parent 702b72de
......@@ -238,12 +238,13 @@ let derive_from_dataset ~dataset ~preview ~fast_mode=
`Topological_LG;
`Multinomial;
`Pcoc;
`Tdg09;
] ;
if preview then
[]
else
[`Pcoc_gamma;
[ `Tdg09;
`Pcoc_gamma;
`Identical_WAG;
`Topological_WAG;
]
......
......@@ -62,8 +62,8 @@ let profile_l_of_splitted_profile ~nb_cat ~nb_sites profile_fn =
let p0 = splitted_profile / selector ["profile_0.tsv"] in
let p1 = splitted_profile / selector ["profile_1.tsv"] in
let p2 = splitted_profile / selector ["profile_2.tsv"] in
(*{profile_c=cat_file [p0;p1;p2] ; profile_n=prefix ^ "_3categories" ; profile_f} *)
{profile_c=p2 ; profile_n=prefix ^ "_1categorie_max_dist" ; profile_f};
{profile_c=cat_file [p0;p1;p2] ; profile_n=prefix ^ "_3categories" ; profile_f}
(*{profile_c=p2 ; profile_n=prefix ^ "_1categorie_max_dist" ; profile_f};*)
)
| 1 -> (let p0 = splitted_profile / selector ["profile_0.tsv"] in
{profile_c=p0; profile_n=prefix ; profile_f})
......
......@@ -52,7 +52,7 @@ read_hyp = function(opt_name) {
if (! is.na(opt_name)){
df = read.table(opt_name, header=TRUE, sep = '\t',na.strings = "NA")
df = df[, ! colnames(df) %in% c("Indel_prop", "Indel_prop.ConvLeaves.")]
id_vars = if ("P_distance" %in% colnames(df)) {c("Sites","P_distance", "C1", "C2", "entropy_C1","entropy_C2")} else {c("Sites")}
id_vars = if ("P_JSD" %in% colnames(df)) {c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2")} else {c("Sites")}
df_melt = melt(df, id.vars = id_vars, variable.name = "methode")
return(df_melt)
} else {
......@@ -85,8 +85,8 @@ print("prep df_d")
build_df_dist_couple = function (df_h0,df_ha,name) {
if ((! is.null(df_h0)) & (! is.null(df_ha))) {
print(head(df_h0))
df = merge(df_h0, df_ha, by = c("Sites","P_distance", "C1", "C2", "entropy_C1","entropy_C2", "methode"), suffix = c("_H0","_Ha"))
df_melt = melt(df, id.vars = c("Sites","P_distance", "C1", "C2", "entropy_C1","entropy_C2", "methode"), variable.name = "val_H0Ha")
df = merge(df_h0, df_ha, by = c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2", "methode"), suffix = c("_H0","_Ha"))
df_melt = melt(df, id.vars = c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2", "methode"), variable.name = "val_H0Ha")
df_melt$couple = name
} else {
df_melt = NULL
......@@ -341,11 +341,11 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot = ggplot(df_d, aes(y=P_distance, x=value, shape = val_H0Ha, col = val_H0Ha))
plot = ggplot(df_d, aes(y=P_JSD, x=value, shape = val_H0Ha, col = val_H0Ha))
plot = plot + theme_bw()
plot = plot + theme(legend.position="top")
plot = plot + labs(y="P distance", x="Value")
plot = plot + xlim(c(0,1)) + ylim(c(0, max(max(df_d$P_distance), 1)))
plot = plot + labs(y="P JSD", x="Value")
plot = plot + xlim(c(0,1)) + ylim(c(0, max(max(df_d$P_JSD), 1)))
plot = plot + guides(col=FALSE)
plot = plot + scale_color_manual(values=colors)
plot = plot + geom_vline( data = df_recall_sup09_per_meth, aes(xintercept = threshold, col=couple), size = 1, show.legend = NA, linetype="dashed")
......@@ -353,7 +353,21 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot = plot + scale_shape_manual(values = c(16,17))
plot = plot + facet_grid(couple ~ methode)
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_value_distance = plot
plot_value_JSD = plot
plot = ggplot(df_d, aes(y=P_ED, x=value, shape = val_H0Ha, col = val_H0Ha))
plot = plot + theme_bw()
plot = plot + theme(legend.position="top")
plot = plot + labs(y="P ED", x="Value")
plot = plot + xlim(c(0,1)) + ylim(c(0, max(max(df_d$P_ED), 1)))
plot = plot + guides(col=FALSE)
plot = plot + scale_color_manual(values=colors)
plot = plot + geom_vline( data = df_recall_sup09_per_meth, aes(xintercept = threshold, col=couple), size = 1, show.legend = NA, linetype="dashed")
plot = plot + geom_point(size=1.2, alpha=0.7, stroke=0.01)
plot = plot + scale_shape_manual(values = c(16,17))
plot = plot + facet_grid(couple ~ methode)
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_value_ED = plot
plot = ggplot(subset(df_d,val_H0Ha=="value_Ha"), aes(y=entropy_C2, x=value, shape = val_H0Ha, col = val_H0Ha))
plot = plot + theme_bw()
......@@ -398,22 +412,23 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
save_plot(paste0(opt$out,suffix,".value_distance_per_meth.pdf"),
plot_value_distance,
plot_value_JSD,
ncol = 0.4* length(unique(df_d$methode)),
nrow = 0.4* length(unique(df_d$couple)),
base_aspect_ratio = 1,
limitsize = FALSE
)
plot = plot_grid(plot_recall_precision,plot_max_MCC,plot_value_distance,plot_entropy_C2_C1,plot_entropy_C1,plot_entropy_C2,
plot = plot_grid(plot_recall_precision,plot_max_MCC,plot_value_JSD,plot_value_ED,plot_entropy_C2_C1,plot_entropy_C1,plot_entropy_C2,
labels = c("A", "B","C","D","E","F"),
rel_heights = c(length(unique(df_out$couple))*0.8,3,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8),
nrow=6)
rel_heights = c(length(unique(df_out$couple))*0.8,3,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8),
nrow=7
)
save_plot(paste0(opt$out,suffix,".pdf"),
plot,
ncol = 0.4* length(unique(df_out_melt$methode)),
nrow = length(unique(df_out$couple)) * 0.5 * 5 + 1 ,
nrow = length(unique(df_out$couple)) * 0.5 * 6 + 1 ,
base_aspect_ratio = 1,
limitsize = FALSE
)
......
......@@ -152,7 +152,7 @@ def entropy_p(p):
MESSAGE("Preparing a dataframe for every bin...")
# columns = ["p1_" + str(i) for i in range(20)] + ["p2_" + str(i) for i in range(20)] + ["distance"]
columns = ["p1", "p2", "distance","entropy_p1", "entropy_p2"]
columns = ["p1", "p2", "distance","entropy_p1", "entropy_p2", "distance_eucl"]
pair_bins = [pd.DataFrame(columns = columns) for x in range(nb_bins)]
MESSAGE("Picking profile pairs and computing distances...")
......@@ -172,7 +172,7 @@ while min(map(lambda b: b.shape[0], pair_bins)) < binsize:
for i in range(nb_bins):
if in_bin(dist, i) and pair_bins[i].shape[0] < binsize:
new_row = [p1["i"], p2["i"], dist, entropy_p(p1["p"]), entropy_p(p2["p"])]
new_row = [p1["i"], p2["i"], dist, entropy_p(p1["p"]), entropy_p(p2["p"]), euclidian_distance(p1["p"], p2["p"])]
pair_bins[i].loc[len(pair_bins[i])] = new_row
nb_ok += 1
break
......
......@@ -154,7 +154,7 @@ if args.multinomial :
if args.fna_infos :
df_fna_infos = pd.read_csv(args.fna_infos, sep="\t", names = ["C1","C2","P_distance","entropy_C1","entropy_C2"])
df_fna_infos = pd.read_csv(args.fna_infos, sep="\t", names = ["C1","C2","P_JSD","entropy_C1","entropy_C2", "P_ED"])
df_fna_infos["Sites"] = df_fna_infos.index + 1
#df_fna_infos = df_fna_infos[['Sites','P_distance']]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment