Commit cbbc83e0 authored by Carine Rey's avatar Carine Rey
Browse files

update

parent 702b72de
...@@ -238,12 +238,13 @@ let derive_from_dataset ~dataset ~preview ~fast_mode= ...@@ -238,12 +238,13 @@ let derive_from_dataset ~dataset ~preview ~fast_mode=
`Topological_LG; `Topological_LG;
`Multinomial; `Multinomial;
`Pcoc; `Pcoc;
`Tdg09;
] ; ] ;
if preview then if preview then
[] []
else else
[`Pcoc_gamma; [ `Tdg09;
`Pcoc_gamma;
`Identical_WAG; `Identical_WAG;
`Topological_WAG; `Topological_WAG;
] ]
......
...@@ -62,8 +62,8 @@ let profile_l_of_splitted_profile ~nb_cat ~nb_sites profile_fn = ...@@ -62,8 +62,8 @@ let profile_l_of_splitted_profile ~nb_cat ~nb_sites profile_fn =
let p0 = splitted_profile / selector ["profile_0.tsv"] in let p0 = splitted_profile / selector ["profile_0.tsv"] in
let p1 = splitted_profile / selector ["profile_1.tsv"] in let p1 = splitted_profile / selector ["profile_1.tsv"] in
let p2 = splitted_profile / selector ["profile_2.tsv"] in let p2 = splitted_profile / selector ["profile_2.tsv"] in
(*{profile_c=cat_file [p0;p1;p2] ; profile_n=prefix ^ "_3categories" ; profile_f} *) {profile_c=cat_file [p0;p1;p2] ; profile_n=prefix ^ "_3categories" ; profile_f}
{profile_c=p2 ; profile_n=prefix ^ "_1categorie_max_dist" ; profile_f}; (*{profile_c=p2 ; profile_n=prefix ^ "_1categorie_max_dist" ; profile_f};*)
) )
| 1 -> (let p0 = splitted_profile / selector ["profile_0.tsv"] in | 1 -> (let p0 = splitted_profile / selector ["profile_0.tsv"] in
{profile_c=p0; profile_n=prefix ; profile_f}) {profile_c=p0; profile_n=prefix ; profile_f})
......
...@@ -52,7 +52,7 @@ read_hyp = function(opt_name) { ...@@ -52,7 +52,7 @@ read_hyp = function(opt_name) {
if (! is.na(opt_name)){ if (! is.na(opt_name)){
df = read.table(opt_name, header=TRUE, sep = '\t',na.strings = "NA") df = read.table(opt_name, header=TRUE, sep = '\t',na.strings = "NA")
df = df[, ! colnames(df) %in% c("Indel_prop", "Indel_prop.ConvLeaves.")] df = df[, ! colnames(df) %in% c("Indel_prop", "Indel_prop.ConvLeaves.")]
id_vars = if ("P_distance" %in% colnames(df)) {c("Sites","P_distance", "C1", "C2", "entropy_C1","entropy_C2")} else {c("Sites")} id_vars = if ("P_JSD" %in% colnames(df)) {c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2")} else {c("Sites")}
df_melt = melt(df, id.vars = id_vars, variable.name = "methode") df_melt = melt(df, id.vars = id_vars, variable.name = "methode")
return(df_melt) return(df_melt)
} else { } else {
...@@ -85,8 +85,8 @@ print("prep df_d") ...@@ -85,8 +85,8 @@ print("prep df_d")
build_df_dist_couple = function (df_h0,df_ha,name) { build_df_dist_couple = function (df_h0,df_ha,name) {
if ((! is.null(df_h0)) & (! is.null(df_ha))) { if ((! is.null(df_h0)) & (! is.null(df_ha))) {
print(head(df_h0)) print(head(df_h0))
df = merge(df_h0, df_ha, by = c("Sites","P_distance", "C1", "C2", "entropy_C1","entropy_C2", "methode"), suffix = c("_H0","_Ha")) df = merge(df_h0, df_ha, by = c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2", "methode"), suffix = c("_H0","_Ha"))
df_melt = melt(df, id.vars = c("Sites","P_distance", "C1", "C2", "entropy_C1","entropy_C2", "methode"), variable.name = "val_H0Ha") df_melt = melt(df, id.vars = c("Sites","P_JSD","P_ED", "C1", "C2", "entropy_C1","entropy_C2", "methode"), variable.name = "val_H0Ha")
df_melt$couple = name df_melt$couple = name
} else { } else {
df_melt = NULL df_melt = NULL
...@@ -341,11 +341,11 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi ...@@ -341,11 +341,11 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot = ggplot(df_d, aes(y=P_distance, x=value, shape = val_H0Ha, col = val_H0Ha)) plot = ggplot(df_d, aes(y=P_JSD, x=value, shape = val_H0Ha, col = val_H0Ha))
plot = plot + theme_bw() plot = plot + theme_bw()
plot = plot + theme(legend.position="top") plot = plot + theme(legend.position="top")
plot = plot + labs(y="P distance", x="Value") plot = plot + labs(y="P JSD", x="Value")
plot = plot + xlim(c(0,1)) + ylim(c(0, max(max(df_d$P_distance), 1))) plot = plot + xlim(c(0,1)) + ylim(c(0, max(max(df_d$P_JSD), 1)))
plot = plot + guides(col=FALSE) plot = plot + guides(col=FALSE)
plot = plot + scale_color_manual(values=colors) plot = plot + scale_color_manual(values=colors)
plot = plot + geom_vline( data = df_recall_sup09_per_meth, aes(xintercept = threshold, col=couple), size = 1, show.legend = NA, linetype="dashed") plot = plot + geom_vline( data = df_recall_sup09_per_meth, aes(xintercept = threshold, col=couple), size = 1, show.legend = NA, linetype="dashed")
...@@ -353,7 +353,21 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi ...@@ -353,7 +353,21 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot = plot + scale_shape_manual(values = c(16,17)) plot = plot + scale_shape_manual(values = c(16,17))
plot = plot + facet_grid(couple ~ methode) plot = plot + facet_grid(couple ~ methode)
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1)) plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_value_distance = plot plot_value_JSD = plot
plot = ggplot(df_d, aes(y=P_ED, x=value, shape = val_H0Ha, col = val_H0Ha))
plot = plot + theme_bw()
plot = plot + theme(legend.position="top")
plot = plot + labs(y="P ED", x="Value")
plot = plot + xlim(c(0,1)) + ylim(c(0, max(max(df_d$P_ED), 1)))
plot = plot + guides(col=FALSE)
plot = plot + scale_color_manual(values=colors)
plot = plot + geom_vline( data = df_recall_sup09_per_meth, aes(xintercept = threshold, col=couple), size = 1, show.legend = NA, linetype="dashed")
plot = plot + geom_point(size=1.2, alpha=0.7, stroke=0.01)
plot = plot + scale_shape_manual(values = c(16,17))
plot = plot + facet_grid(couple ~ methode)
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_value_ED = plot
plot = ggplot(subset(df_d,val_H0Ha=="value_Ha"), aes(y=entropy_C2, x=value, shape = val_H0Ha, col = val_H0Ha)) plot = ggplot(subset(df_d,val_H0Ha=="value_Ha"), aes(y=entropy_C2, x=value, shape = val_H0Ha, col = val_H0Ha))
plot = plot + theme_bw() plot = plot + theme_bw()
...@@ -398,22 +412,23 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi ...@@ -398,22 +412,23 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
save_plot(paste0(opt$out,suffix,".value_distance_per_meth.pdf"), save_plot(paste0(opt$out,suffix,".value_distance_per_meth.pdf"),
plot_value_distance, plot_value_JSD,
ncol = 0.4* length(unique(df_d$methode)), ncol = 0.4* length(unique(df_d$methode)),
nrow = 0.4* length(unique(df_d$couple)), nrow = 0.4* length(unique(df_d$couple)),
base_aspect_ratio = 1, base_aspect_ratio = 1,
limitsize = FALSE limitsize = FALSE
) )
plot = plot_grid(plot_recall_precision,plot_max_MCC,plot_value_distance,plot_entropy_C2_C1,plot_entropy_C1,plot_entropy_C2, plot = plot_grid(plot_recall_precision,plot_max_MCC,plot_value_JSD,plot_value_ED,plot_entropy_C2_C1,plot_entropy_C1,plot_entropy_C2,
labels = c("A", "B","C","D","E","F"), labels = c("A", "B","C","D","E","F"),
rel_heights = c(length(unique(df_out$couple))*0.8,3,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8), rel_heights = c(length(unique(df_out$couple))*0.8,3,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8,length(unique(df_out$couple))*0.8),
nrow=6) nrow=7
)
save_plot(paste0(opt$out,suffix,".pdf"), save_plot(paste0(opt$out,suffix,".pdf"),
plot, plot,
ncol = 0.4* length(unique(df_out_melt$methode)), ncol = 0.4* length(unique(df_out_melt$methode)),
nrow = length(unique(df_out$couple)) * 0.5 * 5 + 1 , nrow = length(unique(df_out$couple)) * 0.5 * 6 + 1 ,
base_aspect_ratio = 1, base_aspect_ratio = 1,
limitsize = FALSE limitsize = FALSE
) )
......
...@@ -152,7 +152,7 @@ def entropy_p(p): ...@@ -152,7 +152,7 @@ def entropy_p(p):
MESSAGE("Preparing a dataframe for every bin...") MESSAGE("Preparing a dataframe for every bin...")
# columns = ["p1_" + str(i) for i in range(20)] + ["p2_" + str(i) for i in range(20)] + ["distance"] # columns = ["p1_" + str(i) for i in range(20)] + ["p2_" + str(i) for i in range(20)] + ["distance"]
columns = ["p1", "p2", "distance","entropy_p1", "entropy_p2"] columns = ["p1", "p2", "distance","entropy_p1", "entropy_p2", "distance_eucl"]
pair_bins = [pd.DataFrame(columns = columns) for x in range(nb_bins)] pair_bins = [pd.DataFrame(columns = columns) for x in range(nb_bins)]
MESSAGE("Picking profile pairs and computing distances...") MESSAGE("Picking profile pairs and computing distances...")
...@@ -172,7 +172,7 @@ while min(map(lambda b: b.shape[0], pair_bins)) < binsize: ...@@ -172,7 +172,7 @@ while min(map(lambda b: b.shape[0], pair_bins)) < binsize:
for i in range(nb_bins): for i in range(nb_bins):
if in_bin(dist, i) and pair_bins[i].shape[0] < binsize: if in_bin(dist, i) and pair_bins[i].shape[0] < binsize:
new_row = [p1["i"], p2["i"], dist, entropy_p(p1["p"]), entropy_p(p2["p"])] new_row = [p1["i"], p2["i"], dist, entropy_p(p1["p"]), entropy_p(p2["p"]), euclidian_distance(p1["p"], p2["p"])]
pair_bins[i].loc[len(pair_bins[i])] = new_row pair_bins[i].loc[len(pair_bins[i])] = new_row
nb_ok += 1 nb_ok += 1
break break
......
...@@ -154,7 +154,7 @@ if args.multinomial : ...@@ -154,7 +154,7 @@ if args.multinomial :
if args.fna_infos : if args.fna_infos :
df_fna_infos = pd.read_csv(args.fna_infos, sep="\t", names = ["C1","C2","P_distance","entropy_C1","entropy_C2"]) df_fna_infos = pd.read_csv(args.fna_infos, sep="\t", names = ["C1","C2","P_JSD","entropy_C1","entropy_C2", "P_ED"])
df_fna_infos["Sites"] = df_fna_infos.index + 1 df_fna_infos["Sites"] = df_fna_infos.index + 1
#df_fna_infos = df_fna_infos[['Sites','P_distance']] #df_fna_infos = df_fna_infos[['Sites','P_distance']]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment