plot_sens_spe_all_trees.R 6.8 KB
Newer Older
Carine Rey's avatar
Carine Rey committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
#!/usr/bin/env Rscript

library("optparse")
library("reshape2")
library("ggplot2")
library("cowplot")
library("plyr")

option_list = list(
  make_option(c("-i","--input_dir"), type="character", default=NULL,
              help="Input dir", metavar="character"),
  make_option(c("-I","--input_dir2"), type="character", default=NULL,
              help="Input dir2", metavar="character"),
    make_option(c("-o","--out"), type="character", default="out",
              help="output prefix [default= %default]", metavar="character"),
    make_option(c("-p","--profil"), type="character", default="None",
              help="Profil name [default= %default]", metavar="character")
);

opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);

date = format(Sys.time(), format="%Y-%m-%d %X")

if (is.null(opt$input_dir)){
  print_help(opt_parser)
  stop("At least one argument must be supplied (input_dir)", call.=FALSE)
}

input_dir = opt$input_dir
input_dir2 = opt$input_dir2
## fun...


## program... max_t_per_tree

files = paste0(list.files(input_dir))
print(files)
files = files[grep("tsv", files)]
files_split = strsplit(files, ".", fixed = T)
files_df = as.data.frame(do.call(rbind, files_split))
print(files_df)
Carine Rey's avatar
Carine Rey committed
43
files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = gsub(".tsv", "",files),  profil = opt$profil)
44
#files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = files_df$V1, bl = files_df$V1,  profil = opt$profil)
Carine Rey's avatar
Carine Rey committed
45

Carine Rey's avatar
Carine Rey committed
46
condensed_meths = c("PCOC","diffsel","Identical_LG08","Mutinomial_1MinusLRT","Tdg09_1MinusFDR","Msd_1MinusP","Topological_LG08")
Carine Rey's avatar
Carine Rey committed
47 48 49 50 51 52 53 54

read_dir = function(x) {
  file = x["files"]
  tree = x["tree"]
  profil = x["profil"]
  df = read.csv(file, sep = "\t", header = T)
  df$tree = tree
  df$profil = profil
Carine Rey's avatar
Carine Rey committed
55
  df = df[df$methode %in% condensed_meths, ]
Carine Rey's avatar
Carine Rey committed
56 57 58 59 60 61 62 63 64
  return(df)
}

df = do.call(rbind, apply(files_df_ok, 1, read_dir))

alpha = 0.7
x_labs = ""
y_labs = "Threshold"

Carine Rey's avatar
Carine Rey committed
65
print(head(df))
Carine Rey's avatar
Carine Rey committed
66 67


Carine Rey's avatar
Carine Rey committed
68 69 70
df_tmp = subset(df, couple == "H0/HaPCOC")
df_t_per_method = do.call(rbind, lapply(split(df_tmp, paste0(df_tmp$methode, df_tmp$tree)),
                                            function(x) {return(x[which.max(x$threshold),c("methode", "threshold","tree","profil","couple")])}))
Carine Rey's avatar
Carine Rey committed
71 72


Carine Rey's avatar
Carine Rey committed
73 74
df_t_per_method_2 = df_t_per_method
df_t_per_method_2 [ df_t_per_method_2$tree == unique(df_t_per_method_2$tree)[1],]
Carine Rey's avatar
Carine Rey committed
75

Carine Rey's avatar
Carine Rey committed
76 77 78
df2 = df
df2$retained_t = "no"
df2$retained_t[df2$couple == "H0/HaPCOC"] = "yes"
Carine Rey's avatar
Carine Rey committed
79

Carine Rey's avatar
Carine Rey committed
80
plot = ggplot(df2, aes(x=tree, y=threshold, col=couple, shape = retained_t)) + theme_bw()  +  labs(x=x_labs, y=y_labs) + ylim(c(0,1))
Carine Rey's avatar
Carine Rey committed
81
plot = plot + theme(axis.text.x = element_text(angle = 0, hjust = 1))
Carine Rey's avatar
Carine Rey committed
82 83 84 85 86 87 88 89
plot = plot + geom_point(position= position_dodge(.5), size = 3)
plot = plot + theme(legend.position="top")
#plot = plot + guides(shape=FALSE)
plot = plot + scale_shape_manual(values = c(19,17))
plot = plot + guides(col=guide_legend(ncol=2))
plot = plot + guides(shape=guide_legend(nrow=2))
#plot = plot + geom_point( data = df_t_per_method, aes(x=tree, y=threshold, group = couple), size = 5, shape = 1, col="red", alpha=alpha, position= position_dodge(.5))
#plot = plot + geom_label( data = df_t_per_method_2, nudge_y = 0.25, aes(label = threshold), col="red", alpha=alpha, show.legend = NA)
Carine Rey's avatar
Carine Rey committed
90
plot = plot + facet_grid(methode ~ .) + coord_flip()
Carine Rey's avatar
Carine Rey committed
91
plot = plot + ggtitle("Threshold definition such as\nprecision > 0.9 per H0*/Ha* couple ")
Carine Rey's avatar
Carine Rey committed
92

Carine Rey's avatar
Carine Rey committed
93
output_pdf = paste0(opt$out,".t_per_tree.pdf")
Carine Rey's avatar
Carine Rey committed
94 95 96
save_plot(output_pdf,
          plot,
          ncol = 1,
Carine Rey's avatar
Carine Rey committed
97
          nrow = 0.5 * length(unique(df$methode)),
Carine Rey's avatar
Carine Rey committed
98 99 100 101
          base_aspect_ratio = 1.5,
          limitsize = FALSE
          )

Carine Rey's avatar
Carine Rey committed
102
output_tsv = paste0(opt$out,".t_per_tree.tsv")
Carine Rey's avatar
Carine Rey committed
103 104 105 106 107 108
write.table(df, file=output_tsv, row.names=FALSE, quote=F, sep = "\t")

####################
# Spe en sens
####################

Carine Rey's avatar
Carine Rey committed
109 110 111 112 113 114
df_t = df_tmp

print("df_t")
print(df_t)


Carine Rey's avatar
Carine Rey committed
115 116 117 118 119 120 121

parse_file = function(df_m, df_t) {
    df_m_melt = melt(df_m)
    df_m_melt$t = mapvalues(df_m_melt$variable, from=df_t$methode, to=df_t$threshold)
    df_m_melt$t = as.numeric(as.character(df_m_melt$t))
    df_m_melt$is_P = df_m_melt$value > df_m_melt$t
    P = tapply(df_m_melt$is_P, df_m_melt$variable , function(x) {sum(x, na.rm=T)})
Carine Rey's avatar
Carine Rey committed
122
    P[df_m_melt$variable[is.na(df_m_melt$t)]] = NA
Carine Rey's avatar
Carine Rey committed
123 124 125 126 127 128 129 130 131 132 133 134 135 136
    n_sites = dim(df_m)[1]
    df = data.frame(methode = names(P), P = P)
    df =merge ( df, df_t, by="methode")
    df$P = df$P / n_sites
return(df)
}

## program... sens_spe_plot

files = paste0(list.files(input_dir2))
files = files[grep("tsv", files)]
files_split = strsplit(files, "@", fixed = T)
files_df = as.data.frame(do.call(rbind, files_split))
print(files_df)
Carine Rey's avatar
Carine Rey committed
137
files_df_ok = data.frame(files= paste0(input_dir2,"/",files), tree = files_df$V1, hyp = gsub(".tsv","",files_df$V2), profil = opt$profil)
Carine Rey's avatar
Carine Rey committed
138 139 140 141 142 143 144 145


read_dir = function(x) {
  file = x["files"]
  tree = x["tree"]
  profil = x["profil"]
  hyp = x["hyp"]
  df_m = read.csv(file, sep = "\t", header = T, na.strings=c("NA","NaN", " ", ""))
Carine Rey's avatar
Carine Rey committed
146
  print(head(df_m))
Carine Rey's avatar
Carine Rey committed
147 148 149 150
  df = parse_file(df_m[,colnames(df_m) %in% df_t$methode], df_t[df_t$tree == tree, c("methode","threshold")])
  df$tree = tree
  df$profil = profil
  df$hyp = hyp
Carine Rey's avatar
Carine Rey committed
151
  df = df[df$methode %in% condensed_meths, ]
Carine Rey's avatar
Carine Rey committed
152 153 154 155 156 157
  return(df)
}

df = do.call(rbind, apply(files_df_ok, 1, read_dir))
rownames(df) <- NULL

Carine Rey's avatar
Carine Rey committed
158 159
print(df)

Carine Rey's avatar
Carine Rey committed
160
hyps_from = unique(df$hyp)
Carine Rey's avatar
typoq  
Carine Rey committed
161
hyps_to = rep("Specificity",length(hyps_from))
Carine Rey's avatar
Carine Rey committed
162 163
hyps_to[grep("Ha",hyps_from)] = "Sensitivity"

Carine Rey's avatar
typoq  
Carine Rey committed
164 165
df$variable = mapvalues(df$hyp, from=hyps_from, to = hyps_to)

Carine Rey's avatar
Carine Rey committed
166 167 168 169 170 171 172
df$P[df$variable == "Specificity"] = 1-df$P[df$variable == "Specificity"]


x_labs = ""
y_labs = "Value"


Carine Rey's avatar
Carine Rey committed
173 174 175 176 177
print(df)

plot_tree = function(tree) {
df_tmp = df[df$tree == tree, ]
plot = ggplot(df_tmp, aes(x=methode, y=P, fill=hyp)) + theme_bw() +  labs(x=x_labs, y=y_labs) + ylim(c(0,1.1)) 
Carine Rey's avatar
Carine Rey committed
178 179 180 181 182 183 184 185
plot = plot + theme(axis.text.x = element_text(angle = 30, hjust = 1))
plot = plot + geom_bar(stat="identity", position=position_dodge(0.9), color= "black", alpha = 0.7)
plot = plot + geom_text( aes(y = P + 0.05, label = round(P,2)), angle = 90,  hjust = 0, position=position_dodge(0.9))
plot = plot + geom_text( aes(y = 0.01, label = paste0("t=",threshold)), size = 3, angle = 90,  hjust = 0, position=position_dodge(0.9))
plot = plot + facet_grid(variable ~ tree)
plot = plot + ggtitle(paste0("Compa methodes - ",opt$profil, " - (", date , ")" ))
plot = plot + scale_color_manual(values=c('gray','black'), guide=FALSE)
plot = plot + scale_alpha(range=c(0.7,1), guide=FALSE)
Carine Rey's avatar
Carine Rey committed
186 187 188 189
}

plot_all <- plot_grid(plotlist = lapply(unique(df$tree),plot_tree),
                      labels="AUTO", nrow =  length(unique(df$tree)))
Carine Rey's avatar
Carine Rey committed
190 191 192

output_pdf2 = paste0(opt$out,".sens_spe_auto_t.pdf")
save_plot(output_pdf2,
Carine Rey's avatar
Carine Rey committed
193 194 195
          plot_all,
          ncol = length(unique(df$hyp)) * length(unique(df$methode)) / 30,
          nrow = 2 * length(unique(df$tree)),
Carine Rey's avatar
Carine Rey committed
196 197 198 199 200 201 202 203
          base_aspect_ratio = 1.5,
          limitsize = FALSE
          )

output_tsv = paste0(opt$out,".sens_spe_auto_t.tsv")
write.table(df, file=output_tsv, row.names=FALSE, quote=F, sep = "\t")