Commit 0030e37e authored by Carine Rey's avatar Carine Rey
Browse files

repo refactoring

parent 03815065
......@@ -13,10 +13,13 @@ let repo ~preview dataset_l =
let model_prefix = dataset.model_prefix in
let tree_prefix = dataset.tree_prefix in
let repo_ready_data = Ready_dataset.repo dataset.dataset in
let repo_raw_data = if preview then Raw_dataset.repo ~prefix:model_prefix (Ready_dataset.to_raw dataset.dataset) else [] in
let repo_raw_data = Raw_dataset.repo ~prefix:model_prefix (Ready_dataset.to_raw dataset.dataset) in
List.concat [
Repo.shift "simulated_data" (Repo.shift (tree_prefix ^"_"^model_prefix) repo_raw_data);
Repo.shift "simulated_data_debug" (Repo.shift tree_prefix (Repo.shift model_prefix repo_ready_data));
]
Repo.shift "minimal" (Repo.shift (tree_prefix ^"_"^model_prefix) repo_raw_data);
Repo.shift "debug" repo_ready_data;
]
|> Repo.shift "dataset"
|> Repo.shift model_prefix
|> Repo.shift "Results_per_hypothesis"
)
|> List.concat
......@@ -119,10 +119,10 @@ let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~n
);
(if ne_test then
[
H0_BigNeInSmallNe;
H0_SmallNeInBigNe;
HaPC_BigNeInSmallNe;
HaPC_SmallNeInBigNe;
H0_BigNeInSmallNe;
H0_SmallNeInBigNe;
HaPC_BigNeInSmallNe;
HaPC_SmallNeInBigNe;
]
else
[]
......@@ -200,7 +200,6 @@ let repo_of_dataset_results_l ~dataset_results_l =
in
repo
|> Repo.shift dataset_results.model_prefix
|> Repo.shift dataset_results.tree_prefix
)
|> List.concat
......@@ -287,9 +286,12 @@ let derive_profile ?(indir = "") ?(ns = 0) ~preview ~fast_mode ~no_Ne ~ne_test ~
let post_analyses_res = Post_analyses.post_analyses_res_of_dataset_results_l ~dataset_results_l in
let repo_per_tree = [
Dataset.repo dataset_l ~preview ;
repo_of_dataset_results_l ~dataset_results_l ;
Repo.shift tree_prefix (Post_analyses.repo_of_post_analyses_res ~prefix:tree_prefix ~post_analyses_res);
] |> List.concat
Repo.shift "Results_per_hypothesis" (repo_of_dataset_results_l ~dataset_results_l);
Post_analyses.repo_of_post_analyses_res ~prefix:tree_prefix ~post_analyses_res;
]
|> List.concat
|> Repo.shift tree_prefix
|> Repo.shift "Results_per_tree"
in
(repo_per_tree, post_analyses_res)
)
......@@ -347,7 +349,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
repo_of_dataset_results_l ~dataset_results_l ;
] |> List.concat
in
let repo = sim_repo_l @ repo_real_trees in
let repo = (Repo.shift "Simulated_datasets" sim_repo_l) @ (Repo.shift "Real_datasets" repo_real_trees) in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let simulation_command =
......
......@@ -59,37 +59,37 @@ let is_hyp ~hyp (dataset_results :dataset_res) =
let build_cmd_t_choices (opt_name : string) mr_option =
match mr_option with
| Some x -> [opt opt_name dep x]
| None -> []
| Some x -> [opt opt_name dep x]
| None -> []
let make_t_choices ~h0_mr ~h0_NeBig_mr ~h0_NeSmall_mr ~haPCOC_mr ~haPC_mr ~haPC_NeBig_mr ~haPC_NeSmall_mr ~h0_NeBigInSmall_mr
~h0_NeSmallInBig_mr ~haPC_NeBigInSmall_mr ~haPC_NeSmallInBig_mr () : post_analyses_dir directory workflow =
let env = r_env in
let out = dest // "out" in
let cmd_mr = List.map [
("--H0" , h0_mr );
("--H0NeBig" , h0_NeBig_mr );
("--H0NeSmall" , h0_NeSmall_mr );
("--H0NeBigInSmall" , h0_NeBigInSmall_mr );
("--H0NeSmallInBig" , h0_NeSmallInBig_mr );
("--HaPCOC" , haPCOC_mr );
("--HaPC" , haPC_mr );
("--HaPCNeBig" , haPC_NeBig_mr );
("--HaPCNeSmall" , haPC_NeSmall_mr);
("--HaPCNeBigInSmall" , haPC_NeBigInSmall_mr );
("--HaPCNeSmallInBig" , haPC_NeSmallInBig_mr);
] ~f:(fun (opt_name, mr_option) -> build_cmd_t_choices opt_name mr_option)
|> List.concat in
("--H0" , h0_mr );
("--H0NeBig" , h0_NeBig_mr );
("--H0NeSmall" , h0_NeSmall_mr );
("--H0NeBigInSmall" , h0_NeBigInSmall_mr );
("--H0NeSmallInBig" , h0_NeSmallInBig_mr );
("--HaPCOC" , haPCOC_mr );
("--HaPC" , haPC_mr );
("--HaPCNeBig" , haPC_NeBig_mr );
("--HaPCNeSmall" , haPC_NeSmall_mr);
("--HaPCNeBigInSmall" , haPC_NeBigInSmall_mr );
("--HaPCNeSmallInBig" , haPC_NeSmallInBig_mr);
] ~f:(fun (opt_name, mr_option) -> build_cmd_t_choices opt_name mr_option)
|> List.concat in
workflow ~descr:"post_analyses.t_choices" [
docker env (
and_list [
mkdir_p dest ;
cmd "Rscript" ([
[file_dump (string Scripts.calc_t_per_meth) ;
opt "--out " ident out;
] ;
cmd_mr ;
] |> List.concat) ;
[file_dump (string Scripts.calc_t_per_meth) ;
opt "--out " ident out;
] ;
cmd_mr ;
] |> List.concat) ;
])
]
......@@ -133,8 +133,8 @@ let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
let plot_trees ~reinfered_tree_l : plot_trees directory workflow =
let env = r_env in
let cmd_cp_l = List.map reinfered_tree_l ~f:(fun rt -> [
cmd "cp" [dep rt.reinfered_tree ; tmp // (rt.tree_prefix ^"@"^ rt.model_prefix ^ ".nw")];
cmd "cp" [dep rt.input_tree ; tmp // (rt.tree_prefix ^"@input_tree.nw")]
cmd "cp" [dep rt.reinfered_tree ; tmp // (rt.tree_prefix ^"@"^ rt.model_prefix ^ ".nw")];
cmd "cp" [dep rt.input_tree ; tmp // (rt.tree_prefix ^"@input_tree.nw")]
]) |> List.concat
in
let out = dest // "out" in
......@@ -169,26 +169,26 @@ type res_all_hyp = {
h0_NeSmallInBig_res : dataset_res option ;
ha_PC_NeBigInSmall_res : dataset_res option ;
ha_PC_NeSmallInBig_res : dataset_res option ;
}
}
let make_t_choices_per_couple {h0_res; h0_NeBig_res; h0_NeSmall_res; ha_PC_res; ha_PCOC_res; ha_PC_NeBig_res ; ha_PC_NeSmall_res;
h0_NeBigInSmall_res; h0_NeSmallInBig_res; ha_PC_NeBigInSmall_res; ha_PC_NeSmallInBig_res} =
let h0_mr = get_merged_results_opt h0_res in
let h0_NeBig_mr = get_merged_results_opt h0_NeBig_res in
let h0_NeSmall_mr = get_merged_results_opt h0_NeSmall_res in
let h0_NeBigInSmall_mr = get_merged_results_opt h0_NeBigInSmall_res in
let h0_NeSmallInBig_mr = get_merged_results_opt h0_NeSmallInBig_res in
let h0_mr = get_merged_results_opt h0_res in
let h0_NeBig_mr = get_merged_results_opt h0_NeBig_res in
let h0_NeSmall_mr = get_merged_results_opt h0_NeSmall_res in
let h0_NeBigInSmall_mr = get_merged_results_opt h0_NeBigInSmall_res in
let h0_NeSmallInBig_mr = get_merged_results_opt h0_NeSmallInBig_res in
let haPCOC_mr = get_merged_results_opt ha_PCOC_res in
let haPCOC_mr = get_merged_results_opt ha_PCOC_res in
let haPC_mr = get_merged_results_opt ha_PC_res in
let haPC_NeBig_mr = get_merged_results_opt ha_PC_NeBig_res in
let haPC_NeSmall_mr = get_merged_results_opt ha_PC_NeSmall_res in
let haPC_NeBigInSmall_mr = get_merged_results_opt ha_PC_NeBigInSmall_res in
let haPC_NeSmallInBig_mr = get_merged_results_opt ha_PC_NeSmallInBig_res in
let haPC_mr = get_merged_results_opt ha_PC_res in
let haPC_NeBig_mr = get_merged_results_opt ha_PC_NeBig_res in
let haPC_NeSmall_mr = get_merged_results_opt ha_PC_NeSmall_res in
let haPC_NeBigInSmall_mr = get_merged_results_opt ha_PC_NeBigInSmall_res in
let haPC_NeSmallInBig_mr = get_merged_results_opt ha_PC_NeSmallInBig_res in
make_t_choices ~h0_mr ~h0_NeBig_mr ~h0_NeSmall_mr ~haPCOC_mr ~haPC_mr ~haPC_NeBig_mr ~haPC_NeSmall_mr ~h0_NeBigInSmall_mr
make_t_choices ~h0_mr ~h0_NeBig_mr ~h0_NeSmall_mr ~haPCOC_mr ~haPC_mr ~haPC_NeBig_mr ~haPC_NeSmall_mr ~h0_NeBigInSmall_mr
~h0_NeSmallInBig_mr ~haPC_NeBigInSmall_mr ~haPC_NeSmallInBig_mr ()
......@@ -286,7 +286,7 @@ let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix : se
let merged_results_dir = tmp // "merged_results_dir" in
let out = dest // "out" in
let cmd_cp_t_choices_l = List.map t_choices_l ~f:(fun t_choices ->
cmd "cp" [dep t_choices.t_choices_max ; t_choices_dir // (t_choices.tree_prefix ^ ".tsv")]
cmd "cp" [dep t_choices.t_choices_recall09 ; t_choices_dir // (t_choices.tree_prefix ^ ".tsv")]
)
in
let cmd_cp_merged_results_l = List.map dataset_results_l ~f:(fun dataset_results ->
......@@ -324,20 +324,22 @@ let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile_prefix ~
post_analyses_res.dataset_results_l) |> List.concat
in
let sens_spe_t_choices_plot = plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix in
Repo.[
item ["sens_spe.pdf"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.pdf"]);
item ["all_t_choices.pdf"] (sens_spe_t_choices_plot / selector ["out.max_t_per_tree.pdf"]);
item ["sens_spe.tsv"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.tsv"]);
item ["all_t_choices.tsv"] (sens_spe_t_choices_plot / selector ["out.max_t_per_tree.tsv"]);
]
[Repo.[
item ["sens_spe.tsv"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.tsv"]);
item ["all_t_choices.tsv"] (sens_spe_t_choices_plot / selector ["out.t_per_tree.tsv"]);
] |> Repo.shift "pdf_tsv" ;
Repo.[
item ["sens_spe.pdf"] (sens_spe_t_choices_plot / selector ["out.sens_spe_auto_t.pdf"]);
item ["all_t_choices.pdf"] (sens_spe_t_choices_plot / selector ["out.t_per_tree.pdf"]);
]
] |> List.concat
let repo_of_post_analyses_simu ~post_analyses_simu =
[
Repo.[
item ["hypothesis_validation.pdf"] post_analyses_simu.simu_infos_plot ;
item ["trees_validation.pdf"] post_analyses_simu.trees_plot;
] |> Repo.shift "simu_infos"
]
;
(List.map post_analyses_simu.simu_infos_l ~f:(fun simu_infos ->
match simu_infos.simu_infos with
......@@ -345,10 +347,12 @@ let repo_of_post_analyses_simu ~post_analyses_simu =
| Some w ->
Repo.[
item [simu_infos.tree_prefix ^ "@" ^ simu_infos.model_prefix ^ ".tsv"] w
] |> Repo.shift "simu_infos"
] |> Repo.shift simu_infos.tree_prefix
|> Repo.shift "tsv"
) |> List.concat
);
] |> List.concat
|> Repo.shift "Simulation_details"
let repo_of_post_analyses_res ~prefix ~post_analyses_res =
[
......@@ -361,7 +365,7 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res =
item [prefix ^ ".t_choices.complete.tsv"] w.t_choices_complete ;
item [prefix ^ ".t_choices.pdf"] w.t_choices_plot ;
item [prefix ^ ".t_choices.condensed.pdf"] w.t_choices_condensed_plot ;
] |> Repo.shift "t_choices"
]
);
(*(
match post_analyses_res.auto_t_plot_l with
......@@ -374,6 +378,6 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res =
]
)|> List.concat
|> Repo.shift "auto_t_pdf"
);*)
);*)
] |> List.concat
......@@ -50,7 +50,7 @@ let cat_file ~(f_l: text_file workflow list) : text_file workflow =
let profile_l_of_splitted_profile ~nb_cat ~nb_sites profile_fn =
let profile_f = input profile_fn in
let prefix = Filename.chop_extension profile_fn in
let prefix = Filename.chop_extension (Filename.basename profile_fn) in
let dist_bins = match nb_cat with
| 3 -> "[0.01,0.4],[0.4,0.6],[0.6,2]"
| 1 -> "[0.01,2]"
......
......@@ -45,7 +45,6 @@ let repo rd =
;
]
|> List.concat
|> Repo.shift "ready_dataset"
let to_raw { input_tree ; fna ; fna_infos} =
{ Raw_dataset.input_tree ; fna ; fna_infos}
......
......@@ -8,6 +8,8 @@ library("reshape2")
library("ggplot2")
library("cowplot")
date = format(Sys.time(), format="%Y-%m-%d %X")
option_list = list(
make_option(c("--H0") , type="character", default=NULL, help="merged_results H0", metavar="character"),
make_option(c("--H0NeBig") , type="character", default=NULL, help="merged_results H0NeBig", metavar="character"),
......@@ -89,9 +91,10 @@ df_d_H0HaPC_NeSmall = build_df_dist_couple(df_H0NeSmall_melt, df_HaPCNeSmall_mel
df_d_H0HaPC_NeBigInSmall = build_df_dist_couple(df_H0NeBigInSmall_melt, df_HaPCNeBigInSmall_melt, "H0/HaPC NeBigInSmall")
df_d_H0HaPC_NeSmallInBig = build_df_dist_couple(df_H0NeSmallInBig_melt, df_HaPCNeSmallInBig_melt, "H0/HaPC NeSmallInBig")
df_d_H0HaPCOC = build_df_dist_couple(df_H0_melt, df_HaPCOC_melt, "H0/HaPCOC")
df_d_H0NeBigHaPCOC = build_df_dist_couple(df_H0NeBig_melt, df_HaPCOC_melt, "H0NeBig/HaPCOC")
df_d = rbind.data.frame(df_d_H0HaPC, df_d_H0HaPCOC,df_d_H0HaPC_NeBig,df_d_H0HaPC_NeSmall,
df_d_H0HaPC_NeBigInSmall,df_d_H0HaPC_NeSmallInBig)
df_d_H0HaPC_NeBigInSmall,df_d_H0HaPC_NeSmallInBig,df_d_H0NeBigHaPCOC)
df_d = df_d[order(df_d$methode),]
......@@ -152,9 +155,11 @@ df_H0HaPC_NeSmall = build_df_couple(df_H0NeSmall_melt, df_HaPCNeSmall_melt, "H0/
df_H0HaPC_NeBigInSmall = build_df_couple(df_H0NeBigInSmall_melt, df_HaPCNeBigInSmall_melt, "H0/HaPC NeBigInSmall")
df_H0HaPC_NeSmallInBig = build_df_couple(df_H0NeSmallInBig_melt, df_HaPCNeSmallInBig_melt, "H0/HaPC NeSmallInBig")
df_H0HaPCOC = build_df_couple(df_H0_melt, df_HaPCOC_melt, "H0/HaPCOC")
df_H0NeBigHaPCOC = build_df_couple(df_H0NeBig_melt, df_HaPCOC_melt, "H0NeBig/HaPCOC")
df = rbind.data.frame(df_H0HaPC, df_H0HaPCOC,df_H0HaPC_NeBig,df_H0HaPC_NeSmall,
df_H0HaPC_NeBigInSmall,df_H0HaPC_NeSmallInBig)
df_H0HaPC_NeBigInSmall,df_H0HaPC_NeSmallInBig,
df_H0NeBigHaPCOC)
print(head(df))
print(tail(df))
......@@ -200,7 +205,9 @@ print(summary(df_out))
print("prep plot max mcc")
df_max_mcc_per_method = do.call(rbind, lapply(split(df_out,paste0(df_out$methode,df_out$couple)),
function(x) {return(x[which.max(x$MCC),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")])}))
function(x) {
return(x[which.max(x$MCC),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")])
}))
print(df_max_mcc_per_method)
......@@ -211,17 +218,24 @@ df_max_mcc_per_method_2$variable="MCC"
########################################################################
print("prep plot recall_precision_per_meth")
df_recall_sup09_per_meth = do.call(rbind, lapply(split(df_out,paste0(df_out$methode,df_out$couple)),
print(df_out[is.na(df_out$methode),])
print(table(df_out$methode))
print(table(df_out$methode, df_out$couple))
df_recall_sup09_per_meth = do.call(rbind, lapply(split(df_out,paste0(df_out$methode," ", df_out$couple)),
function(x) {
x$precision[is.na(x$precision)] = 0
x2 = x[x$precision > 0.9,]
if (nrow(x2) > 0) {
return(x2[which.max(x2$sensitivity),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")])
res = x2[which.max(x2$sensitivity),c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")]
} else {
x = x[1,c("couple","methode", "threshold", "MCC","sensitivity","specificity","precision")]
print(x)
x[,c("threshold", "MCC","sensitivity","specificity","precision")] = NA
return(x)
res = x
}
return(res)
}))
print(df_recall_sup09_per_meth)
......@@ -276,6 +290,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot = plot + geom_hline( aes(yintercept = 0.9), col="black" , size = 1, show.legend = NA,linetype="dashed")
plot = plot + facet_grid(couple ~ methode)
plot = plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot = plot + ggtitle(paste0("Threshold definition such as precision > 0.9 - (", date , ")" ))
plot_recall_precision = plot
......@@ -305,7 +320,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot_max_MCC = plot
save_plot(paste0(opt$out,suffix,".max_MCC_per_meth.pdf"),
save_plot(paste0(opt$out,suffix,".indicator_per_meth.pdf"),
plot_max_MCC,
ncol = 0.4 * length(unique(df_out_melt$methode)),
nrow = 1.7,
......
......@@ -40,19 +40,19 @@ files = files[grep("tsv", files)]
files_split = strsplit(files, ".", fixed = T)
files_df = as.data.frame(do.call(rbind, files_split))
print(files_df)
files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = gsub(".tsv", "",files), bl = "NA", profil = opt$profil)
files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = gsub(".tsv", "",files), profil = opt$profil)
#files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = files_df$V1, bl = files_df$V1, profil = opt$profil)
condensed_meths = c("PCOC","diffsel","Identical_LG08","Mutinomial_1MinusLRT","Tdg09_1MinusFDR","Msd_1MinusP","Topological_LG08")
read_dir = function(x) {
file = x["files"]
tree = x["tree"]
profil = x["profil"]
bl = x["bl"]
df = read.csv(file, sep = "\t", header = T)
df$tree = tree
df$profil = profil
df$bl = bl
df = df[df$methode %in% condensed_meths, ]
return(df)
}
......@@ -62,41 +62,56 @@ alpha = 0.7
x_labs = ""
y_labs = "Threshold"
print(head(df))
df_max_mcc_per_method = do.call(rbind, lapply(split(df,df$methode),
function(x) {return(x[which.max(x$threshold),c("methode", "threshold","tree","profil","couple")])}))
df_tmp = subset(df, couple == "H0/HaPCOC")
df_t_per_method = do.call(rbind, lapply(split(df_tmp, paste0(df_tmp$methode, df_tmp$tree)),
function(x) {return(x[which.max(x$threshold),c("methode", "threshold","tree","profil","couple")])}))
df_max_mcc_per_method_2 = df_max_mcc_per_method
df_max_mcc_per_method_2 [ df_max_mcc_per_method_2$tree == unique(df_max_mcc_per_method_2$tree)[1],]
df_t_per_method_2 = df_t_per_method
df_t_per_method_2 [ df_t_per_method_2$tree == unique(df_t_per_method_2$tree)[1],]
df2 = df
df2$retained_t = "no"
df2$retained_t[df2$couple == "H0/HaPCOC"] = "yes"
plot = ggplot(df, aes(x=tree, y=threshold, col=couple, shape = profil)) + theme_bw() + labs(x=x_labs, y=y_labs) + ylim(c(0,1.5))
plot = ggplot(df2, aes(x=tree, y=threshold, col=couple, shape = retained_t)) + theme_bw() + labs(x=x_labs, y=y_labs) + ylim(c(0,1))
plot = plot + theme(axis.text.x = element_text(angle = 0, hjust = 1))
plot = plot + geom_point()
plot = plot + geom_hline( data = df_max_mcc_per_method, aes(yintercept = threshold), col="red", alpha=alpha, show.legend = NA,linetype="dotted")
plot = plot + geom_label( data = df_max_mcc_per_method_2, nudge_y = 0.25, aes(label = threshold), col="red", alpha=alpha, show.legend = NA)
plot = plot + geom_point(position= position_dodge(.5), size = 3)
plot = plot + theme(legend.position="top")
#plot = plot + guides(shape=FALSE)
plot = plot + scale_shape_manual(values = c(19,17))
plot = plot + guides(col=guide_legend(ncol=2))
plot = plot + guides(shape=guide_legend(nrow=2))
#plot = plot + geom_point( data = df_t_per_method, aes(x=tree, y=threshold, group = couple), size = 5, shape = 1, col="red", alpha=alpha, position= position_dodge(.5))
#plot = plot + geom_label( data = df_t_per_method_2, nudge_y = 0.25, aes(label = threshold), col="red", alpha=alpha, show.legend = NA)
plot = plot + facet_grid(methode ~ .) + coord_flip()
plot = plot + ggtitle("t max per tree")
plot = plot + ggtitle("Threshold definition such as\nprecision > 0.9 per H0*/Ha* couple ")
output_pdf = paste0(opt$out,".max_t_per_tree.pdf")
output_pdf = paste0(opt$out,".t_per_tree.pdf")
save_plot(output_pdf,
plot,
ncol = 1,
nrow = 0.3 * length(unique(df$methode)),
nrow = 0.5 * length(unique(df$methode)),
base_aspect_ratio = 1.5,
limitsize = FALSE
)
output_tsv = paste0(opt$out,".max_t_per_tree.tsv")
output_tsv = paste0(opt$out,".t_per_tree.tsv")
write.table(df, file=output_tsv, row.names=FALSE, quote=F, sep = "\t")
####################
# Spe en sens
####################
df_t = df
df_t = df_tmp
print("df_t")
print(df_t)
parse_file = function(df_m, df_t) {
df_m_melt = melt(df_m)
......@@ -104,10 +119,10 @@ parse_file = function(df_m, df_t) {
df_m_melt$t = as.numeric(as.character(df_m_melt$t))
df_m_melt$is_P = df_m_melt$value > df_m_melt$t
P = tapply(df_m_melt$is_P, df_m_melt$variable , function(x) {sum(x, na.rm=T)})
P[df_m_melt$variable[is.na(df_m_melt$t)]] = NA
n_sites = dim(df_m)[1]
df = data.frame(methode = names(P), P = P)
df =merge ( df, df_t, by="methode")
df$P = df$P / n_sites
return(df)
}
......@@ -119,27 +134,29 @@ files = files[grep("tsv", files)]
files_split = strsplit(files, "@", fixed = T)
files_df = as.data.frame(do.call(rbind, files_split))
print(files_df)
files_df_ok = data.frame(files= paste0(input_dir2,"/",files), tree = files_df$V1, hyp = gsub(".tsv","",files_df$V2), bl = files_df$V1, profil = opt$profil)
files_df_ok = data.frame(files= paste0(input_dir2,"/",files), tree = files_df$V1, hyp = gsub(".tsv","",files_df$V2), profil = opt$profil)
read_dir = function(x) {
file = x["files"]
tree = x["tree"]
profil = x["profil"]
bl = x["bl"]
hyp = x["hyp"]
df_m = read.csv(file, sep = "\t", header = T, na.strings=c("NA","NaN", " ", ""))
print(head(df_m))
df = parse_file(df_m[,colnames(df_m) %in% df_t$methode], df_t[df_t$tree == tree, c("methode","threshold")])
df$tree = tree
df$profil = profil
df$bl = bl
df$hyp = hyp
df = df[df$methode %in% condensed_meths, ]
return(df)
}
df = do.call(rbind, apply(files_df_ok, 1, read_dir))
rownames(df) <- NULL
print(df)
hyps_from = unique(df$hyp)
hyps_to = rep("Specificity",length(hyps_from))
hyps_to[grep("Ha",hyps_from)] = "Sensitivity"
......@@ -153,7 +170,11 @@ x_labs = ""
y_labs = "Value"
plot = ggplot(df, aes(x=methode, y=P, fill=hyp)) + theme_bw() + labs(x=x_labs, y=y_labs) + ylim(c(0,1.1))
print(df)
plot_tree = function(tree) {
df_tmp = df[df$tree == tree, ]
plot = ggplot(df_tmp, aes(x=methode, y=P, fill=hyp)) + theme_bw() + labs(x=x_labs, y=y_labs) + ylim(c(0,1.1))
plot = plot + theme(axis.text.x = element_text(angle = 30, hjust = 1))
plot = plot + geom_bar(stat="identity", position=position_dodge(0.9), color= "black", alpha = 0.7)
plot = plot + geom_text( aes(y = P + 0.05, label = round(P,2)), angle = 90, hjust = 0, position=position_dodge(0.9))
......@@ -162,12 +183,16 @@ plot = plot + facet_grid(variable ~ tree)
plot = plot + ggtitle(paste0("Compa methodes - ",opt$profil, " - (", date , ")" ))
plot = plot + scale_color_manual(values=c('gray','black'), guide=FALSE)
plot = plot + scale_alpha(range=c(0.7,1), guide=FALSE)
}
plot_all <- plot_grid(plotlist = lapply(unique(df$tree),plot_tree),
labels="AUTO", nrow = length(unique(df$tree)))
output_pdf2 = paste0(opt$out,".sens_spe_auto_t.pdf")
save_plot(output_pdf2,
plot,
ncol = length(unique(df$hyp)) / 3 * length(unique(df$tree)),
nrow = 2,
plot_all,
ncol = length(unique(df$hyp)) * length(unique(df$methode)) / 30,
nrow = 2 * length(unique(df$tree)),
base_aspect_ratio = 1.5,
limitsize = FALSE
)
......
......@@ -78,13 +78,13 @@ plotlist = lapply(split(files_df_ok,paste0(files_df_ok$tree_prefix)), plot_tree_
plot_all <- plot_grid(plotlist = plotlist,
labels=NA, ncol = length(unique(files_df_ok$tree_prefix)))
labels=NA, nrow = length(unique(files_df_ok$tree_prefix)))
output_pdf = paste0(opt$out,".pdf")
save_plot(output_pdf,
plot_all,
ncol = length(unique(files_df_ok$tree_prefix)),
nrow = 5,
ncol = 2 ,
nrow = 10 * length(unique(files_df_ok$tree_prefix)),
base_aspect_ratio = 2,
limitsize = FALSE
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment