Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
0030e37e
Commit
0030e37e
authored
Aug 02, 2018
by
Carine Rey
Browse files
repo refactoring
parent
03815065
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
145 additions
and
97 deletions
+145
-97
lib/dataset.ml
lib/dataset.ml
+7
-4
lib/pipeline.ml
lib/pipeline.ml
+11
-9
lib/post_analyses.ml
lib/post_analyses.ml
+51
-47
lib/profile.ml
lib/profile.ml
+1
-1
lib/ready_dataset.ml
lib/ready_dataset.ml
+0
-1
lib/scripts/calc_t_per_meth.R
lib/scripts/calc_t_per_meth.R
+23
-8
lib/scripts/plot_sens_spe_all_trees.R
lib/scripts/plot_sens_spe_all_trees.R
+49
-24
lib/scripts/plot_trees.R
lib/scripts/plot_trees.R
+3
-3
No files found.
lib/dataset.ml
View file @
0030e37e
...
...
@@ -13,10 +13,13 @@ let repo ~preview dataset_l =
let
model_prefix
=
dataset
.
model_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
let
repo_ready_data
=
Ready_dataset
.
repo
dataset
.
dataset
in
let
repo_raw_data
=
if
preview
then
Raw_dataset
.
repo
~
prefix
:
model_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
else
[]
in
let
repo_raw_data
=
Raw_dataset
.
repo
~
prefix
:
model_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
in
List
.
concat
[
Repo
.
shift
"
simulated_data
"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"
simulated_data_debug"
(
Repo
.
shift
tree_prefix
(
Repo
.
shift
model_prefix
repo_ready_data
))
;
Repo
.
shift
"
minimal
"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"
debug"
repo_ready_data
;
]
|>
Repo
.
shift
"dataset"
|>
Repo
.
shift
model_prefix
|>
Repo
.
shift
"Results_per_hypothesis"
)
|>
List
.
concat
lib/pipeline.ml
View file @
0030e37e
...
...
@@ -200,7 +200,6 @@ let repo_of_dataset_results_l ~dataset_results_l =
in
repo
|>
Repo
.
shift
dataset_results
.
model_prefix
|>
Repo
.
shift
dataset_results
.
tree_prefix
)
|>
List
.
concat
...
...
@@ -287,9 +286,12 @@ let derive_profile ?(indir = "") ?(ns = 0) ~preview ~fast_mode ~no_Ne ~ne_test ~
let
post_analyses_res
=
Post_analyses
.
post_analyses_res_of_dataset_results_l
~
dataset_results_l
in
let
repo_per_tree
=
[
Dataset
.
repo
dataset_l
~
preview
;
repo_of_dataset_results_l
~
dataset_results_l
;
Repo
.
shift
tree_prefix
(
Post_analyses
.
repo_of_post_analyses_res
~
prefix
:
tree_prefix
~
post_analyses_res
);
]
|>
List
.
concat
Repo
.
shift
"Results_per_hypothesis"
(
repo_of_dataset_results_l
~
dataset_results_l
);
Post_analyses
.
repo_of_post_analyses_res
~
prefix
:
tree_prefix
~
post_analyses_res
;
]
|>
List
.
concat
|>
Repo
.
shift
tree_prefix
|>
Repo
.
shift
"Results_per_tree"
in
(
repo_per_tree
,
post_analyses_res
)
)
...
...
@@ -347,7 +349,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
repo_of_dataset_results_l
~
dataset_results_l
;
]
|>
List
.
concat
in
let
repo
=
sim_repo_l
@
repo_real_trees
in
let
repo
=
(
Repo
.
shift
"Simulated_datasets"
sim_repo_l
)
@
(
Repo
.
shift
"Real_datasets"
repo_real_trees
)
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
let
simulation_command
=
...
...
lib/post_analyses.ml
View file @
0030e37e
...
...
@@ -169,7 +169,7 @@ type res_all_hyp = {
h0_NeSmallInBig_res
:
dataset_res
option
;
ha_PC_NeBigInSmall_res
:
dataset_res
option
;
ha_PC_NeSmallInBig_res
:
dataset_res
option
;
}
}
let
make_t_choices_per_couple
{
h0_res
;
h0_NeBig_res
;
h0_NeSmall_res
;
ha_PC_res
;
ha_PCOC_res
;
ha_PC_NeBig_res
;
ha_PC_NeSmall_res
;
h0_NeBigInSmall_res
;
h0_NeSmallInBig_res
;
ha_PC_NeBigInSmall_res
;
ha_PC_NeSmallInBig_res
}
=
...
...
@@ -286,7 +286,7 @@ let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix : se
let
merged_results_dir
=
tmp
//
"merged_results_dir"
in
let
out
=
dest
//
"out"
in
let
cmd_cp_t_choices_l
=
List
.
map
t_choices_l
~
f
:
(
fun
t_choices
->
cmd
"cp"
[
dep
t_choices
.
t_choices_
max
;
t_choices_dir
//
(
t_choices
.
tree_prefix
^
".tsv"
)]
cmd
"cp"
[
dep
t_choices
.
t_choices_
recall09
;
t_choices_dir
//
(
t_choices
.
tree_prefix
^
".tsv"
)]
)
in
let
cmd_cp_merged_results_l
=
List
.
map
dataset_results_l
~
f
:
(
fun
dataset_results
->
...
...
@@ -324,20 +324,22 @@ let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile_prefix ~
post_analyses_res
.
dataset_results_l
)
|>
List
.
concat
in
let
sens_spe_t_choices_plot
=
plot_sens_spe_t_choices
~
t_choices_l
~
dataset_results_l
~
profile_prefix
in
[
Repo
.[
item
[
"sens_spe.tsv"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.sens_spe_auto_t.tsv"
]);
item
[
"all_t_choices.tsv"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.t_per_tree.tsv"
]);
]
|>
Repo
.
shift
"pdf_tsv"
;
Repo
.[
item
[
"sens_spe.pdf"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.sens_spe_auto_t.pdf"
]);
item
[
"all_t_choices.pdf"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.max_t_per_tree.pdf"
]);
item
[
"sens_spe.tsv"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.sens_spe_auto_t.tsv"
]);
item
[
"all_t_choices.tsv"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.max_t_per_tree.tsv"
]);
item
[
"all_t_choices.pdf"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.t_per_tree.pdf"
]);
]
]
|>
List
.
concat
let
repo_of_post_analyses_simu
~
post_analyses_simu
=
[
Repo
.[
item
[
"hypothesis_validation.pdf"
]
post_analyses_simu
.
simu_infos_plot
;
item
[
"trees_validation.pdf"
]
post_analyses_simu
.
trees_plot
;
]
|>
Repo
.
shift
"simu_infos"
]
;
(
List
.
map
post_analyses_simu
.
simu_infos_l
~
f
:
(
fun
simu_infos
->
match
simu_infos
.
simu_infos
with
...
...
@@ -345,10 +347,12 @@ let repo_of_post_analyses_simu ~post_analyses_simu =
|
Some
w
->
Repo
.[
item
[
simu_infos
.
tree_prefix
^
"@"
^
simu_infos
.
model_prefix
^
".tsv"
]
w
]
|>
Repo
.
shift
"simu_infos"
]
|>
Repo
.
shift
simu_infos
.
tree_prefix
|>
Repo
.
shift
"tsv"
)
|>
List
.
concat
);
]
|>
List
.
concat
|>
Repo
.
shift
"Simulation_details"
let
repo_of_post_analyses_res
~
prefix
~
post_analyses_res
=
[
...
...
@@ -361,7 +365,7 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res =
item
[
prefix
^
".t_choices.complete.tsv"
]
w
.
t_choices_complete
;
item
[
prefix
^
".t_choices.pdf"
]
w
.
t_choices_plot
;
item
[
prefix
^
".t_choices.condensed.pdf"
]
w
.
t_choices_condensed_plot
;
]
|>
Repo
.
shift
"t_choices"
]
);
(*(
match post_analyses_res.auto_t_plot_l with
...
...
lib/profile.ml
View file @
0030e37e
...
...
@@ -50,7 +50,7 @@ let cat_file ~(f_l: text_file workflow list) : text_file workflow =
let
profile_l_of_splitted_profile
~
nb_cat
~
nb_sites
profile_fn
=
let
profile_f
=
input
profile_fn
in
let
prefix
=
Filename
.
chop_extension
profile_fn
in
let
prefix
=
Filename
.
chop_extension
(
Filename
.
basename
profile_fn
)
in
let
dist_bins
=
match
nb_cat
with
|
3
->
"[0.01,0.4],[0.4,0.6],[0.6,2]"
|
1
->
"[0.01,2]"
...
...
lib/ready_dataset.ml
View file @
0030e37e
...
...
@@ -45,7 +45,6 @@ let repo rd =
;
]
|>
List
.
concat
|>
Repo
.
shift
"ready_dataset"
let
to_raw
{
input_tree
;
fna
;
fna_infos
}
=
{
Raw_dataset
.
input_tree
;
fna
;
fna_infos
}
...
...
lib/scripts/calc_t_per_meth.R
View file @
0030e37e
...
...
@@ -8,6 +8,8 @@ library("reshape2")
library
(
"ggplot2"
)
library
(
"cowplot"
)
date
=
format
(
Sys.time
(),
format
=
"%Y-%m-%d %X"
)
option_list
=
list
(
make_option
(
c
(
"--H0"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0"
,
metavar
=
"character"
),
make_option
(
c
(
"--H0NeBig"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0NeBig"
,
metavar
=
"character"
),
...
...
@@ -89,9 +91,10 @@ df_d_H0HaPC_NeSmall = build_df_dist_couple(df_H0NeSmall_melt, df_HaPCNeSmall_mel
df_d_H0HaPC_NeBigInSmall
=
build_df_dist_couple
(
df_H0NeBigInSmall_melt
,
df_HaPCNeBigInSmall_melt
,
"H0/HaPC NeBigInSmall"
)
df_d_H0HaPC_NeSmallInBig
=
build_df_dist_couple
(
df_H0NeSmallInBig_melt
,
df_HaPCNeSmallInBig_melt
,
"H0/HaPC NeSmallInBig"
)
df_d_H0HaPCOC
=
build_df_dist_couple
(
df_H0_melt
,
df_HaPCOC_melt
,
"H0/HaPCOC"
)
df_d_H0NeBigHaPCOC
=
build_df_dist_couple
(
df_H0NeBig_melt
,
df_HaPCOC_melt
,
"H0NeBig/HaPCOC"
)
df_d
=
rbind.data.frame
(
df_d_H0HaPC
,
df_d_H0HaPCOC
,
df_d_H0HaPC_NeBig
,
df_d_H0HaPC_NeSmall
,
df_d_H0HaPC_NeBigInSmall
,
df_d_H0HaPC_NeSmallInBig
)
df_d_H0HaPC_NeBigInSmall
,
df_d_H0HaPC_NeSmallInBig
,
df_d_H0NeBigHaPCOC
)
df_d
=
df_d
[
order
(
df_d
$
methode
),]
...
...
@@ -152,9 +155,11 @@ df_H0HaPC_NeSmall = build_df_couple(df_H0NeSmall_melt, df_HaPCNeSmall_melt, "H0/
df_H0HaPC_NeBigInSmall
=
build_df_couple
(
df_H0NeBigInSmall_melt
,
df_HaPCNeBigInSmall_melt
,
"H0/HaPC NeBigInSmall"
)
df_H0HaPC_NeSmallInBig
=
build_df_couple
(
df_H0NeSmallInBig_melt
,
df_HaPCNeSmallInBig_melt
,
"H0/HaPC NeSmallInBig"
)
df_H0HaPCOC
=
build_df_couple
(
df_H0_melt
,
df_HaPCOC_melt
,
"H0/HaPCOC"
)
df_H0NeBigHaPCOC
=
build_df_couple
(
df_H0NeBig_melt
,
df_HaPCOC_melt
,
"H0NeBig/HaPCOC"
)
df
=
rbind.data.frame
(
df_H0HaPC
,
df_H0HaPCOC
,
df_H0HaPC_NeBig
,
df_H0HaPC_NeSmall
,
df_H0HaPC_NeBigInSmall
,
df_H0HaPC_NeSmallInBig
)
df_H0HaPC_NeBigInSmall
,
df_H0HaPC_NeSmallInBig
,
df_H0NeBigHaPCOC
)
print
(
head
(
df
))
print
(
tail
(
df
))
...
...
@@ -200,7 +205,9 @@ print(summary(df_out))
print
(
"prep plot max mcc"
)
df_max_mcc_per_method
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
paste0
(
df_out
$
methode
,
df_out
$
couple
)),
function
(
x
)
{
return
(
x
[
which.max
(
x
$
MCC
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)])}))
function
(
x
)
{
return
(
x
[
which.max
(
x
$
MCC
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)])
}))
print
(
df_max_mcc_per_method
)
...
...
@@ -211,17 +218,24 @@ df_max_mcc_per_method_2$variable="MCC"
########################################################################
print
(
"prep plot recall_precision_per_meth"
)
df_recall_sup09_per_meth
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
paste0
(
df_out
$
methode
,
df_out
$
couple
)),
print
(
df_out
[
is.na
(
df_out
$
methode
),])
print
(
table
(
df_out
$
methode
))
print
(
table
(
df_out
$
methode
,
df_out
$
couple
))
df_recall_sup09_per_meth
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
paste0
(
df_out
$
methode
,
" "
,
df_out
$
couple
)),
function
(
x
)
{
x
$
precision
[
is.na
(
x
$
precision
)]
=
0
x2
=
x
[
x
$
precision
>
0.9
,]
if
(
nrow
(
x2
)
>
0
)
{
re
turn
(
x2
[
which.max
(
x2
$
sensitivity
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)]
)
re
s
=
x2
[
which.max
(
x2
$
sensitivity
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)]
}
else
{
x
=
x
[
1
,
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)]
print
(
x
)
x
[,
c
(
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)]
=
NA
re
turn
(
x
)
re
s
=
x
}
return
(
res
)
}))
print
(
df_recall_sup09_per_meth
)
...
...
@@ -276,6 +290,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot
=
plot
+
geom_hline
(
aes
(
yintercept
=
0.9
),
col
=
"black"
,
size
=
1
,
show.legend
=
NA
,
linetype
=
"dashed"
)
plot
=
plot
+
facet_grid
(
couple
~
methode
)
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
hjust
=
1
))
plot
=
plot
+
ggtitle
(
paste0
(
"Threshold definition such as precision > 0.9 - ("
,
date
,
")"
))
plot_recall_precision
=
plot
...
...
@@ -305,7 +320,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot_max_MCC
=
plot
save_plot
(
paste0
(
opt
$
out
,
suffix
,
".
max_MCC
_per_meth.pdf"
),
save_plot
(
paste0
(
opt
$
out
,
suffix
,
".
indicator
_per_meth.pdf"
),
plot_max_MCC
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
1.7
,
...
...
lib/scripts/plot_sens_spe_all_trees.R
View file @
0030e37e
...
...
@@ -40,19 +40,19 @@ files = files[grep("tsv", files)]
files_split
=
strsplit
(
files
,
"."
,
fixed
=
T
)
files_df
=
as.data.frame
(
do.call
(
rbind
,
files_split
))
print
(
files_df
)
files_df_ok
=
data.frame
(
files
=
paste0
(
input_dir
,
"/"
,
files
),
tree
=
gsub
(
".tsv"
,
""
,
files
),
bl
=
"NA"
,
profil
=
opt
$
profil
)
files_df_ok
=
data.frame
(
files
=
paste0
(
input_dir
,
"/"
,
files
),
tree
=
gsub
(
".tsv"
,
""
,
files
),
profil
=
opt
$
profil
)
#files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = files_df$V1, bl = files_df$V1, profil = opt$profil)
condensed_meths
=
c
(
"PCOC"
,
"diffsel"
,
"Identical_LG08"
,
"Mutinomial_1MinusLRT"
,
"Tdg09_1MinusFDR"
,
"Msd_1MinusP"
,
"Topological_LG08"
)
read_dir
=
function
(
x
)
{
file
=
x
[
"files"
]
tree
=
x
[
"tree"
]
profil
=
x
[
"profil"
]
bl
=
x
[
"bl"
]
df
=
read.csv
(
file
,
sep
=
"\t"
,
header
=
T
)
df
$
tree
=
tree
df
$
profil
=
profil
df
$
bl
=
bl
df
=
df
[
df
$
methode
%in%
condensed_meths
,
]
return
(
df
)
}
...
...
@@ -62,41 +62,56 @@ alpha = 0.7
x_labs
=
""
y_labs
=
"Threshold"
print
(
head
(
df
))
df_max_mcc_per_method
=
do.call
(
rbind
,
lapply
(
split
(
df
,
df
$
methode
),
function
(
x
)
{
return
(
x
[
which.max
(
x
$
threshold
),
c
(
"methode"
,
"threshold"
,
"tree"
,
"profil"
,
"couple"
)])}))
df_tmp
=
subset
(
df
,
couple
==
"H0/HaPCOC"
)
df_t_per_method
=
do.call
(
rbind
,
lapply
(
split
(
df_tmp
,
paste0
(
df_tmp
$
methode
,
df_tmp
$
tree
)),
function
(
x
)
{
return
(
x
[
which.max
(
x
$
threshold
),
c
(
"methode"
,
"threshold"
,
"tree"
,
"profil"
,
"couple"
)])}))
df_
max_mcc
_per_method_2
=
df_
max_mcc
_per_method
df_
max_mcc
_per_method_2
[
df_
max_mcc
_per_method_2
$
tree
==
unique
(
df_
max_mcc
_per_method_2
$
tree
)[
1
],]
df_
t
_per_method_2
=
df_
t
_per_method
df_
t
_per_method_2
[
df_
t
_per_method_2
$
tree
==
unique
(
df_
t
_per_method_2
$
tree
)[
1
],]
df2
=
df
df2
$
retained_t
=
"no"
df2
$
retained_t
[
df2
$
couple
==
"H0/HaPCOC"
]
=
"yes"
plot
=
ggplot
(
df
,
aes
(
x
=
tree
,
y
=
threshold
,
col
=
couple
,
shape
=
profil
))
+
theme_bw
()
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
+
ylim
(
c
(
0
,
1
.5
))
plot
=
ggplot
(
df
2
,
aes
(
x
=
tree
,
y
=
threshold
,
col
=
couple
,
shape
=
retained_t
))
+
theme_bw
()
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
+
ylim
(
c
(
0
,
1
))
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
0
,
hjust
=
1
))
plot
=
plot
+
geom_point
()
plot
=
plot
+
geom_hline
(
data
=
df_max_mcc_per_method
,
aes
(
yintercept
=
threshold
),
col
=
"red"
,
alpha
=
alpha
,
show.legend
=
NA
,
linetype
=
"dotted"
)
plot
=
plot
+
geom_label
(
data
=
df_max_mcc_per_method_2
,
nudge_y
=
0.25
,
aes
(
label
=
threshold
),
col
=
"red"
,
alpha
=
alpha
,
show.legend
=
NA
)
plot
=
plot
+
geom_point
(
position
=
position_dodge
(
.5
),
size
=
3
)
plot
=
plot
+
theme
(
legend.position
=
"top"
)
#plot = plot + guides(shape=FALSE)
plot
=
plot
+
scale_shape_manual
(
values
=
c
(
19
,
17
))
plot
=
plot
+
guides
(
col
=
guide_legend
(
ncol
=
2
))
plot
=
plot
+
guides
(
shape
=
guide_legend
(
nrow
=
2
))
#plot = plot + geom_point( data = df_t_per_method, aes(x=tree, y=threshold, group = couple), size = 5, shape = 1, col="red", alpha=alpha, position= position_dodge(.5))
#plot = plot + geom_label( data = df_t_per_method_2, nudge_y = 0.25, aes(label = threshold), col="red", alpha=alpha, show.legend = NA)
plot
=
plot
+
facet_grid
(
methode
~
.
)
+
coord_flip
()
plot
=
plot
+
ggtitle
(
"
t max per tree
"
)
plot
=
plot
+
ggtitle
(
"
Threshold definition such as\nprecision > 0.9 per H0*/Ha* couple
"
)
output_pdf
=
paste0
(
opt
$
out
,
".
max_
t_per_tree.pdf"
)
output_pdf
=
paste0
(
opt
$
out
,
".t_per_tree.pdf"
)
save_plot
(
output_pdf
,
plot
,
ncol
=
1
,
nrow
=
0.
3
*
length
(
unique
(
df
$
methode
)),
nrow
=
0.
5
*
length
(
unique
(
df
$
methode
)),
base_aspect_ratio
=
1.5
,
limitsize
=
FALSE
)
output_tsv
=
paste0
(
opt
$
out
,
".
max_
t_per_tree.tsv"
)
output_tsv
=
paste0
(
opt
$
out
,
".t_per_tree.tsv"
)
write.table
(
df
,
file
=
output_tsv
,
row.names
=
FALSE
,
quote
=
F
,
sep
=
"\t"
)
####################
# Spe en sens
####################
df_t
=
df
df_t
=
df_tmp
print
(
"df_t"
)
print
(
df_t
)
parse_file
=
function
(
df_m
,
df_t
)
{
df_m_melt
=
melt
(
df_m
)
...
...
@@ -104,10 +119,10 @@ parse_file = function(df_m, df_t) {
df_m_melt
$
t
=
as.numeric
(
as.character
(
df_m_melt
$
t
))
df_m_melt
$
is_P
=
df_m_melt
$
value
>
df_m_melt
$
t
P
=
tapply
(
df_m_melt
$
is_P
,
df_m_melt
$
variable
,
function
(
x
)
{
sum
(
x
,
na.rm
=
T
)})
P
[
df_m_melt
$
variable
[
is.na
(
df_m_melt
$
t
)]]
=
NA
n_sites
=
dim
(
df_m
)[
1
]
df
=
data.frame
(
methode
=
names
(
P
),
P
=
P
)
df
=
merge
(
df
,
df_t
,
by
=
"methode"
)
df
$
P
=
df
$
P
/
n_sites
return
(
df
)
}
...
...
@@ -119,27 +134,29 @@ files = files[grep("tsv", files)]
files_split
=
strsplit
(
files
,
"@"
,
fixed
=
T
)
files_df
=
as.data.frame
(
do.call
(
rbind
,
files_split
))
print
(
files_df
)
files_df_ok
=
data.frame
(
files
=
paste0
(
input_dir2
,
"/"
,
files
),
tree
=
files_df
$
V1
,
hyp
=
gsub
(
".tsv"
,
""
,
files_df
$
V2
),
bl
=
files_df
$
V1
,
profil
=
opt
$
profil
)
files_df_ok
=
data.frame
(
files
=
paste0
(
input_dir2
,
"/"
,
files
),
tree
=
files_df
$
V1
,
hyp
=
gsub
(
".tsv"
,
""
,
files_df
$
V2
),
profil
=
opt
$
profil
)
read_dir
=
function
(
x
)
{
file
=
x
[
"files"
]
tree
=
x
[
"tree"
]
profil
=
x
[
"profil"
]
bl
=
x
[
"bl"
]
hyp
=
x
[
"hyp"
]
df_m
=
read.csv
(
file
,
sep
=
"\t"
,
header
=
T
,
na.strings
=
c
(
"NA"
,
"NaN"
,
" "
,
""
))
print
(
head
(
df_m
))
df
=
parse_file
(
df_m
[,
colnames
(
df_m
)
%in%
df_t
$
methode
],
df_t
[
df_t
$
tree
==
tree
,
c
(
"methode"
,
"threshold"
)])
df
$
tree
=
tree
df
$
profil
=
profil
df
$
bl
=
bl
df
$
hyp
=
hyp
df
=
df
[
df
$
methode
%in%
condensed_meths
,
]
return
(
df
)
}
df
=
do.call
(
rbind
,
apply
(
files_df_ok
,
1
,
read_dir
))
rownames
(
df
)
<-
NULL
print
(
df
)
hyps_from
=
unique
(
df
$
hyp
)
hyps_to
=
rep
(
"Specificity"
,
length
(
hyps_from
))
hyps_to
[
grep
(
"Ha"
,
hyps_from
)]
=
"Sensitivity"
...
...
@@ -153,7 +170,11 @@ x_labs = ""
y_labs
=
"Value"
plot
=
ggplot
(
df
,
aes
(
x
=
methode
,
y
=
P
,
fill
=
hyp
))
+
theme_bw
()
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
+
ylim
(
c
(
0
,
1.1
))
print
(
df
)
plot_tree
=
function
(
tree
)
{
df_tmp
=
df
[
df
$
tree
==
tree
,
]
plot
=
ggplot
(
df_tmp
,
aes
(
x
=
methode
,
y
=
P
,
fill
=
hyp
))
+
theme_bw
()
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
+
ylim
(
c
(
0
,
1.1
))
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
30
,
hjust
=
1
))
plot
=
plot
+
geom_bar
(
stat
=
"identity"
,
position
=
position_dodge
(
0.9
),
color
=
"black"
,
alpha
=
0.7
)
plot
=
plot
+
geom_text
(
aes
(
y
=
P
+
0.05
,
label
=
round
(
P
,
2
)),
angle
=
90
,
hjust
=
0
,
position
=
position_dodge
(
0.9
))
...
...
@@ -162,12 +183,16 @@ plot = plot + facet_grid(variable ~ tree)
plot
=
plot
+
ggtitle
(
paste0
(
"Compa methodes - "
,
opt
$
profil
,
" - ("
,
date
,
")"
))
plot
=
plot
+
scale_color_manual
(
values
=
c
(
'gray'
,
'black'
),
guide
=
FALSE
)
plot
=
plot
+
scale_alpha
(
range
=
c
(
0.7
,
1
),
guide
=
FALSE
)
}
plot_all
<-
plot_grid
(
plotlist
=
lapply
(
unique
(
df
$
tree
),
plot_tree
),
labels
=
"AUTO"
,
nrow
=
length
(
unique
(
df
$
tree
)))
output_pdf2
=
paste0
(
opt
$
out
,
".sens_spe_auto_t.pdf"
)
save_plot
(
output_pdf2
,
plot
,
ncol
=
length
(
unique
(
df
$
hyp
))
/
3
*
length
(
unique
(
df
$
tree
))
,
nrow
=
2
,
plot
_all
,
ncol
=
length
(
unique
(
df
$
hyp
))
*
length
(
unique
(
df
$
methode
))
/
30
,
nrow
=
2
*
length
(
unique
(
df
$
tree
))
,
base_aspect_ratio
=
1.5
,
limitsize
=
FALSE
)
...
...
lib/scripts/plot_trees.R
View file @
0030e37e
...
...
@@ -78,13 +78,13 @@ plotlist = lapply(split(files_df_ok,paste0(files_df_ok$tree_prefix)), plot_tree_
plot_all
<-
plot_grid
(
plotlist
=
plotlist
,
labels
=
NA
,
n
col
=
length
(
unique
(
files_df_ok
$
tree_prefix
)))
labels
=
NA
,
n
row
=
length
(
unique
(
files_df_ok
$
tree_prefix
)))
output_pdf
=
paste0
(
opt
$
out
,
".pdf"
)
save_plot
(
output_pdf
,
plot_all
,
ncol
=
length
(
unique
(
files_df_ok
$
tree_prefix
))
,
nrow
=
5
,
ncol
=
2
,
nrow
=
10
*
length
(
unique
(
files_df_ok
$
tree_prefix
))
,
base_aspect_ratio
=
2
,
limitsize
=
FALSE
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment