Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
5d95464a
Commit
5d95464a
authored
Aug 01, 2018
by
Carine Rey
Browse files
add 2 new post_analyse plots
parent
5649b154
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
390 additions
and
121 deletions
+390
-121
etc/docker/r_basics/Dockerfile
etc/docker/r_basics/Dockerfile
+3
-0
etc/docker/r_basics/build_docker.sh
etc/docker/r_basics/build_docker.sh
+1
-1
lib/convergence_detection.ml
lib/convergence_detection.ml
+6
-1
lib/convergence_detection.mli
lib/convergence_detection.mli
+2
-0
lib/file_formats.ml
lib/file_formats.ml
+5
-0
lib/phyml.ml
lib/phyml.ml
+2
-1
lib/pipeline.ml
lib/pipeline.ml
+2
-1
lib/post_analyses.ml
lib/post_analyses.ml
+60
-9
lib/scripts/calc_t_per_meth.R
lib/scripts/calc_t_per_meth.R
+206
-104
lib/scripts/merge_det_results.py
lib/scripts/merge_det_results.py
+10
-1
lib/scripts/plot_sens_spe_all_trees.R
lib/scripts/plot_sens_spe_all_trees.R
+3
-3
lib/scripts/plot_trees.R
lib/scripts/plot_trees.R
+90
-0
No files found.
etc/docker/r_basics/Dockerfile
View file @
5d95464a
...
...
@@ -8,3 +8,6 @@ RUN echo 'install.packages(c("optparse", "ggplot2", "reshape2", "cowplot", "coda
RUN
apt-get update
&&
\
apt-get
install
--no-install-recommends
-qy
\
pandoc
RUN
echo
'source("https://bioconductor.org/biocLite.R"); biocLite("ggtree")'
>
/tmp/packages.R
\
&&
Rscript /tmp/packages.R
etc/docker/r_basics/build_docker.sh
View file @
5d95464a
...
...
@@ -4,7 +4,7 @@ set -e
IMAGE_NAME
=
r_basics
DOCKERFILE_DIR
=
.
TAG
=
0
723
2018
TAG
=
0
801
2018
REPO
=
carinerey/
$IMAGE_NAME
:
$TAG
docker build
-t
$REPO
-f
./Dockerfile
$DOCKERFILE_DIR
...
...
lib/convergence_detection.ml
View file @
5d95464a
...
...
@@ -41,8 +41,12 @@ type dataset_res = {
plot_merged_results
:
svg
workflow
;
}
let
merge_results
~
res_by_tools
:
text_file
workflow
=
let
merge_results
?
fna_infos
~
res_by_tools
()
:
text_file
workflow
=
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"python_basics"
~
tag
:
"07252018"
()
in
let
fna_infos
=
match
fna_infos
with
|
Some
(
sw
)
->
sw
|
_
->
None
in
let
command
=
List
.
map
res_by_tools
~
f
:
(
fun
res
->
let
w
=
match
res
with
|
`Pcoc
d
->
Pcoc
.
results
d
...
...
@@ -78,6 +82,7 @@ let merge_results ~res_by_tools : text_file workflow =
file_dump
(
string
Scripts
.
merge_det_results
)
;
opt
"-o"
ident
dest
;
seq
~
sep
:
" "
command
;
option
(
opt
"--fna_infos"
dep
)
fna_infos
;
]
;
]
...
...
lib/convergence_detection.mli
View file @
5d95464a
...
...
@@ -31,7 +31,9 @@ type dataset_res = {
}
val
merge_results
:
?
fna_infos
:
text_file
workflow
option
->
res_by_tools
:
result
list
->
unit
->
text_file
workflow
val
plot_merge_results
:
...
...
lib/file_formats.ml
View file @
5d95464a
...
...
@@ -6,6 +6,11 @@ class type nhx = object
method
format
:
[
`nhx
]
end
class
type
nw
=
object
inherit
text_file
method
format
:
[
`nw
]
end
class
type
diffsel_tree
=
object
inherit
text_file
method
format
:
[
`diffsel_tree
]
...
...
lib/phyml.ml
View file @
5d95464a
...
...
@@ -2,6 +2,7 @@ open Core
open
Bistro
.
Std
open
Bistro
.
EDSL
open
Bistro_bioinfo
.
Std
open
File_formats
type
phyml
...
...
@@ -27,5 +28,5 @@ let phyml ~tree phy : phyml directory workflow =
])
]
let
phyml_tree
~
tree
phy
=
let
phyml_tree
~
tree
phy
:
nw
workflow
=
(
phyml
~
tree
phy
)
/
selector
[
"tree.phy_phyml_tree.txt"
]
lib/pipeline.ml
View file @
5d95464a
...
...
@@ -253,7 +253,8 @@ let derive_from_dataset ~dataset ~preview ~fast_mode=
let
res_by_tools
=
List
.
map
det_meths
~
f
:
(
fun
det_meth
->
derive_from_det_meth
~
det_meth
~
dataset
~
preview
)
in
let
merged_results
=
merge_results
~
res_by_tools
in
let
fna_infos
=
dataset
.
dataset
.
fna_infos
in
let
merged_results
=
merge_results
~
fna_infos
~
res_by_tools
()
in
let
tsv
=
merged_results
in
let
faa
=
dataset
.
dataset
.
faa
in
let
tree
=
Tree_dataset
.
tree
dataset
.
dataset
.
tree_dataset
`Detection
in
...
...
lib/post_analyses.ml
View file @
5d95464a
...
...
@@ -6,13 +6,16 @@ open Bistro_utils
open
File_formats
open
Convergence_detection
type
plot_trees
type
post_analyses_dir
type
sens_spe_t_choices_plot
type
t_choices
=
{
t_choices_complete
:
text_file
workflow
;
t_choices_max
:
text_file
workflow
;
t_choices_recall09
:
text_file
workflow
;
t_choices_plot
:
text_file
workflow
;
t_choices_condensed_plot
:
text_file
workflow
;
tree_prefix
:
string
;
}
...
...
@@ -35,12 +38,20 @@ type simu_infos = {
tree_prefix
:
string
;
}
type
reinfered_tree
=
{
reinfered_tree
:
nw
workflow
;
input_tree
:
nhx
workflow
;
tree_prefix
:
string
;
model_prefix
:
string
;
}
type
post_analyses_simu
=
{
simu_infos_l
:
simu_infos
list
;
simu_infos_plot
:
text_file
workflow
;
trees_plot
:
text_file
workflow
;
}
let
r_env
=
docker_image
~
account
:
"carinerey"
~
name
:
"r_basics"
~
tag
:
"0
723
2018"
()
let
r_env
=
docker_image
~
account
:
"carinerey"
~
name
:
"r_basics"
~
tag
:
"0
801
2018"
()
let
is_hyp
~
hyp
(
dataset_results
:
dataset_res
)
=
let
model_prefix
=
dataset_results
.
model_prefix
in
...
...
@@ -51,7 +62,7 @@ let build_cmd_t_choices (opt_name : string) mr_option =
|
Some
x
->
[
opt
opt_name
dep
x
]
|
None
->
[]
let
make_t_choices
~
h0_mr
~
h0_NeBig_mr
~
h0_NeSmall_mr
~
haPCOC_mr
~
haPC_mr
~
haPC_NeBig_mr
~
haPC_NeSmall_mr
~
h0_NeBigInSmall_mr
let
make_t_choices
~
h0_mr
~
h0_NeBig_mr
~
h0_NeSmall_mr
~
haPCOC_mr
~
haPC_mr
~
haPC_NeBig_mr
~
haPC_NeSmall_mr
~
h0_NeBigInSmall_mr
~
h0_NeSmallInBig_mr
~
haPC_NeBigInSmall_mr
~
haPC_NeSmallInBig_mr
()
:
post_analyses_dir
directory
workflow
=
let
env
=
r_env
in
let
out
=
dest
//
"out"
in
...
...
@@ -75,7 +86,8 @@ let make_t_choices ~h0_mr ~h0_NeBig_mr ~h0_NeSmall_mr ~haPCOC_mr ~haPC_mr ~haPC
mkdir_p
dest
;
cmd
"Rscript"
([
[
file_dump
(
string
Scripts
.
calc_t_per_meth
)
;
opt
"--out "
ident
out
;
]
;
opt
"--out "
ident
out
;
]
;
cmd_mr
;
]
|>
List
.
concat
)
;
])
...
...
@@ -118,6 +130,29 @@ let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
)
]
let
plot_trees
~
reinfered_tree_l
:
plot_trees
directory
workflow
=
let
env
=
r_env
in
let
cmd_cp_l
=
List
.
map
reinfered_tree_l
~
f
:
(
fun
rt
->
[
cmd
"cp"
[
dep
rt
.
reinfered_tree
;
tmp
//
(
rt
.
tree_prefix
^
"@"
^
rt
.
model_prefix
^
".nw"
)];
cmd
"cp"
[
dep
rt
.
input_tree
;
tmp
//
(
rt
.
tree_prefix
^
"@input_tree.nw"
)]
])
|>
List
.
concat
in
let
out
=
dest
//
"out"
in
workflow
~
descr
:
"post_analyses.plot_trees"
[
docker
env
(
and_list
([
[
mkdir_p
dest
];
[
mkdir_p
tmp
];
cmd_cp_l
;
[
cmd
"Rscript"
[
file_dump
(
string
Scripts
.
plot_trees
)
;
opt
"--input_dir"
ident
tmp
;
opt
"--out "
ident
out
;
];]
]
|>
List
.
concat
)
)
]
let
get_merged_results_opt
hx
=
match
hx
with
|
Some
w
->
Some
w
.
merged_results
|
None
->
None
...
...
@@ -176,11 +211,13 @@ let get_t_choices ~(dataset_results_l: dataset_res list) : t_choices option =
|
(
Some
h0
,
Some
_
)
->
let
t_choices_dir
=
make_t_choices_per_couple
{
h0_res
;
h0_NeBig_res
;
h0_NeSmall_res
;
ha_PC_res
;
ha_PCOC_res
;
ha_PC_NeBig_res
;
ha_PC_NeSmall_res
;
h0_NeBigInSmall_res
;
h0_NeSmallInBig_res
;
ha_PC_NeBigInSmall_res
;
ha_PC_NeSmallInBig_res
}
in
let
t_choices_max
=
t_choices_dir
/
selector
[
"out.recall09_per_meth.tsv"
]
in
let
t_choices_max
=
t_choices_dir
/
selector
[
"out.max_MCC_per_meth.tsv"
]
in
let
t_choices_recall09
=
t_choices_dir
/
selector
[
"out.recall09_per_meth.tsv"
]
in
let
t_choices_complete
=
t_choices_dir
/
selector
[
"out.complete.tsv"
]
in
let
t_choices_plot
=
t_choices_dir
/
selector
[
"out.pdf"
]
in
let
t_choices_condensed_plot
=
t_choices_dir
/
selector
[
"out_condensed.pdf"
]
in
let
tree_prefix
=
h0
.
tree_prefix
in
Some
{
t_choices_max
;
t_choices_
complete
;
t_choices
_plot
;
tree_prefix
}
Some
{
t_choices_max
;
t_choices_
recall09
;
t_choices_complete
;
t_choices_plot
;
t_choices_condensed
_plot
;
tree_prefix
}
|
_
->
None
...
...
@@ -227,8 +264,19 @@ let post_analyses_simu_of_simu_dataset_l ~simu_dataset_l =
model_prefix
=
dataset
.
model_prefix
}
)
in
let
reinfered_tree_l
=
List
.
map
simu_dataset_l
~
f
:
(
fun
dataset
->
let
rd
=
dataset
.
dataset
in
let
phy
=
Bppsuite
.
fa2phy
rd
.
fna
in
let
input_tree
=
rd
.
input_tree
in
let
reinfered_tree
=
Phyml
.
phyml_tree
~
tree
:
input_tree
phy
in
{
reinfered_tree
;
input_tree
;
tree_prefix
=
dataset
.
tree_prefix
;
model_prefix
=
dataset
.
model_prefix
}
)
in
let
simu_infos_plot
=
group_simu_infos
~
simu_infos_l
/
selector
[
"out.pdf"
]
in
{
simu_infos_l
;
simu_infos_plot
}
let
trees_plot
=
plot_trees
~
reinfered_tree_l
/
selector
[
"out.pdf"
]
in
{
simu_infos_l
;
simu_infos_plot
;
trees_plot
}
...
...
@@ -287,7 +335,8 @@ let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile_prefix ~
let
repo_of_post_analyses_simu
~
post_analyses_simu
=
[
Repo
.[
item
[
"hypothesis_validation.pdf"
]
post_analyses_simu
.
simu_infos_plot
item
[
"hypothesis_validation.pdf"
]
post_analyses_simu
.
simu_infos_plot
;
item
[
"trees_validation.pdf"
]
post_analyses_simu
.
trees_plot
;
]
|>
Repo
.
shift
"simu_infos"
;
(
List
.
map
post_analyses_simu
.
simu_infos_l
~
f
:
(
fun
simu_infos
->
...
...
@@ -308,11 +357,13 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res =
|
Some
w
->
Repo
.[
item
[
prefix
^
".t_choices.max_mcc_per_meth.tsv"
]
w
.
t_choices_max
;
item
[
prefix
^
".t_choices.recall09_per_meth.tsv"
]
w
.
t_choices_recall09
;
item
[
prefix
^
".t_choices.complete.tsv"
]
w
.
t_choices_complete
;
item
[
prefix
^
".t_choices.pdf"
]
w
.
t_choices_plot
;
item
[
prefix
^
".t_choices.condensed.pdf"
]
w
.
t_choices_condensed_plot
;
]
|>
Repo
.
shift
"t_choices"
);
(
(
*(
match post_analyses_res.auto_t_plot_l with
| None -> []
| Some w_l ->
...
...
@@ -323,6 +374,6 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res =
]
)|> List.concat
|> Repo.shift "auto_t_pdf"
);
);
*)
]
|>
List
.
concat
lib/scripts/calc_t_per_meth.R
View file @
5d95464a
...
...
@@ -37,6 +37,71 @@ if (is.null(opt$HaPCOC)){
stop
(
"At least one argument must be supplied (HaPCOC input file)"
,
call.
=
FALSE
)
}
########################################################################
print
(
"prep input files"
)
read_hyp
=
function
(
opt_name
)
{
if
(
!
is.null
(
opt_name
)){
df
=
read.table
(
opt_name
,
header
=
TRUE
,
sep
=
'\t'
,
na.strings
=
"NA"
)
df
=
df
[,
!
colnames
(
df
)
%in%
c
(
"Indel_prop"
,
"Indel_prop.ConvLeaves."
)]
id_vars
=
if
(
"P_distance"
%in%
colnames
(
df
))
{
c
(
"Sites"
,
"P_distance"
)}
else
{
c
(
"Sites"
)}
df_melt
=
melt
(
df
,
id.vars
=
id_vars
,
variable.name
=
"methode"
)
return
(
df_melt
)
}
else
{
return
(
NULL
)
}
}
df_H0_melt
=
read_hyp
(
opt
$
H0
)
df_H0NeBig_melt
=
read_hyp
(
opt
$
H0NeBig
)
df_H0NeSmall_melt
=
read_hyp
(
opt
$
H0NeSmall
)
df_H0NeBigInSmall_melt
=
read_hyp
(
opt
$
H0NeBigInSmall
)
df_H0NeSmallInBig_melt
=
read_hyp
(
opt
$
H0NeSmallInBig
)
df_HaPCOC_melt
=
read_hyp
(
opt
$
HaPCOC
)
df_HaPC_melt
=
read_hyp
(
opt
$
HaPC
)
df_HaPCNeBig_melt
=
read_hyp
(
opt
$
HaPCNeBig
)
df_HaPCNeSmall_melt
=
read_hyp
(
opt
$
HaPCNeSmall
)
df_HaPCNeBigInSmall_melt
=
read_hyp
(
opt
$
HaPCNeBigInSmall
)
df_HaPCNeSmallInBig_melt
=
read_hyp
(
opt
$
HaPCNeSmallInBig
)
########################################################################
print
(
"prep df_d"
)
build_df_dist_couple
=
function
(
df_h0
,
df_ha
,
name
)
{
if
((
!
is.null
(
df_h0
))
&
(
!
is.null
(
df_ha
)))
{
df
=
merge
(
df_h0
,
df_ha
,
by
=
c
(
"Sites"
,
"P_distance"
,
"methode"
),
suffix
=
c
(
"_H0"
,
"_Ha"
))
df_melt
=
melt
(
df
,
id.vars
=
c
(
"Sites"
,
"P_distance"
,
"methode"
),
variable.name
=
"val_H0Ha"
)
df_melt
$
couple
=
name
}
else
{
df_melt
=
NULL
}
return
(
df_melt
)
}
df_d_H0HaPC
=
build_df_dist_couple
(
df_H0_melt
,
df_HaPC_melt
,
"H0/HaPC"
)
df_d_H0HaPC_NeBig
=
build_df_dist_couple
(
df_H0NeBig_melt
,
df_HaPCNeBig_melt
,
"H0/HaPC NeBig"
)
df_d_H0HaPC_NeSmall
=
build_df_dist_couple
(
df_H0NeSmall_melt
,
df_HaPCNeSmall_melt
,
"H0/HaPC NeSmall"
)
df_d_H0HaPC_NeBigInSmall
=
build_df_dist_couple
(
df_H0NeBigInSmall_melt
,
df_HaPCNeBigInSmall_melt
,
"H0/HaPC NeBigInSmall"
)
df_d_H0HaPC_NeSmallInBig
=
build_df_dist_couple
(
df_H0NeSmallInBig_melt
,
df_HaPCNeSmallInBig_melt
,
"H0/HaPC NeSmallInBig"
)
df_d_H0HaPCOC
=
build_df_dist_couple
(
df_H0_melt
,
df_HaPCOC_melt
,
"H0/HaPCOC"
)
df_d
=
rbind.data.frame
(
df_d_H0HaPC
,
df_d_H0HaPCOC
,
df_d_H0HaPC_NeBig
,
df_d_H0HaPC_NeSmall
,
df_d_H0HaPC_NeBigInSmall
,
df_d_H0HaPC_NeSmallInBig
)
df_d
=
df_d
[
order
(
df_d
$
methode
),]
print
(
head
(
df_d
))
print
(
tail
(
df_d
))
########################################################################
print
(
"prep df"
)
## fun...
calc_TN_FP
=
function
(
vals
,
t
){
...
...
@@ -62,45 +127,18 @@ calc_TP_FN = function(vals, t){
}
calc_TN_FP_TP_FN
=
function
(
t
,
df_H0_melt
,
df_Ha_melt
){
df_TN_FP
=
do.call
(
rbind.data.frame
,
tapply
(
df_H0_melt
$
value
,
df_H0_melt
$
variabl
e
,
calc_TN_FP
,
t
=
t
))
df_TP_FN
=
do.call
(
rbind.data.frame
,
tapply
(
df_Ha_melt
$
value
,
df_H0_melt
$
variabl
e
,
calc_TP_FN
,
t
=
t
))
df_TN_FP
=
do.call
(
rbind.data.frame
,
tapply
(
df_H0_melt
$
value
,
df_H0_melt
$
method
e
,
calc_TN_FP
,
t
=
t
))
df_TP_FN
=
do.call
(
rbind.data.frame
,
tapply
(
df_Ha_melt
$
value
,
df_H0_melt
$
method
e
,
calc_TP_FN
,
t
=
t
))
df_TN_FP_TP_FN
=
merge
(
df_TN_FP
,
df_TP_FN
,
by
=
0
)
df_TN_FP_TP_FN
$
t
=
t
return
(
df_TN_FP_TP_FN
)
}
## program...
read_hyp
=
function
(
opt_name
)
{
if
(
!
is.null
(
opt_name
)){
df
=
read.table
(
opt_name
,
header
=
TRUE
,
sep
=
'\t'
,
na.strings
=
"NA"
)
df
=
df
[,
!
colnames
(
df
)
%in%
c
(
"Indel_prop"
,
"Indel_prop.ConvLeaves."
)]
df_melt
=
melt
(
df
,
id.vars
=
c
(
"Sites"
))
return
(
df_melt
)
}
else
{
return
(
NULL
)
}
}
df_H0_melt
=
read_hyp
(
opt
$
H0
)
df_H0NeBig_melt
=
read_hyp
(
opt
$
H0NeBig
)
df_H0NeSmall_melt
=
read_hyp
(
opt
$
H0NeSmall
)
df_H0NeBigInSmall_melt
=
read_hyp
(
opt
$
H0NeBigInSmall
)
df_H0NeSmallInBig_melt
=
read_hyp
(
opt
$
H0NeSmallInBig
)
df_HaPCOC_melt
=
read_hyp
(
opt
$
HaPCOC
)
df_HaPC_melt
=
read_hyp
(
opt
$
HaPC
)
df_HaPCNeBig_melt
=
read_hyp
(
opt
$
HaPCNeBig
)
df_HaPCNeSmall_melt
=
read_hyp
(
opt
$
HaPCNeSmall
)
df_HaPCNeBigInSmall_melt
=
read_hyp
(
opt
$
HaPCNeBigInSmall
)
df_HaPCNeSmallInBig_melt
=
read_hyp
(
opt
$
HaPCNeSmallInBig
)
build_df_couple
=
function
(
df_h0
,
df_ha
,
name
)
{
if
((
!
is.null
(
df_h0
))
&
(
!
is.null
(
df_ha
)))
{
df
=
do.call
(
rbind.data.frame
,
lapply
(
seq
(
0
,
0.999
,
0.
0
01
),
calc_TN_FP_TP_FN
,
df_H0_melt
=
df_h0
,
df_Ha_melt
=
df_ha
))
df
=
do.call
(
rbind.data.frame
,
lapply
(
seq
(
0
,
1
,
0.01
),
calc_TN_FP_TP_FN
,
df_H0_melt
=
df_h0
,
df_Ha_melt
=
df_ha
))
df
$
couple
=
name
}
else
{
df
=
NULL
...
...
@@ -121,7 +159,9 @@ df = rbind.data.frame(df_H0HaPC, df_H0HaPCOC,df_H0HaPC_NeBig,df_H0HaPC_NeSmall,
print
(
head
(
df
))
print
(
tail
(
df
))
## Sensitivity (= recall)
## Sensitivity (= recall)
df
$
sens
=
df
$
TP
/
(
df
$
TP
+
df
$
FN
)
df
$
sens
[
is.na
(
df
$
sens
)]
=
0
...
...
@@ -133,9 +173,9 @@ df$spe[is.na(df$spe)] = 0
n_sites
=
sum
(
df
[
1
,
c
(
"TP"
,
"FN"
)])
p
=
140
/
n_sites
n
=
(
6000
-140
)
/
n_sites
df
$
FP_2
=
df
$
FP
*
n
df
$
FP_2
=
df
$
FP
*
n
df
$
TP_2
=
df
$
TP
*
p
df
$
FN_2
=
df
$
FN
*
p
df
$
FN_2
=
df
$
FN
*
p
df
$
TN_2
=
df
$
TN
*
n
df
$
mcc
=
(
df
$
TP_2
*
df
$
TN_2
-
df
$
FP_2
*
df
$
FN_2
)
/
sqrt
((
df
$
TP_2
+
df
$
FP_2
)
*
(
df
$
TP_2
+
df
$
FN_2
)
*
(
df
$
TN_2
+
df
$
FP_2
)
*
(
df
$
TN_2
+
df
$
FN_2
))
...
...
@@ -156,9 +196,7 @@ df_out = df_out[order(df_out$methode),]
print
(
summary
(
df_out
))
df_out_melt
=
melt
(
df_out
,
id.vars
=
c
(
"couple"
,
"methode"
,
"threshold"
))
########################################################################
print
(
"prep plot max mcc"
)
df_max_mcc_per_method
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
paste0
(
df_out
$
methode
,
df_out
$
couple
)),
...
...
@@ -170,91 +208,155 @@ print(df_max_mcc_per_method)
df_max_mcc_per_method_2
=
df_max_mcc_per_method
df_max_mcc_per_method_2
$
variable
=
"MCC"
########################################################################
print
(
"prep plot recall_precision_per_meth"
)
df_recall_sup09_per_meth
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
paste0
(
df_out
$
methode
,
df_out
$
couple
)),
function
(
x
)
{
print
(
x
)
x
=
x
[
x
$
precision
>
0.9
,]
if
(
nrow
(
x
)
>
0
)
{
print
(
x
)
return
(
x
[
which.max
(
x
$
sensitivity
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)])
x2
=
x
[
x
$
precision
>
0.9
,]
if
(
nrow
(
x2
)
>
0
)
{
return
(
x2
[
which.max
(
x2
$
sensitivity
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)])
}
else
{
return
()
x
=
x
[
1
,
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)]
print
(
x
)
x
[,
c
(
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)]
=
NA
return
(
x
)
}
}))
print
(
df_recall_sup09_per_meth
)
########################################################################
print
(
"plot recall_precision_per_meth"
)
alpha
=
0.5
plot
=
ggplot
(
df_out
,
aes
(
x
=
sensitivity
,
y
=
precision
,
col
=
couple
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
labs
(
x
=
"Sensitivity ( = Recall)"
,
y
=
"Precision"
)
plot
=
plot
+
ylim
(
c
(
0
,
1
))
+
xlim
(
c
(
0
,
1
))
plot
=
plot
+
guides
(
fill
=
FALSE
)
+
guides
(
col
=
FALSE
)
plot
=
plot
+
scale_color_brewer
(
palette
=
"Set1"
)
plot
=
plot
+
geom_point
(
size
=
0.5
)
plot
=
plot
+
geom_line
()
plot
=
plot
+
geom_hline
(
aes
(
yintercept
=
0.9
),
col
=
"black"
,
alpha
=
alpha
,
show.legend
=
NA
,
linetype
=
"dotted"
)
plot
=
plot
+
facet_grid
(
couple
~
methode
)
plot_recall_precision
=
plot
plot_out
=
function
(
df_out
,
df_d
,
df_recall_sup09_per_meth
,
meths
=
NULL
,
suffix
=
""
)
{
nb_c
=
length
(
unique
(
df_out
$
couple
))
colors
=
c
(
c
(
"#984EA3"
,
"#4AA947"
,
"#377EB8"
,
"#E41A1C"
,
"#F5BE5B"
,
"#90EE90"
)[
1
:
nb_c
],
c
(
"#7F7F7F"
,
"#ADD8E6"
))
save_plot
(
paste0
(
opt
$
out
,
".recall_precision_per_meth.pdf"
),
plot_recall_precision
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
0.4
,
base_aspect_ratio
=
1
,
limitsize
=
FALSE
)
if
(
!
is.null
(
meths
))
{
print
(
"plot per indicator"
)
x_labs
=
"Threshold"
y_labs
=
""
plot
=
ggplot
(
df_out_melt
,
aes
(
x
=
threshold
,
y
=
value
,
col
=
couple
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
guides
(
fill
=
FALSE
)
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
#+ ylim(y_lim)
plot
=
plot
+
geom_point
(
size
=
0.5
)
plot
=
plot
+
scale_color_brewer
(
palette
=
"Set1"
)
#plot = plot + geom_hline( data = df_max_mcc_per_method_2, aes(yintercept = MCC), alpha=alpha, show.legend = NA)
plot
=
plot
+
geom_vline
(
data
=
df_recall_sup09_per_meth
,
aes
(
xintercept
=
threshold
,
col
=
couple
),
alpha
=
alpha
,
show.legend
=
NA
,
linetype
=
"dotted"
)
plot
=
plot
+
facet_grid
(
variable
~
methode
,
scales
=
"free"
)
plot_max_MCC
=
plot
save_plot
(
paste0
(
opt
$
out
,
".max_MCC_per_meth.pdf"
),
plot_max_MCC
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
1.7
,
base_aspect_ratio
=
1.5
,
limitsize
=
FALSE
)
plot
=
plot_grid
(
plot_recall_precision
,
plot_max_MCC
,
labels
=
c
(
"A"
,
"B"
),
rel_heights
=
c
(
length
(
unique
(
df_out
$
couple
))
*
0.8
,
3
),
nrow
=
2
)
df_out
=
df_out
[
df_out
$
methode
%in%
meths
,]
df_d
=
df_d
[
df_d
$
methode
%in%
meths
,]
df_recall_sup09_per_meth
=
df_recall_sup09_per_meth
[
df_recall_sup09_per_meth
$
methode
%in%
meths
,]
}
else
{
meths
=
sort
(
unique
(
df_out
$
methode
))
}
save_plot
(
paste0
(
opt
$
out
,
".pdf"
),
plot
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
length
(
unique
(
df_out
$
couple
))
*
0.5
+
1
,
base_aspect_ratio
=
1
,
limitsize
=
FALSE
)
alpha
=
0.5
df_out
$
methode
=
factor
(
df_out
$
methode
,
levels
=
meths
)
df_recall_sup09_per_meth
$
methode
=
factor
(
df_recall_sup09_per_meth
$
methode
,
levels
=
meths
)
df_d
$
methode
=
factor
(
df_d
$
methode
,
levels
=
meths
)
couple_levels
=
unique
(
df_out
$
couple
)
couple_levels
=
c
(
couple_levels
[
couple_levels
!=
"H0/HaPCOC"
]
,
"H0/HaPCOC"
)
df_out
$
couple
=
factor
(
df_out
$
couple
,
levels
=
couple_levels
)
df_d
$
couple
=
factor
(
df_d
$
couple
,
levels
=
couple_levels
)
df_recall_sup09_per_meth
$
couple
=
factor
(
df_recall_sup09_per_meth
$
couple
,
levels
=
couple_levels
)
df_out_melt
=
melt
(
df_out
,
id.vars
=
c
(
"couple"
,
"methode"
,
"threshold"
))
df_out_melt
$
methode
=
factor
(
df_out_melt
$
methode
,
levels
=
meths
)
df_out_melt
$
variable
=
factor
(
df_out_melt
$
variable
,
levels
=
c
(
"sensitivity"
,
"specificity"
,
"precision"
,
"MCC"
))
df_out_melt
$
couple
=
factor
(
df_out_melt
$
couple
,
levels
=
couple_levels
)
print
(
"plot recall_precision_per_meth"
)
plot
=
ggplot
(
df_out
,
aes
(
x
=
sensitivity
,
y
=
precision
,
col
=
couple
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
labs
(
x
=
"Sensitivity (= Recall)"
,
y
=
"Precision"
)
plot
=
plot
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
ylim
(
c
(
0
,
1
))
+
xlim
(
c
(
0
,
1
))
plot
=
plot
+
guides
(
fill
=
FALSE
)
+
guides
(
col
=
FALSE
)
plot
=
plot
+
scale_color_manual
(
values
=
colors
)
plot
=
plot
+
geom_point
(
size
=
0.5
,
alpha
=
alpha
)
plot
=
plot
+
geom_line
()
plot
=
plot
+
geom_hline
(
aes
(
yintercept
=
0.9
),
col
=
"black"
,
size
=
1
,
show.legend
=
NA
,
linetype
=
"dashed"
)
plot
=
plot
+
facet_grid
(
couple
~
methode
)
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
hjust
=
1
))
plot_recall_precision
=
plot
save_plot
(
paste0
(
opt
$
out
,
suffix
,
".recall_precision_per_meth.pdf"
),
plot_recall_precision
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
0.4
*
length
(
unique
(
df_out_melt
$
couple
)),
base_aspect_ratio
=
1
,
limitsize
=
FALSE
)
print
(
"plot per indicator"
)
x_labs
=
"Threshold"
y_labs
=
""
plot
=
ggplot
(
df_out_melt
,
aes
(
x
=
threshold
,
y
=
value
,
col
=
couple
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
guides
(
fill
=
FALSE
)
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
#+ ylim(y_lim)
plot
=
plot
+
geom_point
(
size
=
0.5
,
alpha
=
alpha
)
plot
=
plot
+
scale_color_manual
(
values
=
colors
)
#plot = plot + geom_hline( data = df_max_mcc_per_method_2, aes(yintercept = MCC), alpha=alpha, show.legend = NA)
plot
=
plot
+
geom_vline
(
data
=
df_recall_sup09_per_meth
,
aes
(
xintercept
=
threshold
,
col
=
couple
),
size
=
1
,
show.legend
=
NA
,
linetype
=
"dashed"
)
plot
=
plot
+
facet_grid
(
variable
~
methode
,
scales
=
"free"
)
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
hjust
=
1
))
plot_max_MCC
=
plot
save_plot
(
paste0
(
opt
$
out
,
suffix
,
".max_MCC_per_meth.pdf"
),
plot_max_MCC
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
1.7
,
base_aspect_ratio
=
1.5
,
limitsize
=
FALSE
)
print
(
"plot value/distance"
)
plot
=
ggplot
(
df_d
,
aes
(
y
=
P_distance
,
x
=
value
,
shape
=
val_H0Ha
,
col
=
val_H0Ha
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
labs
(
y
=
"P distance"
,
x
=
"Value"
)
plot
=
plot
+
xlim
(
c
(
0
,
1
))
+
ylim
(
c
(
0
,
max
(
max
(
df_d
$
P_distance
),
1
)))
plot
=
plot
+
guides
(
col
=
FALSE
)
plot
=
plot
+
scale_color_manual
(
values
=
colors
)
plot
=
plot