Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
a46650d4
Commit
a46650d4
authored
Jul 31, 2018
by
Carine Rey
Browse files
use recall/precision to choose the threshold + new hypothesis (bigNeinSmallNe and SmallNeInBigNe)
parent
3d0b2086
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
273 additions
and
58 deletions
+273
-58
lib/convergence_hypothesis.ml
lib/convergence_hypothesis.ml
+16
-10
lib/pipeline.ml
lib/pipeline.ml
+7
-1
lib/post_analyses.ml
lib/post_analyses.ml
+84
-15
lib/scripts/calc_t_per_meth.R
lib/scripts/calc_t_per_meth.R
+163
-31
lib/scripts/merge_det_results.py
lib/scripts/merge_det_results.py
+1
-1
lib/tree_dataset.ml
lib/tree_dataset.ml
+2
-0
No files found.
lib/convergence_hypothesis.ml
View file @
a46650d4
...
...
@@ -10,6 +10,8 @@ type t =
|
H0_NeVerySmall
|
H0_BigNeInSmallNe
|
H0_SmallNeInBigNe
|
HaPC_BigNeInSmallNe
|
HaPC_SmallNeInBigNe
|
HaPCOC_NeSmall
|
HaPC_NeSmall
|
H0_NeBig
...
...
@@ -17,18 +19,20 @@ type t =
|
HaPC_NeBig
let
string_of_model
m
=
match
m
with
|
H0
->
"H0"
|
HaPC
->
"HaPC"
|
HaPCOC
->
"HaPCOC"
|
H0_NeSmall
->
"H0_NeSmall"
|
H0_NeVerySmall
->
"H0_NeVerySmall"
|
H0
->
"H0"
|
HaPC
->
"HaPC"
|
HaPCOC
->
"HaPCOC"
|
H0_NeSmall
->
"H0_NeSmall"
|
H0_NeVerySmall
->
"H0_NeVerySmall"
|
H0_BigNeInSmallNe
->
"H0_BigNeInSmallNe"
|
H0_SmallNeInBigNe
->
"H0_SmallNeInBigNe"
|
HaPCOC_NeSmall
->
"HaPCOC_NeSmall"
|
HaPC_NeSmall
->
"HaPC_NeSmall"
|
H0_NeBig
->
"H0_NeBig"
|
HaPCOC_NeBig
->
"HaPCOC_NeBig"
|
HaPC_NeBig
->
"HaPC_NeBig"
|
HaPC_BigNeInSmallNe
->
"HaPC_BigNeInSmallNe"
|
HaPC_SmallNeInBigNe
->
"HaPC_SmallNeInBigNe"
|
HaPCOC_NeSmall
->
"HaPCOC_NeSmall"
|
HaPC_NeSmall
->
"HaPC_NeSmall"
|
H0_NeBig
->
"H0_NeBig"
|
HaPCOC_NeBig
->
"HaPCOC_NeBig"
|
HaPC_NeBig
->
"HaPC_NeBig"
let
assign
k
v
=
...
...
@@ -92,6 +96,8 @@ let bpp_config_F nodes hyp = [
|
H0_NeVerySmall
->
bpp_config_H0_F_Ne
|
H0_BigNeInSmallNe
->
bpp_config_H0_F_Ne
|
H0_SmallNeInBigNe
->
bpp_config_H0_F_Ne
|
HaPC_BigNeInSmallNe
->
bpp_config_HaPC_F_Ne
|
HaPC_SmallNeInBigNe
->
bpp_config_HaPC_F_Ne
|
HaPC_NeSmall
->
bpp_config_HaPC_F_Ne
|
HaPCOC_NeSmall
->
bpp_config_HaPCOC_F_Ne
|
H0_NeBig
->
bpp_config_H0_F_Ne
...
...
lib/pipeline.ml
View file @
a46650d4
...
...
@@ -67,11 +67,15 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~pr
|
HaPCOC_NeBig
->
10
.
|
H0_BigNeInSmallNe
->
10
.
|
H0_SmallNeInBigNe
->
0
.
1
|
HaPC_BigNeInSmallNe
->
10
.
|
HaPC_SmallNeInBigNe
->
0
.
1
|
_
->
1
.
in
let
ne_a
=
match
model
with
|
H0_BigNeInSmallNe
->
0
.
1
|
H0_SmallNeInBigNe
->
10
.
|
HaPC_BigNeInSmallNe
->
0
.
1
|
HaPC_SmallNeInBigNe
->
10
.
|
_
->
1
.
in
let
profile_f
=
profile
.
profile_f
in
...
...
@@ -114,9 +118,11 @@ let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~n
]
);
(
if
ne_test
then
[
H0_NeVerySmall
;
[
H0_BigNeInSmallNe
;
H0_SmallNeInBigNe
;
HaPC_BigNeInSmallNe
;
HaPC_SmallNeInBigNe
;
]
else
[]
...
...
lib/post_analyses.ml
View file @
a46650d4
...
...
@@ -46,19 +46,38 @@ let is_hyp ~hyp (dataset_results :dataset_res) =
let
model_prefix
=
dataset_results
.
model_prefix
in
model_prefix
=
hyp
let
make_t_choices
~
h0_merged_results
~
ha_merged_results
:
post_analyses_dir
directory
workflow
=
let
build_cmd_t_choices
(
opt_name
:
string
)
mr_option
=
match
mr_option
with
|
Some
x
->
[
opt
opt_name
dep
x
]
|
None
->
[]
let
make_t_choices
~
h0_mr
~
h0_NeBig_mr
~
h0_NeSmall_mr
~
haPCOC_mr
~
haPC_mr
~
haPC_NeBig_mr
~
haPC_NeSmall_mr
~
h0_NeBigInSmall_mr
~
h0_NeSmallInBig_mr
~
haPC_NeBigInSmall_mr
~
haPC_NeSmallInBig_mr
()
:
post_analyses_dir
directory
workflow
=
let
env
=
r_env
in
let
out
=
dest
//
"out"
in
let
cmd_mr
=
List
.
map
[
(
"--H0"
,
h0_mr
);
(
"--H0NeBig"
,
h0_NeBig_mr
);
(
"--H0NeSmall"
,
h0_NeSmall_mr
);
(
"--H0NeBigInSmall"
,
h0_NeBigInSmall_mr
);
(
"--H0NeSmallInBig"
,
h0_NeSmallInBig_mr
);
(
"--HaPCOC"
,
haPCOC_mr
);
(
"--HaPC"
,
haPC_mr
);
(
"--HaPCNeBig"
,
haPC_NeBig_mr
);
(
"--HaPCNeSmall"
,
haPC_NeSmall_mr
);
(
"--HaPCNeBigInSmall"
,
haPC_NeBigInSmall_mr
);
(
"--HaPCNeSmallInBig"
,
haPC_NeSmallInBig_mr
);
]
~
f
:
(
fun
(
opt_name
,
mr_option
)
->
build_cmd_t_choices
opt_name
mr_option
)
|>
List
.
concat
in
workflow
~
descr
:
"post_analyses.t_choices"
[
docker
env
(
and_list
[
mkdir_p
dest
;
cmd
"Rscript"
[
file_dump
(
string
Scripts
.
calc_t_per_meth
)
;
opt
"--H0"
dep
h0_merged_results
;
opt
"--Ha"
dep
ha_merged_results
;
opt
"--out "
ident
out
;
];
cmd
"Rscript"
([
[
file_dump
(
string
Scripts
.
calc_t_per_meth
)
;
opt
"--out "
ident
out
;
]
;
cmd_mr
;
]
|>
List
.
concat
)
;
])
]
...
...
@@ -99,15 +118,65 @@ let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
)
]
let
get_merged_results_opt
hx
=
match
hx
with
|
Some
w
->
Some
w
.
merged_results
|
None
->
None
type
res_all_hyp
=
{
h0_res
:
dataset_res
option
;
h0_NeBig_res
:
dataset_res
option
;
h0_NeSmall_res
:
dataset_res
option
;
ha_PC_res
:
dataset_res
option
;
ha_PCOC_res
:
dataset_res
option
;
ha_PC_NeBig_res
:
dataset_res
option
;
ha_PC_NeSmall_res
:
dataset_res
option
;
h0_NeBigInSmall_res
:
dataset_res
option
;
h0_NeSmallInBig_res
:
dataset_res
option
;
ha_PC_NeBigInSmall_res
:
dataset_res
option
;
ha_PC_NeSmallInBig_res
:
dataset_res
option
;
}
let
make_t_choices_per_couple
{
h0_res
;
h0_NeBig_res
;
h0_NeSmall_res
;
ha_PC_res
;
ha_PCOC_res
;
ha_PC_NeBig_res
;
ha_PC_NeSmall_res
;
h0_NeBigInSmall_res
;
h0_NeSmallInBig_res
;
ha_PC_NeBigInSmall_res
;
ha_PC_NeSmallInBig_res
}
=
let
h0_mr
=
get_merged_results_opt
h0_res
in
let
h0_NeBig_mr
=
get_merged_results_opt
h0_NeBig_res
in
let
h0_NeSmall_mr
=
get_merged_results_opt
h0_NeSmall_res
in
let
h0_NeBigInSmall_mr
=
get_merged_results_opt
h0_NeBigInSmall_res
in
let
h0_NeSmallInBig_mr
=
get_merged_results_opt
h0_NeSmallInBig_res
in
let
haPCOC_mr
=
get_merged_results_opt
ha_PCOC_res
in
let
haPC_mr
=
get_merged_results_opt
ha_PC_res
in
let
haPC_NeBig_mr
=
get_merged_results_opt
ha_PC_NeBig_res
in
let
haPC_NeSmall_mr
=
get_merged_results_opt
ha_PC_NeSmall_res
in
let
haPC_NeBigInSmall_mr
=
get_merged_results_opt
ha_PC_NeBigInSmall_res
in
let
haPC_NeSmallInBig_mr
=
get_merged_results_opt
ha_PC_NeSmallInBig_res
in
make_t_choices
~
h0_mr
~
h0_NeBig_mr
~
h0_NeSmall_mr
~
haPCOC_mr
~
haPC_mr
~
haPC_NeBig_mr
~
haPC_NeSmall_mr
~
h0_NeBigInSmall_mr
~
h0_NeSmallInBig_mr
~
haPC_NeBigInSmall_mr
~
haPC_NeSmallInBig_mr
()
let
get_t_choices
~
(
dataset_results_l
:
dataset_res
list
)
:
t_choices
option
=
let
h0_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"H0"
)
in
let
ha_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"HaPCOC"
)
in
match
(
h0_res
,
ha_res
)
with
|
(
Some
h0
,
Some
ha
)
->
let
h0_merged_results
=
h0
.
merged_results
in
let
ha_merged_results
=
ha
.
merged_results
in
let
t_choices_dir
=
make_t_choices
~
h0_merged_results
~
ha_merged_results
in
let
t_choices_max
=
t_choices_dir
/
selector
[
"out.max_per_meth.tsv"
]
in
let
h0_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"H0"
)
in
let
h0_NeBig_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"H0_NeBig"
)
in
let
h0_NeSmall_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"H0_NeSmall"
)
in
let
h0_NeBigInSmall_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"H0_BigNeInSmallNe"
)
in
let
h0_NeSmallInBig_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"H0_SmallNeInBigNe"
)
in
let
ha_PCOC_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"HaPCOC"
)
in
let
ha_PC_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"HaPC"
)
in
let
ha_PC_NeBig_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"HaPC_NeBig"
)
in
let
ha_PC_NeSmall_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"HaPC_NeSmall"
)
in
let
ha_PC_NeBigInSmall_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"HaPC_BigNeInSmallNe"
)
in
let
ha_PC_NeSmallInBig_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"HaPC_SmallNeInBigNe"
)
in
match
(
h0_res
,
ha_PCOC_res
)
with
|
(
Some
h0
,
Some
_
)
->
let
t_choices_dir
=
make_t_choices_per_couple
{
h0_res
;
h0_NeBig_res
;
h0_NeSmall_res
;
ha_PC_res
;
ha_PCOC_res
;
ha_PC_NeBig_res
;
ha_PC_NeSmall_res
;
h0_NeBigInSmall_res
;
h0_NeSmallInBig_res
;
ha_PC_NeBigInSmall_res
;
ha_PC_NeSmallInBig_res
}
in
let
t_choices_max
=
t_choices_dir
/
selector
[
"out.recall09_per_meth.tsv"
]
in
let
t_choices_complete
=
t_choices_dir
/
selector
[
"out.complete.tsv"
]
in
let
t_choices_plot
=
t_choices_dir
/
selector
[
"out.pdf"
]
in
let
tree_prefix
=
h0
.
tree_prefix
in
...
...
lib/scripts/calc_t_per_meth.R
View file @
a46650d4
...
...
@@ -9,11 +9,19 @@ library("ggplot2")
library
(
"cowplot"
)
option_list
=
list
(
make_option
(
c
(
"--H0"
),
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0"
,
metavar
=
"character"
),
make_option
(
c
(
"--Ha"
),
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results Ha"
,
metavar
=
"character"
),
make_option
(
c
(
"-o"
,
"--out"
),
type
=
"character"
,
default
=
"out"
,
make_option
(
c
(
"--H0"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0"
,
metavar
=
"character"
),
make_option
(
c
(
"--H0NeBig"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0NeBig"
,
metavar
=
"character"
),
make_option
(
c
(
"--H0NeSmall"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0NeSmall"
,
metavar
=
"character"
),
make_option
(
c
(
"--H0NeBigInSmall"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0NeBigInSmall"
,
metavar
=
"character"
),
make_option
(
c
(
"--H0NeSmallInBig"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0NeSmallInBig"
,
metavar
=
"character"
),
make_option
(
c
(
"--HaPCOC"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results HaPCOC"
,
metavar
=
"character"
),
make_option
(
c
(
"--HaPC"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results HaPC"
,
metavar
=
"character"
),
make_option
(
c
(
"--HaPCNeBig"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results HaPCNeBig"
,
metavar
=
"character"
),
make_option
(
c
(
"--HaPCNeSmall"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results HaPCNeSmall"
,
metavar
=
"character"
),
make_option
(
c
(
"--HaPCNeBigInSmall"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results HaPCNeBigInSmall"
,
metavar
=
"character"
),
make_option
(
c
(
"--HaPCNeSmallInBig"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results HaPCNeSmallInBig"
,
metavar
=
"character"
),
make_option
(
c
(
"-o"
,
"--out"
),
type
=
"character"
,
default
=
"out"
,
help
=
"output prefix [default= %default]"
,
metavar
=
"character"
)
);
...
...
@@ -24,9 +32,9 @@ if (is.null(opt$H0)){
print_help
(
opt_parser
)
stop
(
"At least one argument must be supplied (H0 input file)"
,
call.
=
FALSE
)
}
if
(
is.null
(
opt
$
Ha
)){
if
(
is.null
(
opt
$
Ha
PCOC
)){
print_help
(
opt_parser
)
stop
(
"At least one argument must be supplied (Ha input file)"
,
call.
=
FALSE
)
stop
(
"At least one argument must be supplied (Ha
PCOC
input file)"
,
call.
=
FALSE
)
}
## fun...
...
...
@@ -34,7 +42,7 @@ if (is.null(opt$Ha)){
calc_TN_FP
=
function
(
vals
,
t
){
TN
=
0
FP
=
0
vals
[
is.na
(
vals
)
]
=
0
vals
=
na.omit
(
vals
)
if
(
length
(
vals
)
>
0
)
{
TN
=
sum
(
vals
<=
t
)
FP
=
sum
(
vals
>
t
)
...
...
@@ -64,44 +72,97 @@ calc_TN_FP_TP_FN = function(t, df_H0_melt, df_Ha_melt){
## program...
df_H0
=
read.table
(
opt
$
H0
,
header
=
TRUE
,
sep
=
'\t'
)
df_Ha
=
read.table
(
opt
$
Ha
,
header
=
TRUE
,
sep
=
'\t'
)
read_hyp
=
function
(
opt_name
)
{
if
(
!
is.null
(
opt_name
)){
df
=
read.table
(
opt_name
,
header
=
TRUE
,
sep
=
'\t'
,
na.strings
=
"NA"
)
df
=
df
[,
!
colnames
(
df
)
%in%
c
(
"Indel_prop"
,
"Indel_prop.ConvLeaves."
)]
df_melt
=
melt
(
df
,
id.vars
=
c
(
"Sites"
))
return
(
df_melt
)
}
else
{
return
(
NULL
)
}
}
df_H0_melt
=
read_hyp
(
opt
$
H0
)
df_H0NeBig_melt
=
read_hyp
(
opt
$
H0NeBig
)
df_H0NeSmall_melt
=
read_hyp
(
opt
$
H0NeSmall
)
df_H0NeBigInSmall_melt
=
read_hyp
(
opt
$
H0NeBigInSmall
)
df_H0NeSmallInBig_melt
=
read_hyp
(
opt
$
H0NeSmallInBig
)
df_HaPCOC_melt
=
read_hyp
(
opt
$
HaPCOC
)
df_HaPC_melt
=
read_hyp
(
opt
$
HaPC
)
df_HaPCNeBig_melt
=
read_hyp
(
opt
$
HaPCNeBig
)
df_HaPCNeSmall_melt
=
read_hyp
(
opt
$
HaPCNeSmall
)
df_HaPCNeBigInSmall_melt
=
read_hyp
(
opt
$
HaPCNeBigInSmall
)
df_HaPCNeSmallInBig_melt
=
read_hyp
(
opt
$
HaPCNeSmallInBig
)
df_H0
=
df_H0
[,
!
colnames
(
df_H0
)
%in%
c
(
"Indel_prop"
,
"Indel_prop.ConvLeaves."
)]
df_Ha
=
df_Ha
[,
!
colnames
(
df_Ha
)
%in%
c
(
"Indel_prop"
,
"Indel_prop.ConvLeaves."
)]
df_H0_melt
=
melt
(
df_H0
,
id.vars
=
c
(
"Sites"
))
df_Ha_melt
=
melt
(
df_Ha
,
id.vars
=
c
(
"Sites"
))
build_df_couple
=
function
(
df_h0
,
df_ha
,
name
)
{
if
((
!
is.null
(
df_h0
))
&
(
!
is.null
(
df_ha
)))
{
df
=
do.call
(
rbind.data.frame
,
lapply
(
seq
(
0
,
0.999
,
0.001
),
calc_TN_FP_TP_FN
,
df_H0_melt
=
df_h0
,
df_Ha_melt
=
df_ha
))
df
$
couple
=
name
}
else
{
df
=
NULL
}
return
(
df
)
}
df
=
do.call
(
rbind.data.frame
,
lapply
(
seq
(
0
,
0.999
,
0.01
),
calc_TN_FP_TP_FN
,
df_H0_melt
=
df_H0_melt
,
df_Ha_melt
=
df_Ha_melt
))
df_H0HaPC
=
build_df_couple
(
df_H0_melt
,
df_HaPC_melt
,
"H0/HaPC"
)
df_H0HaPC_NeBig
=
build_df_couple
(
df_H0NeBig_melt
,
df_HaPCNeBig_melt
,
"H0/HaPC NeBig"
)
df_H0HaPC_NeSmall
=
build_df_couple
(
df_H0NeSmall_melt
,
df_HaPCNeSmall_melt
,
"H0/HaPC NeSmall"
)
df_H0HaPC_NeBigInSmall
=
build_df_couple
(
df_H0NeBigInSmall_melt
,
df_HaPCNeBigInSmall_melt
,
"H0/HaPC NeBigInSmall"
)
df_H0HaPC_NeSmallInBig
=
build_df_couple
(
df_H0NeSmallInBig_melt
,
df_HaPCNeSmallInBig_melt
,
"H0/HaPC NeSmallInBig"
)
df_H0HaPCOC
=
build_df_couple
(
df_H0_melt
,
df_HaPCOC_melt
,
"H0/HaPCOC"
)
df
=
rbind.data.frame
(
df_H0HaPC
,
df_H0HaPCOC
,
df_H0HaPC_NeBig
,
df_H0HaPC_NeSmall
,
df_H0HaPC_NeBigInSmall
,
df_H0HaPC_NeSmallInBig
)
print
(
head
(
df
))
print
(
tail
(
df
))
## Sensitivity (= recall)
df
$
sens
=
df
$
TP
/
(
df
$
TP
+
df
$
FN
)
df
$
sens
[
is.na
(
df
$
sens
)]
=
0
## Specificity
df
$
spe
=
df
$
TN
/
(
df
$
FP
+
df
$
TN
)
df
$
spe
[
is.na
(
df
$
spe
)]
=
0
## MCC
n_sites
=
sum
(
df
[
1
,
c
(
"TP"
,
"FN"
)])
p
=
140
/
n_sites
n
=
(
6000
-140
)
/
n_sites
df
$
FP_2
=
df
$
FP
*
n
df
$
TP_2
=
df
$
TP
*
p
df
$
FN_2
=
df
$
FN
*
p
df
$
TN_2
=
df
$
TN
*
n
df
$
TP_2
=
df
$
TP
*
p
df
$
FN_2
=
df
$
FN
*
p
df
$
TN_2
=
df
$
TN
*
n
df
$
mcc
=
(
df
$
TP_2
*
df
$
TN_2
-
df
$
FP_2
*
df
$
FN_2
)
/
sqrt
((
df
$
TP_2
+
df
$
FP_2
)
*
(
df
$
TP_2
+
df
$
FN_2
)
*
(
df
$
TN_2
+
df
$
FP_2
)
*
(
df
$
TN_2
+
df
$
FN_2
))
df
$
mcc
[
is.na
(
df
$
mcc
)]
=
0
df_out
=
df
[,
c
(
"Row.names"
,
"t"
,
"sens"
,
"spe"
,
"mcc"
)]
colnames
(
df_out
)
=
c
(
"methode"
,
"threshold"
,
"sensitivity"
,
"specificity"
,
"MCC"
)
## Precision
df
$
precision
=
(
df
$
sens
*
0.02
)
/
(
df
$
sens
*
0.02
+
(
1
-
df
$
spe
)
*
0.98
)
print
(
head
(
df
))
print
(
tail
(
df
))
df_out
=
df
[,
c
(
"Row.names"
,
"t"
,
"sens"
,
"spe"
,
"mcc"
,
"precision"
,
"couple"
)]
colnames
(
df_out
)
=
c
(
"methode"
,
"threshold"
,
"sensitivity"
,
"specificity"
,
"MCC"
,
"precision"
,
"couple"
)
df_out
=
df_out
[
order
(
df_out
$
methode
),]
df_out_melt
=
melt
(
df_out
,
id.vars
=
c
(
"methode"
,
"threshold"
))
print
(
summary
(
df_out
))
df_out_melt
=
melt
(
df_out
,
id.vars
=
c
(
"couple"
,
"methode"
,
"threshold"
))
df_max_mcc_per_method
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
df_out
$
methode
),
function
(
x
)
{
return
(
x
[
which.max
(
x
$
MCC
),
c
(
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
)])}))
print
(
"prep plot max mcc"
)
df_max_mcc_per_method
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
paste0
(
df_out
$
methode
,
df_out
$
couple
)),
function
(
x
)
{
return
(
x
[
which.max
(
x
$
MCC
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)])}))
print
(
df_max_mcc_per_method
)
...
...
@@ -109,22 +170,93 @@ print(df_max_mcc_per_method)
df_max_mcc_per_method_2
=
df_max_mcc_per_method
df_max_mcc_per_method_2
$
variable
=
"MCC"
alpha
=
0.7
print
(
"prep plot recall_precision_per_meth"
)
df_recall_sup09_per_meth
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
paste0
(
df_out
$
methode
,
df_out
$
couple
)),
function
(
x
)
{
print
(
x
)
x
=
x
[
x
$
precision
>
0.9
,]
if
(
nrow
(
x
)
>
0
)
{
print
(
x
)
return
(
x
[
which.max
(
x
$
sensitivity
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)])
}
else
{
return
()
}
}))
print
(
df_recall_sup09_per_meth
)
print
(
"plot recall_precision_per_meth"
)
alpha
=
0.5
plot
=
ggplot
(
df_out
,
aes
(
x
=
sensitivity
,
y
=
precision
,
col
=
couple
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
labs
(
x
=
"Sensitivity ( = Recall)"
,
y
=
"Precision"
)
plot
=
plot
+
ylim
(
c
(
0
,
1
))
+
xlim
(
c
(
0
,
1
))
plot
=
plot
+
guides
(
fill
=
FALSE
)
+
guides
(
col
=
FALSE
)
plot
=
plot
+
scale_color_brewer
(
palette
=
"Set1"
)
plot
=
plot
+
geom_point
(
size
=
0.5
)
plot
=
plot
+
geom_line
()
plot
=
plot
+
geom_hline
(
aes
(
yintercept
=
0.9
),
col
=
"black"
,
alpha
=
alpha
,
show.legend
=
NA
,
linetype
=
"dotted"
)
plot
=
plot
+
facet_grid
(
couple
~
methode
)
plot_recall_precision
=
plot
save_plot
(
paste0
(
opt
$
out
,
".recall_precision_per_meth.pdf"
),
plot_recall_precision
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
0.4
,
base_aspect_ratio
=
1
,
limitsize
=
FALSE
)
print
(
"plot per indicator"
)
x_labs
=
"Threshold"
y_labs
=
""
plot
=
ggplot
(
df_out_melt
,
aes
(
x
=
threshold
,
y
=
value
))
+
theme_bw
()
+
guides
(
fill
=
FALSE
)
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
#+ ylim(y_lim)
plot
=
plot
+
geom_point
()
plot
=
plot
+
geom_hline
(
data
=
df_max_mcc_per_method_2
,
aes
(
yintercept
=
MCC
),
col
=
"red"
,
alpha
=
alpha
,
show.legend
=
NA
)
plot
=
plot
+
geom_vline
(
data
=
df_max_mcc_per_method
,
aes
(
xintercept
=
threshold
),
col
=
"red"
,
alpha
=
alpha
,
show.legend
=
NA
,
linetype
=
"dotted"
)
plot
=
ggplot
(
df_out_melt
,
aes
(
x
=
threshold
,
y
=
value
,
col
=
couple
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
guides
(
fill
=
FALSE
)
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
#+ ylim(y_lim)
plot
=
plot
+
geom_point
(
size
=
0.5
)
plot
=
plot
+
scale_color_brewer
(
palette
=
"Set1"
)
#plot = plot + geom_hline( data = df_max_mcc_per_method_2, aes(yintercept = MCC), alpha=alpha, show.legend = NA)
plot
=
plot
+
geom_vline
(
data
=
df_recall_sup09_per_meth
,
aes
(
xintercept
=
threshold
,
col
=
couple
),
alpha
=
alpha
,
show.legend
=
NA
,
linetype
=
"dotted"
)
plot
=
plot
+
facet_grid
(
variable
~
methode
,
scales
=
"free"
)
save_plot
(
paste0
(
opt
$
out
,
".pdf"
),
plot
,
plot_max_MCC
=
plot
save_plot
(
paste0
(
opt
$
out
,
".max_MCC_per_meth.pdf"
),
plot_max_MCC
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
1.7
,
base_aspect_ratio
=
1.5
,
limitsize
=
FALSE
)
plot
=
plot_grid
(
plot_recall_precision
,
plot_max_MCC
,
labels
=
c
(
"A"
,
"B"
),
rel_heights
=
c
(
length
(
unique
(
df_out
$
couple
))
*
0.8
,
3
),
nrow
=
2
)
save_plot
(
paste0
(
opt
$
out
,
".pdf"
),
plot
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
length
(
unique
(
df_out
$
couple
))
*
0.5
+
1
,
base_aspect_ratio
=
1
,
limitsize
=
FALSE
)
write.table
(
df_out
,
file
=
paste0
(
opt
$
out
,
".complete.tsv"
),
row.names
=
FALSE
,
quote
=
F
,
sep
=
"\t"
)
write.table
(
df_max_mcc_per_method
,
file
=
paste0
(
opt
$
out
,
".max_per_meth.tsv"
),
row.names
=
FALSE
,
quote
=
F
,
sep
=
"\t"
)
write.table
(
df_max_mcc_per_method
,
file
=
paste0
(
opt
$
out
,
".max_MCC_per_meth.tsv"
),
row.names
=
FALSE
,
quote
=
F
,
sep
=
"\t"
)
write.table
(
df_recall_sup09_per_meth
,
file
=
paste0
(
opt
$
out
,
".recall09_per_meth.tsv"
),
row.names
=
FALSE
,
quote
=
F
,
sep
=
"\t"
)
lib/scripts/merge_det_results.py
View file @
a46650d4
...
...
@@ -166,4 +166,4 @@ if len(set(df_list_len)) != 1:
df_final
=
reduce
(
lambda
x
,
y
:
pd
.
merge
(
x
,
y
,
on
=
'Sites'
,
how
=
'outer'
),
df_list
)
df_final
.
to_csv
(
OutName
,
sep
=
'
\t
'
,
index
=
False
)
df_final
.
to_csv
(
OutName
,
sep
=
'
\t
'
,
index
=
False
,
na_rep
=
'NA'
)
lib/tree_dataset.ml
View file @
a46650d4
...
...
@@ -29,6 +29,8 @@ let nodes dataset (model : Convergence_hypothesis.t) =
|
H0_NeBig
->
[
"tree.H0_a.node_ids"
]
|
H0_BigNeInSmallNe
->
[
"tree.H0_a.node_ids"
]
|
H0_SmallNeInBigNe
->
[
"tree.H0_a.node_ids"
]
|
HaPC_BigNeInSmallNe
->
[
"tree.Ha.node_ids"
]
|
HaPC_SmallNeInBigNe
->
[
"tree.Ha.node_ids"
]
)
let
tree
dataset
mode
=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment