Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
72ca0d70
Commit
72ca0d70
authored
Jul 16, 2018
by
Carine Rey
Browse files
add post analyses
parent
8f68285e
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
200 additions
and
1 deletion
+200
-1
lib/pipeline.ml
lib/pipeline.ml
+10
-1
lib/post_analyses.ml
lib/post_analyses.ml
+60
-0
lib/scripts/calc_t_per_meth.R
lib/scripts/calc_t_per_meth.R
+130
-0
No files found.
lib/pipeline.ml
View file @
72ca0d70
...
...
@@ -244,15 +244,24 @@ let simulation_main ~outdir ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview
let
validation_main
~
outdir
~
indir
?
(
np
=
2
)
?
(
mem
=
2
)
~
preview
~
no_diffsel
~
tree_dir
~
profile_fn
~
use_concat
()
=
let
trees
=
Array
.
to_list
@@
Sys
.
readdir
tree_dir
in
let
repo
=
List
.
map
trees
~
f
:
(
fun
tree
->
let
trees
=
[
tree
]
in
let
tree_prefix
=
Filename
.
chop_extension
tree
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
@
parse_input_data
indir
in
let
dataset_results_l
=
derive_det
~
dataset_l
~
preview
~
no_diffsel
in
let
repo
=
[
let
post_analyses
=
Post_analyses
.
post_analyses_of_dataset_results_l
~
dataset_results_l
in
let
repo_per_tree
=
[
Dataset
.
repo
dataset_l
~
preview
;
repo_of_dataset_results_l
~
dataset_results_l
;
Repo
.
shift
tree_prefix
(
Post_analyses
.
repo_of_post_analyses
~
prefix
:
tree_prefix
~
post_analyses
);
]
|>
List
.
concat
in
repo_per_tree
)
|>
List
.
concat
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
let
simulation_command
=
...
...
lib/post_analyses.ml
0 → 100644
View file @
72ca0d70
open
Core
open
Bistro
.
Std
open
Bistro
.
EDSL
open
Bistro_bioinfo
.
Std
open
Bistro_utils
open
File_formats
open
Convergence_detection
type
post_analyses_dir
type
post_analyses
=
{
t_choices_complete
:
text_file
workflow
;
t_choices_max
:
text_file
workflow
;
t_choices_plot
:
text_file
workflow
;
}
let
is_hyp
~
hyp
dataset_results
=
let
model_prefix
=
dataset_results
.
model_prefix
in
let
merged_results
=
dataset_results
.
merged_results
in
model_prefix
=
hyp
let
make_t_choices
~
h0_merged_results
~
ha_merged_results
:
post_analyses_dir
directory
workflow
=
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"r"
~
tag
:
"07162018"
()
in
let
out
=
dest
//
"out"
in
workflow
~
descr
:
"post_analyses.t_choices"
[
docker
env
(
and_list
[
mkdir_p
dest
;
cmd
"Rscript"
[
file_dump
(
string
Scripts
.
calc_t_per_meth
)
;
opt
"--H0"
dep
h0_merged_results
;
opt
"--Ha"
dep
ha_merged_results
;
opt
"--out "
ident
out
;
];
])
]
let
post_analyses_of_dataset_results_l
~
dataset_results_l
=
let
h0_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"H0"
)
in
let
ha_res
=
List
.
find
dataset_results_l
(
is_hyp
~
hyp
:
"HaPCOC"
)
in
match
(
h0_res
,
ha_res
)
with
|
(
Some
h0
,
Some
ha
)
->
let
h0_merged_results
=
h0
.
merged_results
in
let
ha_merged_results
=
ha
.
merged_results
in
let
t_choices_dir
=
make_t_choices
~
h0_merged_results
~
ha_merged_results
in
let
t_choices_max
=
t_choices_dir
/
selector
[
"out.max_per_meth.tsv"
]
in
let
t_choices_complete
=
t_choices_dir
/
selector
[
"out.complete.tsv"
]
in
let
t_choices_plot
=
t_choices_dir
/
selector
[
"out.pdf"
]
in
Some
{
t_choices_max
;
t_choices_complete
;
t_choices_plot
}
|
_
->
failwith
{
|
tata
|
}
let
repo_of_post_analyses
~
prefix
~
post_analyses
=
match
post_analyses
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
".t_choices.max_mcc_per_meth.tsv"
]
w
.
t_choices_max
;
item
[
prefix
^
".t_choices.complete.tsv"
]
w
.
t_choices_complete
;
item
[
prefix
^
".t_choices.pdf"
]
w
.
t_choices_plot
;
]
|>
Repo
.
shift
"t_choices"
lib/scripts/calc_t_per_meth.R
0 → 100644
View file @
72ca0d70
#!/usr/bin/env Rscript
#https://www.r-bloggers.com/passing-arguments-to-an-r-script-from-command-lines/
library
(
"optparse"
)
library
(
"reshape2"
)
library
(
"ggplot2"
)
library
(
"cowplot"
)
option_list
=
list
(
make_option
(
c
(
"--H0"
),
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0"
,
metavar
=
"character"
),
make_option
(
c
(
"--Ha"
),
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results Ha"
,
metavar
=
"character"
),
make_option
(
c
(
"-o"
,
"--out"
),
type
=
"character"
,
default
=
"out"
,
help
=
"output prefix [default= %default]"
,
metavar
=
"character"
)
);
opt_parser
=
OptionParser
(
option_list
=
option_list
);
opt
=
parse_args
(
opt_parser
);
if
(
is.null
(
opt
$
H0
)){
print_help
(
opt_parser
)
stop
(
"At least one argument must be supplied (H0 input file)"
,
call.
=
FALSE
)
}
if
(
is.null
(
opt
$
Ha
)){
print_help
(
opt_parser
)
stop
(
"At least one argument must be supplied (Ha input file)"
,
call.
=
FALSE
)
}
## fun...
calc_TN_FP
=
function
(
vals
,
t
){
TN
=
0
FP
=
0
vals
[
is.na
(
vals
)]
=
0
if
(
length
(
vals
)
>
0
)
{
TN
=
sum
(
vals
<=
t
)
FP
=
sum
(
vals
>
t
)
}
return
(
data.frame
(
TN
=
TN
,
FP
=
FP
))
}
calc_TP_FN
=
function
(
vals
,
t
){
FN
=
0
TP
=
0
vals
=
na.omit
(
vals
)
if
(
length
(
vals
)
>
0
)
{
FN
=
sum
(
vals
<=
t
)
TP
=
sum
(
vals
>
t
)
}
return
(
data.frame
(
TP
=
TP
,
FN
=
FN
))
}
calc_TN_FP_TP_FN
=
function
(
t
,
df_H0_melt
,
df_Ha_melt
){
df_TN_FP
=
do.call
(
rbind.data.frame
,
tapply
(
df_H0_melt
$
value
,
df_H0_melt
$
variable
,
calc_TN_FP
,
t
=
t
))
df_TP_FN
=
do.call
(
rbind.data.frame
,
tapply
(
df_Ha_melt
$
value
,
df_H0_melt
$
variable
,
calc_TP_FN
,
t
=
t
))
df_TN_FP_TP_FN
=
merge
(
df_TN_FP
,
df_TP_FN
,
by
=
0
)
df_TN_FP_TP_FN
$
t
=
t
return
(
df_TN_FP_TP_FN
)
}
## program...
df_H0
=
read.table
(
opt
$
H0
,
header
=
TRUE
,
sep
=
'\t'
)
df_Ha
=
read.table
(
opt
$
Ha
,
header
=
TRUE
,
sep
=
'\t'
)
df_H0
=
df_H0
[,
!
colnames
(
df_H0
)
%in%
c
(
"Indel_prop"
,
"Indel_prop.ConvLeaves."
)]
df_Ha
=
df_Ha
[,
!
colnames
(
df_Ha
)
%in%
c
(
"Indel_prop"
,
"Indel_prop.ConvLeaves."
)]
df_H0_melt
=
melt
(
df_H0
,
id.vars
=
c
(
"Sites"
))
df_Ha_melt
=
melt
(
df_Ha
,
id.vars
=
c
(
"Sites"
))
df
=
do.call
(
rbind.data.frame
,
lapply
(
seq
(
0
,
0.999
,
0.01
),
calc_TN_FP_TP_FN
,
df_H0_melt
=
df_H0_melt
,
df_Ha_melt
=
df_Ha_melt
))
df
$
sens
=
df
$
TP
/
(
df
$
TP
+
df
$
FN
)
df
$
sens
[
is.na
(
df
$
sens
)]
=
0
df
$
spe
=
df
$
TN
/
(
df
$
FP
+
df
$
TN
)
df
$
spe
[
is.na
(
df
$
spe
)]
=
0
n_sites
=
sum
(
df
[
1
,
c
(
"TP"
,
"FN"
)])
p
=
140
/
n_sites
n
=
(
6000
-140
)
/
n_sites
df
$
FP_2
=
df
$
FP
*
n
df
$
TP_2
=
df
$
TP
*
p
df
$
FN_2
=
df
$
FN
*
p
df
$
TN_2
=
df
$
TN
*
n
df
$
mcc
=
(
df
$
TP_2
*
df
$
TN_2
-
df
$
FP_2
*
df
$
FN_2
)
/
sqrt
((
df
$
TP_2
+
df
$
FP_2
)
*
(
df
$
TP_2
+
df
$
FN_2
)
*
(
df
$
TN_2
+
df
$
FP_2
)
*
(
df
$
TN_2
+
df
$
FN_2
))
df
$
mcc
[
is.na
(
df
$
mcc
)]
=
0
df_out
=
df
[,
c
(
"Row.names"
,
"t"
,
"sens"
,
"spe"
,
"mcc"
)]
colnames
(
df_out
)
=
c
(
"methode"
,
"threshold"
,
"sensitivity"
,
"specificity"
,
"MCC"
)
df_out
=
df_out
[
order
(
df_out
$
methode
),]
df_out_melt
=
melt
(
df_out
,
id.vars
=
c
(
"methode"
,
"threshold"
))
df_max_mcc_per_method
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
df_out
$
methode
),
function
(
x
)
{
return
(
x
[
which.max
(
x
$
MCC
),
c
(
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
)])}))
print
(
df_max_mcc_per_method
)
df_max_mcc_per_method_2
=
df_max_mcc_per_method
df_max_mcc_per_method_2
$
variable
=
"MCC"
alpha
=
0.7
x_labs
=
"Threshold"
y_labs
=
""
plot
=
ggplot
(
df_out_melt
,
aes
(
x
=
threshold
,
y
=
value
))
+
theme_bw
()
+
guides
(
fill
=
FALSE
)
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
#+ ylim(y_lim)
plot
=
plot
+
geom_point
()
plot
=
plot
+
geom_hline
(
data
=
df_max_mcc_per_method_2
,
aes
(
yintercept
=
MCC
),
col
=
"red"
,
alpha
=
alpha
,
show.legend
=
NA
)
plot
=
plot
+
geom_vline
(
data
=
df_max_mcc_per_method
,
aes
(
xintercept
=
threshold
),
col
=
"red"
,
alpha
=
alpha
,
show.legend
=
NA
,
linetype
=
"dotted"
)
plot
=
plot
+
facet_grid
(
variable
~
methode
,
scales
=
"free"
)
save_plot
(
paste0
(
opt
$
out
,
".pdf"
),
plot
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
1.7
,
base_aspect_ratio
=
1.5
,
limitsize
=
FALSE
)
write.table
(
df_out
,
file
=
paste0
(
opt
$
out
,
".complete.tsv"
),
row.names
=
FALSE
,
quote
=
F
,
sep
=
"\t"
)
write.table
(
df_max_mcc_per_method
,
file
=
paste0
(
opt
$
out
,
".max_per_meth.tsv"
),
row.names
=
FALSE
,
quote
=
F
,
sep
=
"\t"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment