Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
0030e37e
Commit
0030e37e
authored
Aug 02, 2018
by
Carine Rey
Browse files
repo refactoring
parent
03815065
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
145 additions
and
97 deletions
+145
-97
lib/dataset.ml
lib/dataset.ml
+7
-4
lib/pipeline.ml
lib/pipeline.ml
+11
-9
lib/post_analyses.ml
lib/post_analyses.ml
+51
-47
lib/profile.ml
lib/profile.ml
+1
-1
lib/ready_dataset.ml
lib/ready_dataset.ml
+0
-1
lib/scripts/calc_t_per_meth.R
lib/scripts/calc_t_per_meth.R
+23
-8
lib/scripts/plot_sens_spe_all_trees.R
lib/scripts/plot_sens_spe_all_trees.R
+49
-24
lib/scripts/plot_trees.R
lib/scripts/plot_trees.R
+3
-3
No files found.
lib/dataset.ml
View file @
0030e37e
...
...
@@ -13,10 +13,13 @@ let repo ~preview dataset_l =
let
model_prefix
=
dataset
.
model_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
let
repo_ready_data
=
Ready_dataset
.
repo
dataset
.
dataset
in
let
repo_raw_data
=
if
preview
then
Raw_dataset
.
repo
~
prefix
:
model_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
else
[]
in
let
repo_raw_data
=
Raw_dataset
.
repo
~
prefix
:
model_prefix
(
Ready_dataset
.
to_raw
dataset
.
dataset
)
in
List
.
concat
[
Repo
.
shift
"simulated_data"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"simulated_data_debug"
(
Repo
.
shift
tree_prefix
(
Repo
.
shift
model_prefix
repo_ready_data
));
]
Repo
.
shift
"minimal"
(
Repo
.
shift
(
tree_prefix
^
"_"
^
model_prefix
)
repo_raw_data
);
Repo
.
shift
"debug"
repo_ready_data
;
]
|>
Repo
.
shift
"dataset"
|>
Repo
.
shift
model_prefix
|>
Repo
.
shift
"Results_per_hypothesis"
)
|>
List
.
concat
lib/pipeline.ml
View file @
0030e37e
...
...
@@ -119,10 +119,10 @@ let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~n
);
(
if
ne_test
then
[
H0_BigNeInSmallNe
;
H0_SmallNeInBigNe
;
HaPC_BigNeInSmallNe
;
HaPC_SmallNeInBigNe
;
H0_BigNeInSmallNe
;
H0_SmallNeInBigNe
;
HaPC_BigNeInSmallNe
;
HaPC_SmallNeInBigNe
;
]
else
[]
...
...
@@ -200,7 +200,6 @@ let repo_of_dataset_results_l ~dataset_results_l =
in
repo
|>
Repo
.
shift
dataset_results
.
model_prefix
|>
Repo
.
shift
dataset_results
.
tree_prefix
)
|>
List
.
concat
...
...
@@ -287,9 +286,12 @@ let derive_profile ?(indir = "") ?(ns = 0) ~preview ~fast_mode ~no_Ne ~ne_test ~
let
post_analyses_res
=
Post_analyses
.
post_analyses_res_of_dataset_results_l
~
dataset_results_l
in
let
repo_per_tree
=
[
Dataset
.
repo
dataset_l
~
preview
;
repo_of_dataset_results_l
~
dataset_results_l
;
Repo
.
shift
tree_prefix
(
Post_analyses
.
repo_of_post_analyses_res
~
prefix
:
tree_prefix
~
post_analyses_res
);
]
|>
List
.
concat
Repo
.
shift
"Results_per_hypothesis"
(
repo_of_dataset_results_l
~
dataset_results_l
);
Post_analyses
.
repo_of_post_analyses_res
~
prefix
:
tree_prefix
~
post_analyses_res
;
]
|>
List
.
concat
|>
Repo
.
shift
tree_prefix
|>
Repo
.
shift
"Results_per_tree"
in
(
repo_per_tree
,
post_analyses_res
)
)
...
...
@@ -347,7 +349,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
repo_of_dataset_results_l
~
dataset_results_l
;
]
|>
List
.
concat
in
let
repo
=
sim_repo_l
@
repo_real_trees
in
let
repo
=
(
Repo
.
shift
"Simulated_datasets"
sim_repo_l
)
@
(
Repo
.
shift
"Real_datasets"
repo_real_trees
)
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
let
simulation_command
=
...
...
lib/post_analyses.ml
View file @
0030e37e
...
...
@@ -59,37 +59,37 @@ let is_hyp ~hyp (dataset_results :dataset_res) =
let
build_cmd_t_choices
(
opt_name
:
string
)
mr_option
=
match
mr_option
with
|
Some
x
->
[
opt
opt_name
dep
x
]
|
None
->
[]
|
Some
x
->
[
opt
opt_name
dep
x
]
|
None
->
[]
let
make_t_choices
~
h0_mr
~
h0_NeBig_mr
~
h0_NeSmall_mr
~
haPCOC_mr
~
haPC_mr
~
haPC_NeBig_mr
~
haPC_NeSmall_mr
~
h0_NeBigInSmall_mr
~
h0_NeSmallInBig_mr
~
haPC_NeBigInSmall_mr
~
haPC_NeSmallInBig_mr
()
:
post_analyses_dir
directory
workflow
=
let
env
=
r_env
in
let
out
=
dest
//
"out"
in
let
cmd_mr
=
List
.
map
[
(
"--H0"
,
h0_mr
);
(
"--H0NeBig"
,
h0_NeBig_mr
);
(
"--H0NeSmall"
,
h0_NeSmall_mr
);
(
"--H0NeBigInSmall"
,
h0_NeBigInSmall_mr
);
(
"--H0NeSmallInBig"
,
h0_NeSmallInBig_mr
);
(
"--HaPCOC"
,
haPCOC_mr
);
(
"--HaPC"
,
haPC_mr
);
(
"--HaPCNeBig"
,
haPC_NeBig_mr
);
(
"--HaPCNeSmall"
,
haPC_NeSmall_mr
);
(
"--HaPCNeBigInSmall"
,
haPC_NeBigInSmall_mr
);
(
"--HaPCNeSmallInBig"
,
haPC_NeSmallInBig_mr
);
]
~
f
:
(
fun
(
opt_name
,
mr_option
)
->
build_cmd_t_choices
opt_name
mr_option
)
|>
List
.
concat
in
(
"--H0"
,
h0_mr
);
(
"--H0NeBig"
,
h0_NeBig_mr
);
(
"--H0NeSmall"
,
h0_NeSmall_mr
);
(
"--H0NeBigInSmall"
,
h0_NeBigInSmall_mr
);
(
"--H0NeSmallInBig"
,
h0_NeSmallInBig_mr
);
(
"--HaPCOC"
,
haPCOC_mr
);
(
"--HaPC"
,
haPC_mr
);
(
"--HaPCNeBig"
,
haPC_NeBig_mr
);
(
"--HaPCNeSmall"
,
haPC_NeSmall_mr
);
(
"--HaPCNeBigInSmall"
,
haPC_NeBigInSmall_mr
);
(
"--HaPCNeSmallInBig"
,
haPC_NeSmallInBig_mr
);
]
~
f
:
(
fun
(
opt_name
,
mr_option
)
->
build_cmd_t_choices
opt_name
mr_option
)
|>
List
.
concat
in
workflow
~
descr
:
"post_analyses.t_choices"
[
docker
env
(
and_list
[
mkdir_p
dest
;
cmd
"Rscript"
([
[
file_dump
(
string
Scripts
.
calc_t_per_meth
)
;
opt
"--out "
ident
out
;
]
;
cmd_mr
;
]
|>
List
.
concat
)
;
[
file_dump
(
string
Scripts
.
calc_t_per_meth
)
;
opt
"--out "
ident
out
;
]
;
cmd_mr
;
]
|>
List
.
concat
)
;
])
]
...
...
@@ -133,8 +133,8 @@ let group_simu_infos ~simu_infos_l : simu_infos directory workflow =
let
plot_trees
~
reinfered_tree_l
:
plot_trees
directory
workflow
=
let
env
=
r_env
in
let
cmd_cp_l
=
List
.
map
reinfered_tree_l
~
f
:
(
fun
rt
->
[
cmd
"cp"
[
dep
rt
.
reinfered_tree
;
tmp
//
(
rt
.
tree_prefix
^
"@"
^
rt
.
model_prefix
^
".nw"
)];
cmd
"cp"
[
dep
rt
.
input_tree
;
tmp
//
(
rt
.
tree_prefix
^
"@input_tree.nw"
)]
cmd
"cp"
[
dep
rt
.
reinfered_tree
;
tmp
//
(
rt
.
tree_prefix
^
"@"
^
rt
.
model_prefix
^
".nw"
)];
cmd
"cp"
[
dep
rt
.
input_tree
;
tmp
//
(
rt
.
tree_prefix
^
"@input_tree.nw"
)]
])
|>
List
.
concat
in
let
out
=
dest
//
"out"
in
...
...
@@ -169,26 +169,26 @@ type res_all_hyp = {
h0_NeSmallInBig_res
:
dataset_res
option
;
ha_PC_NeBigInSmall_res
:
dataset_res
option
;
ha_PC_NeSmallInBig_res
:
dataset_res
option
;
}
}
let
make_t_choices_per_couple
{
h0_res
;
h0_NeBig_res
;
h0_NeSmall_res
;
ha_PC_res
;
ha_PCOC_res
;
ha_PC_NeBig_res
;
ha_PC_NeSmall_res
;
h0_NeBigInSmall_res
;
h0_NeSmallInBig_res
;
ha_PC_NeBigInSmall_res
;
ha_PC_NeSmallInBig_res
}
=
let
h0_mr
=
get_merged_results_opt
h0_res
in
let
h0_NeBig_mr
=
get_merged_results_opt
h0_NeBig_res
in
let
h0_NeSmall_mr
=
get_merged_results_opt
h0_NeSmall_res
in
let
h0_NeBigInSmall_mr
=
get_merged_results_opt
h0_NeBigInSmall_res
in
let
h0_NeSmallInBig_mr
=
get_merged_results_opt
h0_NeSmallInBig_res
in
let
h0_mr
=
get_merged_results_opt
h0_res
in
let
h0_NeBig_mr
=
get_merged_results_opt
h0_NeBig_res
in
let
h0_NeSmall_mr
=
get_merged_results_opt
h0_NeSmall_res
in
let
h0_NeBigInSmall_mr
=
get_merged_results_opt
h0_NeBigInSmall_res
in
let
h0_NeSmallInBig_mr
=
get_merged_results_opt
h0_NeSmallInBig_res
in
let
haPCOC_mr
=
get_merged_results_opt
ha_PCOC_res
in
let
haPCOC_mr
=
get_merged_results_opt
ha_PCOC_res
in
let
haPC_mr
=
get_merged_results_opt
ha_PC_res
in
let
haPC_NeBig_mr
=
get_merged_results_opt
ha_PC_NeBig_res
in
let
haPC_NeSmall_mr
=
get_merged_results_opt
ha_PC_NeSmall_res
in
let
haPC_NeBigInSmall_mr
=
get_merged_results_opt
ha_PC_NeBigInSmall_res
in
let
haPC_NeSmallInBig_mr
=
get_merged_results_opt
ha_PC_NeSmallInBig_res
in
let
haPC_mr
=
get_merged_results_opt
ha_PC_res
in
let
haPC_NeBig_mr
=
get_merged_results_opt
ha_PC_NeBig_res
in
let
haPC_NeSmall_mr
=
get_merged_results_opt
ha_PC_NeSmall_res
in
let
haPC_NeBigInSmall_mr
=
get_merged_results_opt
ha_PC_NeBigInSmall_res
in
let
haPC_NeSmallInBig_mr
=
get_merged_results_opt
ha_PC_NeSmallInBig_res
in
make_t_choices
~
h0_mr
~
h0_NeBig_mr
~
h0_NeSmall_mr
~
haPCOC_mr
~
haPC_mr
~
haPC_NeBig_mr
~
haPC_NeSmall_mr
~
h0_NeBigInSmall_mr
make_t_choices
~
h0_mr
~
h0_NeBig_mr
~
h0_NeSmall_mr
~
haPCOC_mr
~
haPC_mr
~
haPC_NeBig_mr
~
haPC_NeSmall_mr
~
h0_NeBigInSmall_mr
~
h0_NeSmallInBig_mr
~
haPC_NeBigInSmall_mr
~
haPC_NeSmallInBig_mr
()
...
...
@@ -286,7 +286,7 @@ let plot_sens_spe_t_choices ~t_choices_l ~dataset_results_l ~profile_prefix : se
let
merged_results_dir
=
tmp
//
"merged_results_dir"
in
let
out
=
dest
//
"out"
in
let
cmd_cp_t_choices_l
=
List
.
map
t_choices_l
~
f
:
(
fun
t_choices
->
cmd
"cp"
[
dep
t_choices
.
t_choices_
max
;
t_choices_dir
//
(
t_choices
.
tree_prefix
^
".tsv"
)]
cmd
"cp"
[
dep
t_choices
.
t_choices_
recall09
;
t_choices_dir
//
(
t_choices
.
tree_prefix
^
".tsv"
)]
)
in
let
cmd_cp_merged_results_l
=
List
.
map
dataset_results_l
~
f
:
(
fun
dataset_results
->
...
...
@@ -324,20 +324,22 @@ let repo_post_analyses_all_trees_of_all_post_analyses_per_tree ~profile_prefix ~
post_analyses_res
.
dataset_results_l
)
|>
List
.
concat
in
let
sens_spe_t_choices_plot
=
plot_sens_spe_t_choices
~
t_choices_l
~
dataset_results_l
~
profile_prefix
in
Repo
.[
item
[
"sens_spe.pdf"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.sens_spe_auto_t.pdf"
]);
item
[
"all_t_choices.pdf"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.max_t_per_tree.pdf"
]);
item
[
"sens_spe.tsv"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.sens_spe_auto_t.tsv"
]);
item
[
"all_t_choices.tsv"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.max_t_per_tree.tsv"
]);
]
[
Repo
.[
item
[
"sens_spe.tsv"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.sens_spe_auto_t.tsv"
]);
item
[
"all_t_choices.tsv"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.t_per_tree.tsv"
]);
]
|>
Repo
.
shift
"pdf_tsv"
;
Repo
.[
item
[
"sens_spe.pdf"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.sens_spe_auto_t.pdf"
]);
item
[
"all_t_choices.pdf"
]
(
sens_spe_t_choices_plot
/
selector
[
"out.t_per_tree.pdf"
]);
]
]
|>
List
.
concat
let
repo_of_post_analyses_simu
~
post_analyses_simu
=
[
Repo
.[
item
[
"hypothesis_validation.pdf"
]
post_analyses_simu
.
simu_infos_plot
;
item
[
"trees_validation.pdf"
]
post_analyses_simu
.
trees_plot
;
]
|>
Repo
.
shift
"simu_infos"
]
;
(
List
.
map
post_analyses_simu
.
simu_infos_l
~
f
:
(
fun
simu_infos
->
match
simu_infos
.
simu_infos
with
...
...
@@ -345,10 +347,12 @@ let repo_of_post_analyses_simu ~post_analyses_simu =
|
Some
w
->
Repo
.[
item
[
simu_infos
.
tree_prefix
^
"@"
^
simu_infos
.
model_prefix
^
".tsv"
]
w
]
|>
Repo
.
shift
"simu_infos"
]
|>
Repo
.
shift
simu_infos
.
tree_prefix
|>
Repo
.
shift
"tsv"
)
|>
List
.
concat
);
]
|>
List
.
concat
|>
Repo
.
shift
"Simulation_details"
let
repo_of_post_analyses_res
~
prefix
~
post_analyses_res
=
[
...
...
@@ -361,7 +365,7 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res =
item
[
prefix
^
".t_choices.complete.tsv"
]
w
.
t_choices_complete
;
item
[
prefix
^
".t_choices.pdf"
]
w
.
t_choices_plot
;
item
[
prefix
^
".t_choices.condensed.pdf"
]
w
.
t_choices_condensed_plot
;
]
|>
Repo
.
shift
"t_choices"
]
);
(*(
match post_analyses_res.auto_t_plot_l with
...
...
@@ -374,6 +378,6 @@ let repo_of_post_analyses_res ~prefix ~post_analyses_res =
]
)|> List.concat
|> Repo.shift "auto_t_pdf"
);*)
);*)
]
|>
List
.
concat
lib/profile.ml
View file @
0030e37e
...
...
@@ -50,7 +50,7 @@ let cat_file ~(f_l: text_file workflow list) : text_file workflow =
let
profile_l_of_splitted_profile
~
nb_cat
~
nb_sites
profile_fn
=
let
profile_f
=
input
profile_fn
in
let
prefix
=
Filename
.
chop_extension
profile_fn
in
let
prefix
=
Filename
.
chop_extension
(
Filename
.
basename
profile_fn
)
in
let
dist_bins
=
match
nb_cat
with
|
3
->
"[0.01,0.4],[0.4,0.6],[0.6,2]"
|
1
->
"[0.01,2]"
...
...
lib/ready_dataset.ml
View file @
0030e37e
...
...
@@ -45,7 +45,6 @@ let repo rd =
;
]
|>
List
.
concat
|>
Repo
.
shift
"ready_dataset"
let
to_raw
{
input_tree
;
fna
;
fna_infos
}
=
{
Raw_dataset
.
input_tree
;
fna
;
fna_infos
}
...
...
lib/scripts/calc_t_per_meth.R
View file @
0030e37e
...
...
@@ -8,6 +8,8 @@ library("reshape2")
library
(
"ggplot2"
)
library
(
"cowplot"
)
date
=
format
(
Sys.time
(),
format
=
"%Y-%m-%d %X"
)
option_list
=
list
(
make_option
(
c
(
"--H0"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0"
,
metavar
=
"character"
),
make_option
(
c
(
"--H0NeBig"
)
,
type
=
"character"
,
default
=
NULL
,
help
=
"merged_results H0NeBig"
,
metavar
=
"character"
),
...
...
@@ -89,9 +91,10 @@ df_d_H0HaPC_NeSmall = build_df_dist_couple(df_H0NeSmall_melt, df_HaPCNeSmall_mel
df_d_H0HaPC_NeBigInSmall
=
build_df_dist_couple
(
df_H0NeBigInSmall_melt
,
df_HaPCNeBigInSmall_melt
,
"H0/HaPC NeBigInSmall"
)
df_d_H0HaPC_NeSmallInBig
=
build_df_dist_couple
(
df_H0NeSmallInBig_melt
,
df_HaPCNeSmallInBig_melt
,
"H0/HaPC NeSmallInBig"
)
df_d_H0HaPCOC
=
build_df_dist_couple
(
df_H0_melt
,
df_HaPCOC_melt
,
"H0/HaPCOC"
)
df_d_H0NeBigHaPCOC
=
build_df_dist_couple
(
df_H0NeBig_melt
,
df_HaPCOC_melt
,
"H0NeBig/HaPCOC"
)
df_d
=
rbind.data.frame
(
df_d_H0HaPC
,
df_d_H0HaPCOC
,
df_d_H0HaPC_NeBig
,
df_d_H0HaPC_NeSmall
,
df_d_H0HaPC_NeBigInSmall
,
df_d_H0HaPC_NeSmallInBig
)
df_d_H0HaPC_NeBigInSmall
,
df_d_H0HaPC_NeSmallInBig
,
df_d_H0NeBigHaPCOC
)
df_d
=
df_d
[
order
(
df_d
$
methode
),]
...
...
@@ -152,9 +155,11 @@ df_H0HaPC_NeSmall = build_df_couple(df_H0NeSmall_melt, df_HaPCNeSmall_melt, "H0/
df_H0HaPC_NeBigInSmall
=
build_df_couple
(
df_H0NeBigInSmall_melt
,
df_HaPCNeBigInSmall_melt
,
"H0/HaPC NeBigInSmall"
)
df_H0HaPC_NeSmallInBig
=
build_df_couple
(
df_H0NeSmallInBig_melt
,
df_HaPCNeSmallInBig_melt
,
"H0/HaPC NeSmallInBig"
)
df_H0HaPCOC
=
build_df_couple
(
df_H0_melt
,
df_HaPCOC_melt
,
"H0/HaPCOC"
)
df_H0NeBigHaPCOC
=
build_df_couple
(
df_H0NeBig_melt
,
df_HaPCOC_melt
,
"H0NeBig/HaPCOC"
)
df
=
rbind.data.frame
(
df_H0HaPC
,
df_H0HaPCOC
,
df_H0HaPC_NeBig
,
df_H0HaPC_NeSmall
,
df_H0HaPC_NeBigInSmall
,
df_H0HaPC_NeSmallInBig
)
df_H0HaPC_NeBigInSmall
,
df_H0HaPC_NeSmallInBig
,
df_H0NeBigHaPCOC
)
print
(
head
(
df
))
print
(
tail
(
df
))
...
...
@@ -200,7 +205,9 @@ print(summary(df_out))
print
(
"prep plot max mcc"
)
df_max_mcc_per_method
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
paste0
(
df_out
$
methode
,
df_out
$
couple
)),
function
(
x
)
{
return
(
x
[
which.max
(
x
$
MCC
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)])}))
function
(
x
)
{
return
(
x
[
which.max
(
x
$
MCC
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)])
}))
print
(
df_max_mcc_per_method
)
...
...
@@ -211,17 +218,24 @@ df_max_mcc_per_method_2$variable="MCC"
########################################################################
print
(
"prep plot recall_precision_per_meth"
)
df_recall_sup09_per_meth
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
paste0
(
df_out
$
methode
,
df_out
$
couple
)),
print
(
df_out
[
is.na
(
df_out
$
methode
),])
print
(
table
(
df_out
$
methode
))
print
(
table
(
df_out
$
methode
,
df_out
$
couple
))
df_recall_sup09_per_meth
=
do.call
(
rbind
,
lapply
(
split
(
df_out
,
paste0
(
df_out
$
methode
,
" "
,
df_out
$
couple
)),
function
(
x
)
{
x
$
precision
[
is.na
(
x
$
precision
)]
=
0
x2
=
x
[
x
$
precision
>
0.9
,]
if
(
nrow
(
x2
)
>
0
)
{
re
turn
(
x2
[
which.max
(
x2
$
sensitivity
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)]
)
re
s
=
x2
[
which.max
(
x2
$
sensitivity
),
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)]
}
else
{
x
=
x
[
1
,
c
(
"couple"
,
"methode"
,
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)]
print
(
x
)
x
[,
c
(
"threshold"
,
"MCC"
,
"sensitivity"
,
"specificity"
,
"precision"
)]
=
NA
re
turn
(
x
)
re
s
=
x
}
return
(
res
)
}))
print
(
df_recall_sup09_per_meth
)
...
...
@@ -276,6 +290,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot
=
plot
+
geom_hline
(
aes
(
yintercept
=
0.9
),
col
=
"black"
,
size
=
1
,
show.legend
=
NA
,
linetype
=
"dashed"
)
plot
=
plot
+
facet_grid
(
couple
~
methode
)
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
hjust
=
1
))
plot
=
plot
+
ggtitle
(
paste0
(
"Threshold definition such as precision > 0.9 - ("
,
date
,
")"
))
plot_recall_precision
=
plot
...
...
@@ -305,7 +320,7 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot_max_MCC
=
plot
save_plot
(
paste0
(
opt
$
out
,
suffix
,
".
max_MCC
_per_meth.pdf"
),
save_plot
(
paste0
(
opt
$
out
,
suffix
,
".
indicator
_per_meth.pdf"
),
plot_max_MCC
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
1.7
,
...
...
lib/scripts/plot_sens_spe_all_trees.R
View file @
0030e37e
...
...
@@ -40,19 +40,19 @@ files = files[grep("tsv", files)]
files_split
=
strsplit
(
files
,
"."
,
fixed
=
T
)
files_df
=
as.data.frame
(
do.call
(
rbind
,
files_split
))
print
(
files_df
)
files_df_ok
=
data.frame
(
files
=
paste0
(
input_dir
,
"/"
,
files
),
tree
=
gsub
(
".tsv"
,
""
,
files
),
bl
=
"NA"
,
profil
=
opt
$
profil
)
files_df_ok
=
data.frame
(
files
=
paste0
(
input_dir
,
"/"
,
files
),
tree
=
gsub
(
".tsv"
,
""
,
files
),
profil
=
opt
$
profil
)
#files_df_ok = data.frame(files= paste0(input_dir,"/",files), tree = files_df$V1, bl = files_df$V1, profil = opt$profil)
condensed_meths
=
c
(
"PCOC"
,
"diffsel"
,
"Identical_LG08"
,
"Mutinomial_1MinusLRT"
,
"Tdg09_1MinusFDR"
,
"Msd_1MinusP"
,
"Topological_LG08"
)
read_dir
=
function
(
x
)
{
file
=
x
[
"files"
]
tree
=
x
[
"tree"
]
profil
=
x
[
"profil"
]
bl
=
x
[
"bl"
]
df
=
read.csv
(
file
,
sep
=
"\t"
,
header
=
T
)
df
$
tree
=
tree
df
$
profil
=
profil
df
$
bl
=
bl
df
=
df
[
df
$
methode
%in%
condensed_meths
,
]
return
(
df
)
}
...
...
@@ -62,41 +62,56 @@ alpha = 0.7
x_labs
=
""
y_labs
=
"Threshold"
print
(
head
(
df
))
df_max_mcc_per_method
=
do.call
(
rbind
,
lapply
(
split
(
df
,
df
$
methode
),
function
(
x
)
{
return
(
x
[
which.max
(
x
$
threshold
),
c
(
"methode"
,
"threshold"
,
"tree"
,
"profil"
,
"couple"
)])}))
df_tmp
=
subset
(
df
,
couple
==
"H0/HaPCOC"
)
df_t_per_method
=
do.call
(
rbind
,
lapply
(
split
(
df_tmp
,
paste0
(
df_tmp
$
methode
,
df_tmp
$
tree
)),
function
(
x
)
{
return
(
x
[
which.max
(
x
$
threshold
),
c
(
"methode"
,
"threshold"
,
"tree"
,
"profil"
,
"couple"
)])}))
df_
max_mcc
_per_method_2
=
df_
max_mcc
_per_method
df_
max_mcc
_per_method_2
[
df_
max_mcc
_per_method_2
$
tree
==
unique
(
df_
max_mcc
_per_method_2
$
tree
)[
1
],]
df_
t
_per_method_2
=
df_
t
_per_method
df_
t
_per_method_2
[
df_
t
_per_method_2
$
tree
==
unique
(
df_
t
_per_method_2
$
tree
)[
1
],]
df2
=
df
df2
$
retained_t
=
"no"
df2
$
retained_t
[
df2
$
couple
==
"H0/HaPCOC"
]
=
"yes"
plot
=
ggplot
(
df
,
aes
(
x
=
tree
,
y
=
threshold
,
col
=
couple
,
shape
=
profil
))
+
theme_bw
()
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
+
ylim
(
c
(
0
,
1
.5
))
plot
=
ggplot
(
df
2
,
aes
(
x
=
tree
,
y
=
threshold
,
col
=
couple
,
shape
=
retained_t
))
+
theme_bw
()
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
+
ylim
(
c
(
0
,
1
))
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
0
,
hjust
=
1
))
plot
=
plot
+
geom_point
()
plot
=
plot
+
geom_hline
(
data
=
df_max_mcc_per_method
,
aes
(
yintercept
=
threshold
),
col
=
"red"
,
alpha
=
alpha
,
show.legend
=
NA
,
linetype
=
"dotted"
)
plot
=
plot
+
geom_label
(
data
=
df_max_mcc_per_method_2
,
nudge_y
=
0.25
,
aes
(
label
=
threshold
),
col
=
"red"
,
alpha
=
alpha
,
show.legend
=
NA
)
plot
=
plot
+
geom_point
(
position
=
position_dodge
(
.5
),
size
=
3
)
plot
=
plot
+
theme
(
legend.position
=
"top"
)
#plot = plot + guides(shape=FALSE)
plot
=
plot
+
scale_shape_manual
(
values
=
c
(
19
,
17
))
plot
=
plot
+
guides
(
col
=
guide_legend
(
ncol
=
2
))
plot
=
plot
+
guides
(
shape
=
guide_legend
(
nrow
=
2
))
#plot = plot + geom_point( data = df_t_per_method, aes(x=tree, y=threshold, group = couple), size = 5, shape = 1, col="red", alpha=alpha, position= position_dodge(.5))
#plot = plot + geom_label( data = df_t_per_method_2, nudge_y = 0.25, aes(label = threshold), col="red", alpha=alpha, show.legend = NA)
plot
=
plot
+
facet_grid
(
methode
~
.
)
+
coord_flip
()
plot
=
plot
+
ggtitle
(
"
t max per tree
"
)
plot
=
plot
+
ggtitle
(
"
Threshold definition such as\nprecision > 0.9 per H0*/Ha* couple
"
)
output_pdf
=
paste0
(
opt
$
out
,
".
max_
t_per_tree.pdf"
)
output_pdf
=
paste0
(
opt
$
out
,
".t_per_tree.pdf"
)
save_plot
(
output_pdf
,
plot
,
ncol
=
1
,
nrow
=
0.
3
*
length
(
unique
(
df
$
methode
)),
nrow
=
0.
5
*
length
(
unique
(
df
$
methode
)),
base_aspect_ratio
=
1.5
,
limitsize
=
FALSE
)
output_tsv
=
paste0
(
opt
$
out
,
".
max_
t_per_tree.tsv"
)
output_tsv
=
paste0
(
opt
$
out
,
".t_per_tree.tsv"
)
write.table
(
df
,
file
=
output_tsv
,
row.names
=
FALSE
,
quote
=
F
,
sep
=
"\t"
)
####################
# Spe en sens
####################
df_t
=
df
df_t
=
df_tmp
print
(
"df_t"
)
print
(
df_t
)
parse_file
=
function
(
df_m
,
df_t
)
{
df_m_melt
=
melt
(
df_m
)
...
...
@@ -104,10 +119,10 @@ parse_file = function(df_m, df_t) {
df_m_melt
$
t
=
as.numeric
(
as.character
(
df_m_melt
$
t
))
df_m_melt
$
is_P
=
df_m_melt
$
value
>
df_m_melt
$
t
P
=
tapply
(
df_m_melt
$
is_P
,
df_m_melt
$
variable
,
function
(
x
)
{
sum
(
x
,
na.rm
=
T
)})
P
[
df_m_melt
$
variable
[
is.na
(
df_m_melt
$
t
)]]
=
NA
n_sites
=
dim
(
df_m
)[
1
]
df
=
data.frame
(
methode
=
names
(
P
),
P
=
P
)
df
=
merge
(
df
,
df_t
,
by
=
"methode"
)
df
$
P
=
df
$
P
/
n_sites
return
(
df
)
}
...
...
@@ -119,27 +134,29 @@ files = files[grep("tsv", files)]
files_split
=
strsplit
(
files
,
"@"
,
fixed
=
T
)
files_df
=
as.data.frame
(
do.call
(
rbind
,
files_split
))
print
(
files_df
)
files_df_ok
=
data.frame
(
files
=
paste0
(
input_dir2
,
"/"
,
files
),
tree
=
files_df
$
V1
,
hyp
=
gsub
(
".tsv"
,
""
,
files_df
$
V2
),
bl
=
files_df
$
V1
,
profil
=
opt
$
profil
)
files_df_ok
=
data.frame
(
files
=
paste0
(
input_dir2
,
"/"
,
files
),
tree
=
files_df
$
V1
,
hyp
=
gsub
(
".tsv"
,
""
,
files_df
$
V2
),
profil
=
opt
$
profil
)
read_dir
=
function
(
x
)
{
file
=
x
[
"files"
]
tree
=
x
[
"tree"
]
profil
=
x
[
"profil"
]
bl
=
x
[
"bl"
]
hyp
=
x
[
"hyp"
]
df_m
=
read.csv
(
file
,
sep
=
"\t"
,
header
=
T
,
na.strings
=
c
(
"NA"
,
"NaN"
,
" "
,
""
))
print
(
head
(
df_m
))
df
=
parse_file
(
df_m
[,
colnames
(
df_m
)
%in%
df_t
$
methode
],
df_t
[
df_t
$
tree
==
tree
,
c
(
"methode"
,
"threshold"
)])
df
$
tree
=
tree
df
$
profil
=
profil
df
$
bl
=
bl
df
$
hyp
=
hyp
df
=
df
[
df
$
methode
%in%
condensed_meths
,
]
return
(
df
)
}
df
=
do.call
(
rbind
,
apply
(
files_df_ok
,
1
,
read_dir
))
rownames
(
df
)
<-
NULL
print
(
df
)
hyps_from
=
unique
(
df
$
hyp
)
hyps_to
=
rep
(
"Specificity"
,
length
(
hyps_from
))
hyps_to
[
grep
(
"Ha"
,
hyps_from
)]
=
"Sensitivity"
...
...
@@ -153,7 +170,11 @@ x_labs = ""
y_labs
=
"Value"
plot
=
ggplot
(
df
,
aes
(
x
=
methode
,
y
=
P
,
fill
=
hyp
))
+
theme_bw
()
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
+
ylim
(
c
(
0
,
1.1
))
print
(
df
)
plot_tree
=
function
(
tree
)
{
df_tmp
=
df
[
df
$
tree
==
tree
,
]
plot
=
ggplot
(
df_tmp
,
aes
(
x
=
methode
,
y
=
P
,
fill
=
hyp
))
+
theme_bw
()
+
labs
(
x
=
x_labs
,
y
=
y_labs
)
+
ylim
(
c
(
0
,
1.1
))
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
30
,
hjust
=
1
))
plot
=
plot
+
geom_bar
(
stat
=
"identity"
,
position
=
position_dodge
(
0.9
),
color
=
"black"
,
alpha
=
0.7
)
plot
=
plot
+
geom_text
(
aes
(
y
=
P
+
0.05
,
label
=
round
(
P
,
2
)),
angle
=
90
,
hjust
=
0
,
position
=
position_dodge
(
0.9
))
...
...
@@ -162,12 +183,16 @@ plot = plot + facet_grid(variable ~ tree)
plot
=
plot
+
ggtitle
(
paste0
(
"Compa methodes - "
,
opt
$
profil
,
" - ("
,
date
,
")"
))
plot
=
plot
+
scale_color_manual
(
values
=
c
(
'gray'
,
'black'
),
guide
=
FALSE
)
plot
=
plot
+
scale_alpha
(
range
=
c
(
0.7
,
1
),
guide
=
FALSE
)
}
plot_all
<-
plot_grid
(
plotlist
=
lapply
(
unique
(
df
$
tree
),
plot_tree
),
labels
=
"AUTO"
,
nrow
=
length
(
unique
(
df
$
tree
)))
output_pdf2
=
paste0
(
opt
$
out
,
".sens_spe_auto_t.pdf"
)
save_plot
(
output_pdf2
,
plot
,
ncol
=
length
(
unique
(
df
$
hyp
))
/
3
*
length
(
unique
(
df
$
tree
))
,
nrow
=
2
,
plot
_all
,
ncol
=
length
(
unique
(
df
$
hyp
))
*
length
(
unique
(
df
$
methode
))
/
30
,
nrow
=
2
*
length
(
unique
(
df
$
tree
))
,
base_aspect_ratio
=
1.5
,
limitsize
=
FALSE
)
...
...
lib/scripts/plot_trees.R
View file @
0030e37e
...
...
@@ -78,13 +78,13 @@ plotlist = lapply(split(files_df_ok,paste0(files_df_ok$tree_prefix)), plot_tree_
plot_all
<-
plot_grid
(
plotlist
=
plotlist
,
labels
=
NA
,
n
col
=
length
(
unique
(
files_df_ok
$
tree_prefix
)))
labels
=
NA
,
n
row
=
length
(
unique
(
files_df_ok
$
tree_prefix
)))
output_pdf
=
paste0
(
opt
$
out
,
".pdf"
)
save_plot
(
output_pdf
,
plot_all
,
ncol
=
length
(
unique
(
files_df_ok
$
tree_prefix
))
,
nrow
=
5
,
ncol
=
2
,
nrow
=
10
*
length
(
unique
(
files_df_ok
$
tree_prefix
))
,
base_aspect_ratio
=
2
,
limitsize
=
FALSE
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment