Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
725d12e7
Commit
725d12e7
authored
Jul 19, 2018
by
Carine Rey
Browse files
calculate and plot an auto threshold per detection methode for each input tree
parent
2b59323d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
101 additions
and
25 deletions
+101
-25
lib/convergence_detection.ml
lib/convergence_detection.ml
+9
-4
lib/convergence_detection.mli
lib/convergence_detection.mli
+2
-0
lib/pipeline.ml
lib/pipeline.ml
+21
-9
lib/post_analyses.ml
lib/post_analyses.ml
+58
-12
lib/scripts/plot_convergent_sites.py
lib/scripts/plot_convergent_sites.py
+11
-0
No files found.
lib/convergence_detection.ml
View file @
725d12e7
...
...
@@ -77,7 +77,7 @@ let merge_results ~res_by_tools : text_file workflow =
]
;
]
let
plot_merge_results
~
plot_all_sites
~
(
res_by_tools
:
result
list
)
~
tree
~
faa
~
tsv
:
svg
workflow
=
let
plot_merge_results
?
t_choices
~
plot_all_sites
~
(
res_by_tools
:
result
list
)
~
tree
~
faa
~
tsv
()
:
svg
workflow
=
let
env
=
docker_image
~
account
:
"carinerey"
~
name
:
"pcoc"
~
tag
:
"06212018"
()
in
(* use of pcoc env due to its working X server for dra plot with ete3 *)
let
meths
=
List
.
map
res_by_tools
~
f
:
(
fun
res
->
...
...
@@ -96,7 +96,7 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
string
opt
)
|>
seq
~
sep
:
","
in
let
meths
_t
=
List
.
map
res_by_tools
~
f
:
(
fun
res
->
let
default
_t
=
List
.
map
res_by_tools
~
f
:
(
fun
res
->
let
opt
=
match
res
with
|
`Pcoc
_
->
"PCOC:0.99,PC:0.99,OC:0.99"
|
`Pcoc_gamma
_
->
"PCOC_gamma:0.99,PC_gamma:0.99,OC_gamma:0.99"
...
...
@@ -112,6 +112,10 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
string
opt
)
|>
seq
~
sep
:
","
in
let
meths_t
=
match
t_choices
with
|
Some
_
->
None
|
None
->
Some
default_t
in
let
package_diffsel_script_utils
=
tmp
//
"diffsel_script_utils.py"
in
let
package_plot_data
=
tmp
//
"plot_data.py"
in
let
script_plot_convergent_sites
=
tmp
//
"plot_convergent_sites.py"
in
...
...
@@ -134,9 +138,10 @@ let plot_merge_results ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~t
opt
"-tree"
dep
tree
;
opt
"-out"
ident
out
;
opt
"-meth"
ident
meths
;
opt
"-t"
ident
meths_t
;
option
(
opt
"-t"
ident
)
meths_t
;
option
(
opt
"--t_tsv"
dep
)
t_choices
;
flag
string
"--all_sites"
plot_all_sites
;
]
]
)
]
]
/
selector
[
"results.svg"
]
lib/convergence_detection.mli
View file @
725d12e7
...
...
@@ -34,9 +34,11 @@ val merge_results :
text_file
workflow
val
plot_merge_results
:
?
t_choices
:
text_file
workflow
->
plot_all_sites
:
bool
->
res_by_tools
:
result
list
->
tree
:
nhx
workflow
->
faa
:
aminoacid_fasta
workflow
->
tsv
:
text_file
workflow
->
unit
->
svg
workflow
lib/pipeline.ml
View file @
725d12e7
...
...
@@ -167,7 +167,7 @@ let repo_of_dataset_results_l ~dataset_results_l =
let
model_prefix
=
dataset_results
.
model_prefix
in
let
tree_prefix
=
dataset_results
.
tree_prefix
in
let
merged_results_item
=
Repo
.
item
[
tree_prefix
^
"."
^
model_prefix
^
".merged_results.tsv"
]
merged_results
in
let
plot_merged_results_item
=
Repo
.
item
[
tree_prefix
^
"."
^
model_prefix
^
".plot_merged_results"
]
plot_merge_results
in
let
plot_merged_results_item
=
Repo
.
item
[
tree_prefix
^
"."
^
model_prefix
^
".plot_merged_results
.svg
"
]
plot_merge_results
in
let
repo
=
merged_results_item
::
plot_merged_results_item
::
...
...
@@ -231,7 +231,7 @@ let derive_from_dataset ~dataset ~preview ~fast_mode=
let
faa
=
dataset
.
dataset
.
faa
in
let
tree
=
Tree_dataset
.
tree
dataset
.
dataset
.
tree_dataset
`Detection
in
let
plot_all_sites
=
if
dataset
.
is_real
then
false
else
true
in
let
plot_merged_results
=
plot_merge_results
~
plot_all_sites
~
res_by_tools
~
tsv
~
faa
~
tree
in
let
plot_merged_results
=
plot_merge_results
~
plot_all_sites
~
res_by_tools
~
tsv
~
faa
~
tree
()
in
let
model_prefix
=
dataset
.
model_prefix
in
let
tree_prefix
=
dataset
.
tree_prefix
in
{
model_prefix
;
tree_prefix
;
dataset
;
res_by_tools
;
merged_results
;
plot_merged_results
}
...
...
@@ -260,17 +260,16 @@ let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
let
validation_main
~
outdir
?
(
indir
=
""
)
?
(
ns
=
0
)
?
(
np
=
2
)
?
(
mem
=
2
)
~
preview
~
fast_mode
~
no_Ne
~
no_HaPC
~
tree_dir
~
profile_fn
~
use_concat
~
only_simu
()
=
(* simulated trees *)
let
trees
=
Array
.
to_list
@@
Sys
.
readdir
tree_dir
in
let
simu_dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
in
let
post_analyses_simu
=
Post_analyses
.
post_analyses_simu_of_simu_dataset_l
~
simu_dataset_l
in
let
repo_of_post_analyses_simu
=
Post_analyses
.
repo_of_post_analyses_simu
~
post_analyses_simu
in
let
repo_per_tree
=
List
.
map
trees
~
f
:
(
fun
tree
->
let
repo_per_tree
_simu
=
List
.
map
trees
~
f
:
(
fun
tree
->
let
trees
=
[
tree
]
in
let
tree_prefix
=
Filename
.
chop_extension
tree
in
let
indir_dataset_l
=
if
indir
=
""
then
[]
else
parse_input_data
indir
in
let
dataset_l
=
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
@
indir_dataset_l
in
derive_sim
~
tree_dir
~
trees
~
profile_fn
~
preview
~
use_concat
~
ns
~
no_Ne
~
no_HaPC
in
let
dataset_results_l
=
if
only_simu
then
[]
...
...
@@ -285,10 +284,23 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
]
|>
List
.
concat
in
repo_per_tree
)
|>
List
.
concat
)
|>
List
.
concat
in
(* real trees *)
let
indir_dataset_l
=
if
indir
=
""
then
[]
else
parse_input_data
indir
in
let
dataset_l
=
indir_dataset_l
in
let
dataset_results_l
=
if
only_simu
then
[]
else
derive_det
~
dataset_l
~
preview
~
fast_mode
in
let
repo_real_trees
=
[
Dataset
.
repo
dataset_l
~
preview
;
repo_of_dataset_results_l
~
dataset_results_l
;
]
|>
List
.
concat
in
let
repo
=
repo_of_post_analyses_simu
@
repo_per_tree
in
let
repo
=
repo_of_post_analyses_simu
@
repo_real_trees
@
repo_per_tree
_simu
in
Repo
.
build
~
outdir
~
np
~
mem
:
(
`GB
mem
)
~
logger
repo
let
simulation_command
=
...
...
lib/post_analyses.ml
View file @
725d12e7
...
...
@@ -14,15 +14,24 @@ type t_choices = {
t_choices_plot
:
text_file
workflow
;
}
type
auto_t_plot
=
{
tree_prefix
:
string
;
model_prefix
:
string
;
auto_t_plot
:
svg
workflow
;
}
type
post_analyses_res
=
{
t_choices
:
t_choices
option
;
auto_t_plot_l
:
auto_t_plot
list
option
;
}
type
simu_infos
=
{
simu_infos
:
text_file
workflow
option
;
model_prefix
:
string
;
tree_prefix
:
string
;
}
type
post_analyses_res
=
{
t_choices
:
t_choices
option
;
}
type
post_analyses_simu
=
{
simu_infos_l
:
simu_infos
list
;
simu_infos_plot
:
text_file
workflow
;
...
...
@@ -100,6 +109,27 @@ let get_t_choices ~(dataset_results_l: dataset_res list) : t_choices option =
Some
{
t_choices_max
;
t_choices_complete
;
t_choices_plot
}
|
_
->
None
let
plot_det_meth_res_auto_t
~
t_choices
~
dataset_results_l
=
match
t_choices
with
|
None
->
None
|
Some
w
->
Some
(
List
.
map
dataset_results_l
~
f
:
(
fun
(
dataset_results
:
dataset_res
)
->
let
tree_prefix
=
dataset_results
.
tree_prefix
in
let
model_prefix
=
dataset_results
.
model_prefix
in
let
ready_dataset
=
dataset_results
.
dataset
.
dataset
in
let
tree
=
Tree_dataset
.
tree
ready_dataset
.
tree_dataset
`Detection
in
let
faa
=
ready_dataset
.
faa
in
let
tsv
=
dataset_results
.
merged_results
in
let
res_by_tools
=
dataset_results
.
res_by_tools
in
let
plot_all_sites
=
true
in
let
t_choices
=
w
.
t_choices_max
in
let
auto_t_plot
=
plot_merge_results
~
t_choices
~
plot_all_sites
~
res_by_tools
~
tree
~
faa
~
tsv
()
in
{
tree_prefix
;
model_prefix
;
auto_t_plot
}
)
)
let
get_simu_infos
~
(
dataset
:
Dataset
.
t
)
=
let
model_prefix
=
dataset
.
model_prefix
in
let
ready_dataset
=
dataset
.
dataset
in
...
...
@@ -113,7 +143,8 @@ let get_simu_infos ~(dataset:Dataset.t) =
let
post_analyses_res_of_dataset_results_l
~
dataset_results_l
=
let
t_choices
=
get_t_choices
~
dataset_results_l
in
{
t_choices
}
let
auto_t_plot_l
=
plot_det_meth_res_auto_t
~
t_choices
~
dataset_results_l
in
{
t_choices
;
auto_t_plot_l
}
let
post_analyses_simu_of_simu_dataset_l
~
simu_dataset_l
=
let
simu_infos_l
=
List
.
map
simu_dataset_l
~
f
:
(
fun
dataset
->
...
...
@@ -143,12 +174,27 @@ let repo_of_post_analyses_simu ~post_analyses_simu =
]
|>
List
.
concat
let
repo_of_post_analyses_res
~
prefix
~
post_analyses_res
=
match
post_analyses_res
.
t_choices
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
".t_choices.max_mcc_per_meth.tsv"
]
w
.
t_choices_max
;
item
[
prefix
^
".t_choices.complete.tsv"
]
w
.
t_choices_complete
;
item
[
prefix
^
".t_choices.pdf"
]
w
.
t_choices_plot
;
]
|>
Repo
.
shift
"t_choices"
[
(
match
post_analyses_res
.
t_choices
with
|
None
->
[]
|
Some
w
->
Repo
.[
item
[
prefix
^
".t_choices.max_mcc_per_meth.tsv"
]
w
.
t_choices_max
;
item
[
prefix
^
".t_choices.complete.tsv"
]
w
.
t_choices_complete
;
item
[
prefix
^
".t_choices.pdf"
]
w
.
t_choices_plot
;
]
|>
Repo
.
shift
"t_choices"
);
(
match
post_analyses_res
.
auto_t_plot_l
with
|
None
->
[]
|
Some
w_l
->
List
.
map
w_l
~
f
:
(
fun
w
->
Repo
.[
let
prefix_f
=
w
.
tree_prefix
^
"@"
^
w
.
model_prefix
in
item
[
prefix_f
^
".auto_t.svg"
]
w
.
auto_t_plot
;
]
)
|>
List
.
concat
|>
Repo
.
shift
"auto_t_pdf"
);
]
|>
List
.
concat
lib/scripts/plot_convergent_sites.py
View file @
725d12e7
...
...
@@ -59,6 +59,9 @@ parser.add_argument('-meth', dest="methods_to_be_plotted", type=str,
parser
.
add_argument
(
'-t'
,
dest
=
"threshold_by_method"
,
type
=
str
,
metavar
=
"
\"
meth1:0.85,meth3:70
\"
"
,
help
=
"threshold to filter site by method in the table file (default:0.99 or 99)"
,
default
=
None
)
parser
.
add_argument
(
'--t_tsv'
,
dest
=
"threshold_by_method_tsv"
,
type
=
str
,
metavar
=
"
\"
t_choices.tsv
\"
"
,
help
=
"tabular file containing thresholds to filter site by method in the table file (default:none)"
,
default
=
None
)
parser
.
add_argument
(
'--all_sites'
,
dest
=
"plot_all_sites"
,
action
=
"store_true"
,
help
=
"Plot all sites (default:False)"
,
default
=
False
)
...
...
@@ -83,12 +86,19 @@ methods_to_be_plotted = args.methods_to_be_plotted
if
methods_to_be_plotted
:
methods_to_be_plotted
=
[
m
for
m
in
methods_to_be_plotted
.
split
(
","
)
if
m
]
MESSAGE
(
"Methods to be plotted: "
+
", "
.
join
([
param
(
meth
)
for
meth
in
methods_to_be_plotted
]))
threshold_by_method
=
args
.
threshold_by_method
dic_threshold_by_method
=
{}
if
threshold_by_method
:
dic_threshold_by_method
=
{
mt
.
split
(
":"
)[
0
]:
float
(
mt
.
split
(
":"
)[
1
])
for
mt
in
threshold_by_method
.
split
(
","
)
if
mt
}
MESSAGE
(
"Threshold by method: "
+
", "
.
join
([(
param
(
key
)
+
": "
+
param
(
value
))
for
key
,
value
in
dic_threshold_by_method
.
items
()]))
threshold_by_method_tsv
=
args
.
threshold_by_method_tsv
if
threshold_by_method_tsv
:
MESSAGE
(
"threshold tabular file is "
+
param
(
threshold_by_method_tsv
))
df_threshold_by_method
=
pd
.
read_table
(
threshold_by_method_tsv
)
dic_threshold_by_method
=
df_threshold_by_method
.
set_index
(
'methode'
).
to_dict
()[
'threshold'
]
MESSAGE
(
"Threshold by method: "
+
", "
.
join
([(
param
(
key
)
+
": "
+
param
(
value
))
for
key
,
value
in
dic_threshold_by_method
.
items
()]))
#===================================================================================================
STEP
(
"Reading input files:"
)
...
...
@@ -118,6 +128,7 @@ if not methods_to_be_plotted:
MESSAGE
(
"Computing thresholds to filter sites:"
)
methode_scales
=
{}
for
meth
in
methods_to_be_plotted
:
max_meth
=
max
(
result_table
[
meth
])
if
max_meth
>
1
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment