Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
cbbc83e0
Commit
cbbc83e0
authored
Aug 09, 2018
by
Carine Rey
Browse files
update
parent
702b72de
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
35 additions
and
19 deletions
+35
-19
lib/pipeline.ml
lib/pipeline.ml
+3
-2
lib/profile.ml
lib/profile.ml
+2
-2
lib/scripts/calc_t_per_meth.R
lib/scripts/calc_t_per_meth.R
+27
-12
lib/scripts/generate_pairs.py
lib/scripts/generate_pairs.py
+2
-2
lib/scripts/merge_det_results.py
lib/scripts/merge_det_results.py
+1
-1
No files found.
lib/pipeline.ml
View file @
cbbc83e0
...
...
@@ -238,12 +238,13 @@ let derive_from_dataset ~dataset ~preview ~fast_mode=
`Topological_LG
;
`Multinomial
;
`Pcoc
;
`Tdg09
;
]
;
if
preview
then
[]
else
[
`Pcoc_gamma
;
[
`Tdg09
;
`Pcoc_gamma
;
`Identical_WAG
;
`Topological_WAG
;
]
...
...
lib/profile.ml
View file @
cbbc83e0
...
...
@@ -62,8 +62,8 @@ let profile_l_of_splitted_profile ~nb_cat ~nb_sites profile_fn =
let
p0
=
splitted_profile
/
selector
[
"profile_0.tsv"
]
in
let
p1
=
splitted_profile
/
selector
[
"profile_1.tsv"
]
in
let
p2
=
splitted_profile
/
selector
[
"profile_2.tsv"
]
in
(*
{profile_c=cat_file [p0;p1;p2] ; profile_n=prefix ^ "_3categories" ; profile_f}
*)
{
profile_c
=
p2
;
profile_n
=
prefix
^
"_1categorie_max_dist"
;
profile_f
};
{
profile_c
=
cat_file
[
p0
;
p1
;
p2
]
;
profile_n
=
prefix
^
"_3categories"
;
profile_f
}
(*
{profile_c=p2 ; profile_n=prefix ^ "_1categorie_max_dist" ; profile_f};
*)
)
|
1
->
(
let
p0
=
splitted_profile
/
selector
[
"profile_0.tsv"
]
in
{
profile_c
=
p0
;
profile_n
=
prefix
;
profile_f
})
...
...
lib/scripts/calc_t_per_meth.R
View file @
cbbc83e0
...
...
@@ -52,7 +52,7 @@ read_hyp = function(opt_name) {
if
(
!
is.na
(
opt_name
)){
df
=
read.table
(
opt_name
,
header
=
TRUE
,
sep
=
'\t'
,
na.strings
=
"NA"
)
df
=
df
[,
!
colnames
(
df
)
%in%
c
(
"Indel_prop"
,
"Indel_prop.ConvLeaves."
)]
id_vars
=
if
(
"P_
distance
"
%in%
colnames
(
df
))
{
c
(
"Sites"
,
"P_
distance
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
)}
else
{
c
(
"Sites"
)}
id_vars
=
if
(
"P_
JSD
"
%in%
colnames
(
df
))
{
c
(
"Sites"
,
"P_
JSD"
,
"P_ED
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
)}
else
{
c
(
"Sites"
)}
df_melt
=
melt
(
df
,
id.vars
=
id_vars
,
variable.name
=
"methode"
)
return
(
df_melt
)
}
else
{
...
...
@@ -85,8 +85,8 @@ print("prep df_d")
build_df_dist_couple
=
function
(
df_h0
,
df_ha
,
name
)
{
if
((
!
is.null
(
df_h0
))
&
(
!
is.null
(
df_ha
)))
{
print
(
head
(
df_h0
))
df
=
merge
(
df_h0
,
df_ha
,
by
=
c
(
"Sites"
,
"P_
distance
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
,
"methode"
),
suffix
=
c
(
"_H0"
,
"_Ha"
))
df_melt
=
melt
(
df
,
id.vars
=
c
(
"Sites"
,
"P_
distance
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
,
"methode"
),
variable.name
=
"val_H0Ha"
)
df
=
merge
(
df_h0
,
df_ha
,
by
=
c
(
"Sites"
,
"P_
JSD"
,
"P_ED
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
,
"methode"
),
suffix
=
c
(
"_H0"
,
"_Ha"
))
df_melt
=
melt
(
df
,
id.vars
=
c
(
"Sites"
,
"P_
JSD"
,
"P_ED
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
,
"methode"
),
variable.name
=
"val_H0Ha"
)
df_melt
$
couple
=
name
}
else
{
df_melt
=
NULL
...
...
@@ -341,11 +341,11 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot
=
ggplot
(
df_d
,
aes
(
y
=
P_
distance
,
x
=
value
,
shape
=
val_H0Ha
,
col
=
val_H0Ha
))
plot
=
ggplot
(
df_d
,
aes
(
y
=
P_
JSD
,
x
=
value
,
shape
=
val_H0Ha
,
col
=
val_H0Ha
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
labs
(
y
=
"P
distance
"
,
x
=
"Value"
)
plot
=
plot
+
xlim
(
c
(
0
,
1
))
+
ylim
(
c
(
0
,
max
(
max
(
df_d
$
P_
distance
),
1
)))
plot
=
plot
+
labs
(
y
=
"P
JSD
"
,
x
=
"Value"
)
plot
=
plot
+
xlim
(
c
(
0
,
1
))
+
ylim
(
c
(
0
,
max
(
max
(
df_d
$
P_
JSD
),
1
)))
plot
=
plot
+
guides
(
col
=
FALSE
)
plot
=
plot
+
scale_color_manual
(
values
=
colors
)
plot
=
plot
+
geom_vline
(
data
=
df_recall_sup09_per_meth
,
aes
(
xintercept
=
threshold
,
col
=
couple
),
size
=
1
,
show.legend
=
NA
,
linetype
=
"dashed"
)
...
...
@@ -353,7 +353,21 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot
=
plot
+
scale_shape_manual
(
values
=
c
(
16
,
17
))
plot
=
plot
+
facet_grid
(
couple
~
methode
)
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
hjust
=
1
))
plot_value_distance
=
plot
plot_value_JSD
=
plot
plot
=
ggplot
(
df_d
,
aes
(
y
=
P_ED
,
x
=
value
,
shape
=
val_H0Ha
,
col
=
val_H0Ha
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
labs
(
y
=
"P ED"
,
x
=
"Value"
)
plot
=
plot
+
xlim
(
c
(
0
,
1
))
+
ylim
(
c
(
0
,
max
(
max
(
df_d
$
P_ED
),
1
)))
plot
=
plot
+
guides
(
col
=
FALSE
)
plot
=
plot
+
scale_color_manual
(
values
=
colors
)
plot
=
plot
+
geom_vline
(
data
=
df_recall_sup09_per_meth
,
aes
(
xintercept
=
threshold
,
col
=
couple
),
size
=
1
,
show.legend
=
NA
,
linetype
=
"dashed"
)
plot
=
plot
+
geom_point
(
size
=
1.2
,
alpha
=
0.7
,
stroke
=
0.01
)
plot
=
plot
+
scale_shape_manual
(
values
=
c
(
16
,
17
))
plot
=
plot
+
facet_grid
(
couple
~
methode
)
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
hjust
=
1
))
plot_value_ED
=
plot
plot
=
ggplot
(
subset
(
df_d
,
val_H0Ha
==
"value_Ha"
),
aes
(
y
=
entropy_C2
,
x
=
value
,
shape
=
val_H0Ha
,
col
=
val_H0Ha
))
plot
=
plot
+
theme_bw
()
...
...
@@ -398,22 +412,23 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
save_plot
(
paste0
(
opt
$
out
,
suffix
,
".value_distance_per_meth.pdf"
),
plot_value_
distance
,
plot_value_
JSD
,
ncol
=
0.4
*
length
(
unique
(
df_d
$
methode
)),
nrow
=
0.4
*
length
(
unique
(
df_d
$
couple
)),
base_aspect_ratio
=
1
,
limitsize
=
FALSE
)
plot
=
plot_grid
(
plot_recall_precision
,
plot_max_MCC
,
plot_value_
distance
,
plot_entropy_C2_C1
,
plot_entropy_C1
,
plot_entropy_C2
,
plot
=
plot_grid
(
plot_recall_precision
,
plot_max_MCC
,
plot_value_
JSD
,
plot_value_ED
,
plot_entropy_C2_C1
,
plot_entropy_C1
,
plot_entropy_C2
,
labels
=
c
(
"A"
,
"B"
,
"C"
,
"D"
,
"E"
,
"F"
),
rel_heights
=
c
(
length
(
unique
(
df_out
$
couple
))
*
0.8
,
3
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
),
nrow
=
6
)
rel_heights
=
c
(
length
(
unique
(
df_out
$
couple
))
*
0.8
,
3
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
),
nrow
=
7
)
save_plot
(
paste0
(
opt
$
out
,
suffix
,
".pdf"
),
plot
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
length
(
unique
(
df_out
$
couple
))
*
0.5
*
5
+
1
,
nrow
=
length
(
unique
(
df_out
$
couple
))
*
0.5
*
6
+
1
,
base_aspect_ratio
=
1
,
limitsize
=
FALSE
)
...
...
lib/scripts/generate_pairs.py
View file @
cbbc83e0
...
...
@@ -152,7 +152,7 @@ def entropy_p(p):
MESSAGE
(
"Preparing a dataframe for every bin..."
)
# columns = ["p1_" + str(i) for i in range(20)] + ["p2_" + str(i) for i in range(20)] + ["distance"]
columns
=
[
"p1"
,
"p2"
,
"distance"
,
"entropy_p1"
,
"entropy_p2"
]
columns
=
[
"p1"
,
"p2"
,
"distance"
,
"entropy_p1"
,
"entropy_p2"
,
"distance_eucl"
]
pair_bins
=
[
pd
.
DataFrame
(
columns
=
columns
)
for
x
in
range
(
nb_bins
)]
MESSAGE
(
"Picking profile pairs and computing distances..."
)
...
...
@@ -172,7 +172,7 @@ while min(map(lambda b: b.shape[0], pair_bins)) < binsize:
for
i
in
range
(
nb_bins
):
if
in_bin
(
dist
,
i
)
and
pair_bins
[
i
].
shape
[
0
]
<
binsize
:
new_row
=
[
p1
[
"i"
],
p2
[
"i"
],
dist
,
entropy_p
(
p1
[
"p"
]),
entropy_p
(
p2
[
"p"
])]
new_row
=
[
p1
[
"i"
],
p2
[
"i"
],
dist
,
entropy_p
(
p1
[
"p"
]),
entropy_p
(
p2
[
"p"
]),
euclidian_distance
(
p1
[
"p"
],
p2
[
"p"
])]
pair_bins
[
i
].
loc
[
len
(
pair_bins
[
i
])]
=
new_row
nb_ok
+=
1
break
...
...
lib/scripts/merge_det_results.py
View file @
cbbc83e0
...
...
@@ -154,7 +154,7 @@ if args.multinomial :
if
args
.
fna_infos
:
df_fna_infos
=
pd
.
read_csv
(
args
.
fna_infos
,
sep
=
"
\t
"
,
names
=
[
"C1"
,
"C2"
,
"P_
distance
"
,
"entropy_C1"
,
"entropy_C2"
])
df_fna_infos
=
pd
.
read_csv
(
args
.
fna_infos
,
sep
=
"
\t
"
,
names
=
[
"C1"
,
"C2"
,
"P_
JSD
"
,
"entropy_C1"
,
"entropy_C2"
,
"P_ED"
])
df_fna_infos
[
"Sites"
]
=
df_fna_infos
.
index
+
1
#df_fna_infos = df_fna_infos[['Sites','P_distance']]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment