Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
cbbc83e0
Commit
cbbc83e0
authored
Aug 09, 2018
by
Carine Rey
Browse files
update
parent
702b72de
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
35 additions
and
19 deletions
+35
-19
lib/pipeline.ml
lib/pipeline.ml
+3
-2
lib/profile.ml
lib/profile.ml
+2
-2
lib/scripts/calc_t_per_meth.R
lib/scripts/calc_t_per_meth.R
+27
-12
lib/scripts/generate_pairs.py
lib/scripts/generate_pairs.py
+2
-2
lib/scripts/merge_det_results.py
lib/scripts/merge_det_results.py
+1
-1
No files found.
lib/pipeline.ml
View file @
cbbc83e0
...
...
@@ -238,12 +238,13 @@ let derive_from_dataset ~dataset ~preview ~fast_mode=
`Topological_LG
;
`Multinomial
;
`Pcoc
;
`Tdg09
;
]
;
if
preview
then
[]
else
[
`Pcoc_gamma
;
[
`Tdg09
;
`Pcoc_gamma
;
`Identical_WAG
;
`Topological_WAG
;
]
...
...
lib/profile.ml
View file @
cbbc83e0
...
...
@@ -62,8 +62,8 @@ let profile_l_of_splitted_profile ~nb_cat ~nb_sites profile_fn =
let
p0
=
splitted_profile
/
selector
[
"profile_0.tsv"
]
in
let
p1
=
splitted_profile
/
selector
[
"profile_1.tsv"
]
in
let
p2
=
splitted_profile
/
selector
[
"profile_2.tsv"
]
in
(*
{profile_c=cat_file [p0;p1;p2] ; profile_n=prefix ^ "_3categories" ; profile_f}
*)
{
profile_c
=
p2
;
profile_n
=
prefix
^
"_1categorie_max_dist"
;
profile_f
};
{
profile_c
=
cat_file
[
p0
;
p1
;
p2
]
;
profile_n
=
prefix
^
"_3categories"
;
profile_f
}
(*
{profile_c=p2 ; profile_n=prefix ^ "_1categorie_max_dist" ; profile_f};
*)
)
|
1
->
(
let
p0
=
splitted_profile
/
selector
[
"profile_0.tsv"
]
in
{
profile_c
=
p0
;
profile_n
=
prefix
;
profile_f
})
...
...
lib/scripts/calc_t_per_meth.R
View file @
cbbc83e0
...
...
@@ -52,7 +52,7 @@ read_hyp = function(opt_name) {
if
(
!
is.na
(
opt_name
)){
df
=
read.table
(
opt_name
,
header
=
TRUE
,
sep
=
'\t'
,
na.strings
=
"NA"
)
df
=
df
[,
!
colnames
(
df
)
%in%
c
(
"Indel_prop"
,
"Indel_prop.ConvLeaves."
)]
id_vars
=
if
(
"P_
distance
"
%in%
colnames
(
df
))
{
c
(
"Sites"
,
"P_
distance
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
)}
else
{
c
(
"Sites"
)}
id_vars
=
if
(
"P_
JSD
"
%in%
colnames
(
df
))
{
c
(
"Sites"
,
"P_
JSD"
,
"P_ED
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
)}
else
{
c
(
"Sites"
)}
df_melt
=
melt
(
df
,
id.vars
=
id_vars
,
variable.name
=
"methode"
)
return
(
df_melt
)
}
else
{
...
...
@@ -85,8 +85,8 @@ print("prep df_d")
build_df_dist_couple
=
function
(
df_h0
,
df_ha
,
name
)
{
if
((
!
is.null
(
df_h0
))
&
(
!
is.null
(
df_ha
)))
{
print
(
head
(
df_h0
))
df
=
merge
(
df_h0
,
df_ha
,
by
=
c
(
"Sites"
,
"P_
distance
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
,
"methode"
),
suffix
=
c
(
"_H0"
,
"_Ha"
))
df_melt
=
melt
(
df
,
id.vars
=
c
(
"Sites"
,
"P_
distance
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
,
"methode"
),
variable.name
=
"val_H0Ha"
)
df
=
merge
(
df_h0
,
df_ha
,
by
=
c
(
"Sites"
,
"P_
JSD"
,
"P_ED
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
,
"methode"
),
suffix
=
c
(
"_H0"
,
"_Ha"
))
df_melt
=
melt
(
df
,
id.vars
=
c
(
"Sites"
,
"P_
JSD"
,
"P_ED
"
,
"C1"
,
"C2"
,
"entropy_C1"
,
"entropy_C2"
,
"methode"
),
variable.name
=
"val_H0Ha"
)
df_melt
$
couple
=
name
}
else
{
df_melt
=
NULL
...
...
@@ -341,11 +341,11 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot
=
ggplot
(
df_d
,
aes
(
y
=
P_
distance
,
x
=
value
,
shape
=
val_H0Ha
,
col
=
val_H0Ha
))
plot
=
ggplot
(
df_d
,
aes
(
y
=
P_
JSD
,
x
=
value
,
shape
=
val_H0Ha
,
col
=
val_H0Ha
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
labs
(
y
=
"P
distance
"
,
x
=
"Value"
)
plot
=
plot
+
xlim
(
c
(
0
,
1
))
+
ylim
(
c
(
0
,
max
(
max
(
df_d
$
P_
distance
),
1
)))
plot
=
plot
+
labs
(
y
=
"P
JSD
"
,
x
=
"Value"
)
plot
=
plot
+
xlim
(
c
(
0
,
1
))
+
ylim
(
c
(
0
,
max
(
max
(
df_d
$
P_
JSD
),
1
)))
plot
=
plot
+
guides
(
col
=
FALSE
)
plot
=
plot
+
scale_color_manual
(
values
=
colors
)
plot
=
plot
+
geom_vline
(
data
=
df_recall_sup09_per_meth
,
aes
(
xintercept
=
threshold
,
col
=
couple
),
size
=
1
,
show.legend
=
NA
,
linetype
=
"dashed"
)
...
...
@@ -353,7 +353,21 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
plot
=
plot
+
scale_shape_manual
(
values
=
c
(
16
,
17
))
plot
=
plot
+
facet_grid
(
couple
~
methode
)
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
hjust
=
1
))
plot_value_distance
=
plot
plot_value_JSD
=
plot
plot
=
ggplot
(
df_d
,
aes
(
y
=
P_ED
,
x
=
value
,
shape
=
val_H0Ha
,
col
=
val_H0Ha
))
plot
=
plot
+
theme_bw
()
plot
=
plot
+
theme
(
legend.position
=
"top"
)
plot
=
plot
+
labs
(
y
=
"P ED"
,
x
=
"Value"
)
plot
=
plot
+
xlim
(
c
(
0
,
1
))
+
ylim
(
c
(
0
,
max
(
max
(
df_d
$
P_ED
),
1
)))
plot
=
plot
+
guides
(
col
=
FALSE
)
plot
=
plot
+
scale_color_manual
(
values
=
colors
)
plot
=
plot
+
geom_vline
(
data
=
df_recall_sup09_per_meth
,
aes
(
xintercept
=
threshold
,
col
=
couple
),
size
=
1
,
show.legend
=
NA
,
linetype
=
"dashed"
)
plot
=
plot
+
geom_point
(
size
=
1.2
,
alpha
=
0.7
,
stroke
=
0.01
)
plot
=
plot
+
scale_shape_manual
(
values
=
c
(
16
,
17
))
plot
=
plot
+
facet_grid
(
couple
~
methode
)
plot
=
plot
+
theme
(
axis.text.x
=
element_text
(
angle
=
45
,
hjust
=
1
))
plot_value_ED
=
plot
plot
=
ggplot
(
subset
(
df_d
,
val_H0Ha
==
"value_Ha"
),
aes
(
y
=
entropy_C2
,
x
=
value
,
shape
=
val_H0Ha
,
col
=
val_H0Ha
))
plot
=
plot
+
theme_bw
()
...
...
@@ -398,22 +412,23 @@ plot_out = function(df_out, df_d , df_recall_sup09_per_meth, meths = NULL, suffi
save_plot
(
paste0
(
opt
$
out
,
suffix
,
".value_distance_per_meth.pdf"
),
plot_value_
distance
,
plot_value_
JSD
,
ncol
=
0.4
*
length
(
unique
(
df_d
$
methode
)),
nrow
=
0.4
*
length
(
unique
(
df_d
$
couple
)),
base_aspect_ratio
=
1
,
limitsize
=
FALSE
)
plot
=
plot_grid
(
plot_recall_precision
,
plot_max_MCC
,
plot_value_
distance
,
plot_entropy_C2_C1
,
plot_entropy_C1
,
plot_entropy_C2
,
plot
=
plot_grid
(
plot_recall_precision
,
plot_max_MCC
,
plot_value_
JSD
,
plot_value_ED
,
plot_entropy_C2_C1
,
plot_entropy_C1
,
plot_entropy_C2
,
labels
=
c
(
"A"
,
"B"
,
"C"
,
"D"
,
"E"
,
"F"
),
rel_heights
=
c
(
length
(
unique
(
df_out
$
couple
))
*
0.8
,
3
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
),
nrow
=
6
)
rel_heights
=
c
(
length
(
unique
(
df_out
$
couple
))
*
0.8
,
3
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
,
length
(
unique
(
df_out
$
couple
))
*
0.8
),
nrow
=
7
)
save_plot
(
paste0
(
opt
$
out
,
suffix
,
".pdf"
),
plot
,
ncol
=
0.4
*
length
(
unique
(
df_out_melt
$
methode
)),
nrow
=
length
(
unique
(
df_out
$
couple
))
*
0.5
*
5
+
1
,
nrow
=
length
(
unique
(
df_out
$
couple
))
*
0.5
*
6
+
1
,
base_aspect_ratio
=
1
,
limitsize
=
FALSE
)
...
...
lib/scripts/generate_pairs.py
View file @
cbbc83e0
...
...
@@ -152,7 +152,7 @@ def entropy_p(p):
MESSAGE
(
"Preparing a dataframe for every bin..."
)
# columns = ["p1_" + str(i) for i in range(20)] + ["p2_" + str(i) for i in range(20)] + ["distance"]
columns
=
[
"p1"
,
"p2"
,
"distance"
,
"entropy_p1"
,
"entropy_p2"
]
columns
=
[
"p1"
,
"p2"
,
"distance"
,
"entropy_p1"
,
"entropy_p2"
,
"distance_eucl"
]
pair_bins
=
[
pd
.
DataFrame
(
columns
=
columns
)
for
x
in
range
(
nb_bins
)]
MESSAGE
(
"Picking profile pairs and computing distances..."
)
...
...
@@ -172,7 +172,7 @@ while min(map(lambda b: b.shape[0], pair_bins)) < binsize:
for
i
in
range
(
nb_bins
):
if
in_bin
(
dist
,
i
)
and
pair_bins
[
i
].
shape
[
0
]
<
binsize
:
new_row
=
[
p1
[
"i"
],
p2
[
"i"
],
dist
,
entropy_p
(
p1
[
"p"
]),
entropy_p
(
p2
[
"p"
])]
new_row
=
[
p1
[
"i"
],
p2
[
"i"
],
dist
,
entropy_p
(
p1
[
"p"
]),
entropy_p
(
p2
[
"p"
]),
euclidian_distance
(
p1
[
"p"
],
p2
[
"p"
])]
pair_bins
[
i
].
loc
[
len
(
pair_bins
[
i
])]
=
new_row
nb_ok
+=
1
break
...
...
lib/scripts/merge_det_results.py
View file @
cbbc83e0
...
...
@@ -154,7 +154,7 @@ if args.multinomial :
if
args
.
fna_infos
:
df_fna_infos
=
pd
.
read_csv
(
args
.
fna_infos
,
sep
=
"
\t
"
,
names
=
[
"C1"
,
"C2"
,
"P_
distance
"
,
"entropy_C1"
,
"entropy_C2"
])
df_fna_infos
=
pd
.
read_csv
(
args
.
fna_infos
,
sep
=
"
\t
"
,
names
=
[
"C1"
,
"C2"
,
"P_
JSD
"
,
"entropy_C1"
,
"entropy_C2"
,
"P_ED"
])
df_fna_infos
[
"Sites"
]
=
df_fna_infos
.
index
+
1
#df_fna_infos = df_fna_infos[['Sites','P_distance']]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment