Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
eossr
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
Gitlab has been updated. More info
here
.
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
ESCAPE2020
WP3
eossr
Merge requests
!254
hide code cell in stat notebook
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
hide code cell in stat notebook
more_stats
into
master
Overview
0
Commits
1
Pipelines
1
Changes
1
Merged
Vuillaume
requested to merge
more_stats
into
master
2 years ago
Overview
0
Commits
1
Pipelines
1
Changes
1
Expand
0
0
Merge request reports
Compare
master
master (base)
and
latest version
latest version
21c10774
1 commit,
2 years ago
1 file
+
22
−
21
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
docs/notebooks/ossr_statistics.ipynb
+
22
−
21
Options
%% Cell type:markdown id:6f9a9077 tags:
# OSSR Statistics
%% Cell type:code id:
dbb762f7
tags:
%% Cell type:code id:
fff204bf-3164-4844-a13d-e88b6231970a
tags:
```
python
from
eossr.api
import
get_ossr_records
,
get_zenodo_records
from
IPython.display
import
Markdown
as
md
from
datetime
import
date
import
itertools
import
matplotlib.pyplot
as
plt
```
%% Cell type:code id:32b381a7-ff5a-4c56-919a-d47ecdbec6bd tags:
```
python
plt
.
style
.
use
(
'
seaborn-v0_8-colorblind
'
)
```
%% Cell type:code id:b9668e64 tags:
```
python
title
=
f
"
## OSSR statistics generated the
{
date
.
today
()
}
"
ossr_records
=
get_ossr_records
()
stats_names
=
[
'
downloads
'
,
'
unique_downloads
'
,
'
views
'
,
'
unique_views
'
,
]
sum_stats
=
{
key
:
sum
([
int
(
rec
.
data
[
'
stats
'
][
key
])
for
rec
in
ossr_records
])
for
key
in
stats_names
}
text
=
f
"
## There are
{
len
(
ossr_records
)
}
records in the OSSR.
\n
"
for
key
,
value
in
sum_stats
.
items
():
text
+=
f
"
{
key
.
replace
(
'
_
'
,
'
'
)
}
:
{
value
}
\n\n
"
```
%% Cell type:code id:e71a35a5-8afa-466a-a2e6-424ff8d07611 tags:
```
python
import
matplotlib.pyplot
as
plt
from
collections
import
Counter
from
wordcloud
import
WordCloud
def create_pie_chart(occurrences, ax=None):
    """Draw a pie chart showing how often each distinct value occurs.

    Parameters
    ----------
    occurrences : iterable of hashable
        Values to tally. Every distinct value becomes one wedge, labelled
        with the value itself and annotated with its percentage share.
    ax : optional
        Axes-like object exposing a ``pie`` method. When ``None``, the
        current pyplot axes (``plt.gca()``) are used.

    Returns
    -------
    The axes object the pie was drawn on (nothing is shown explicitly).
    """
    # Tally before resolving the axes, so the input is consumed first.
    tally = Counter(occurrences)
    if ax is None:
        ax = plt.gca()

    wedge_sizes = list(tally.values())
    wedge_labels = list(tally)
    ax.pie(wedge_sizes, labels=wedge_labels, autopct='%1.1f%%', startangle=90)
    return ax
def create_word_cloud(words):
    """Render a word cloud in which each word is sized by its frequency.

    Words are upper-cased before counting, so differently-cased spellings
    of the same word share a single entry.

    Parameters
    ----------
    words : iterable of str
        The words to count; repeated words increase that word's weight.

    Returns
    -------
    The pyplot figure (10x6, axes hidden) holding the rendered cloud.
    """
    # Upper-case first so the tally is case-insensitive.
    frequencies = dict(Counter(term.upper() for term in words))

    # Build a 1000x600 cloud on a white background from the tallied counts.
    cloud = WordCloud(
        width=1000,
        height=600,
        background_color='white',
        stopwords=set(),
    ).generate_from_frequencies(frequencies)

    # Draw the cloud on a fresh figure without axes or padding.
    figure = plt.figure(figsize=(10, 6), facecolor=None)
    plt.imshow(cloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    return figure
```
%% Cell type:code id:b9
992892-a857-46a4-adc9-2a18f2889e35
tags:
%% Cell type:code id:b9
668e64
tags:
```
python
title
=
f
"
## OSSR statistics generated the
{
date
.
today
()
}
\n
"
ossr_records
=
get_ossr_records
()
stats_names
=
[
'
downloads
'
,
'
unique_downloads
'
,
'
views
'
,
'
unique_views
'
,
]
sum_stats
=
{
key
:
sum
([
int
(
rec
.
data
[
'
stats
'
][
key
])
for
rec
in
ossr_records
])
for
key
in
stats_names
}
text
=
title
+
"
\n
"
+
f
"
### There are
{
len
(
ossr_records
)
}
records in the OSSR.
\n
"
for
n
in
sum_stats
:
text
+=
f
"
|
{
n
.
replace
(
'
_
'
,
'
'
)
}
"
text
+=
"
|
\n
"
for
n
in
sum_stats
:
text
+=
"
|----
"
text
+=
"
|
\n
"
for
k
,
v
in
sum_stats
.
items
():
text
+=
f
"
|
{
v
}
"
text
+=
"
|
\n
"
```
%% Cell type:code id:fe656e0b tags:
```
python
md(t
itle
)
md
(
t
ext
)
```
%% Cell type:code id:df351e32 tags:
```
python
md(text)
```
%% Cell type:code id:d0e84058 tags:
```
python
escape2020_community_records = get_zenodo_records(communities='escape2020')
url_escape2020_not_ossr = 'https://zenodo.org/communities/escape2020/search?page=1&size=20&q=&type=publication&type=lesson&type=poster'
text = f"
**Note that there are also {len(escape2020_community_records) - len(ossr_records)} records in the `escape2020` community that are not software or datasets.**
\n\n
"
text += f"You may find them
[
directly on Zenodo
](
{url_escape2020_not_ossr}
)
"
```
%% Cell type:code id:a4290b16-8e72-4fbe-96e7-758c3a113fd7 tags:
```
python
md("## Distribution of licenses in the OSSR")
```
%% Cell type:code id:ab7ff264-055e-4a64-b4b5-968f1e4b38b6 tags:
```
python
licenses =
[
record.metadata['license'
][
'id'
]
for record in ossr_records]
plt.figure(figsize=(7,7))
create_pie_chart(licenses);
```
%% Cell type:code id:e653506a-64ee-4bd5-8c93-6cfe997d7319 tags:
```
python
md("## Cloud of OSSR keywords")
```
%% Cell type:code id:7676462b-64a0-49ab-aa59-72945a0a5024 tags:
```
python
keywords = [record.metadata['keywords'] for record in ossr_records if 'keywords' in record.metadata]
keywords = list(itertools.chain(
*
keywords))
```
%% Cell type:code id:750351bd-f043-4f3a-a8e2-0dee8afed6ae tags:
```
python
fig =
create_word_cloud(keywords)
create_word_cloud(keywords)
;
```
%% Cell type:code id:
8cc6ae00-b2e3-4ee2-ba1c-373ff9cf0d6
f tags:
%% Cell type:code id:
12f81935-16e7-4aba-a6a9-474cd2ad081
f tags:
```
python
```
%% Cell type:code id:696416cf tags:
```
python
md(text)
```
%% Cell type:code id:ae5204cf tags:
```
python
```
Loading