Skip to content
Snippets Groups Projects
Commit e6778527 authored by Vuillaume
Browse files

Merge branch 'more_stats' into 'master'

more ossr stats in doc

See merge request !253
parents f5f87f0b 2397761e
No related branches found
No related tags found
1 merge request: !253 more ossr stats in doc
Pipeline #224013 passed
......@@ -9,3 +9,4 @@ sphinxcontrib-autoprogram
ipython_genutils
sphinxcontrib-mermaid
docutils>=0.17
wordcloud
%% Cell type:markdown id:6f9a9077 tags:
# OSSR Statistics
%% Cell type:code id:dbb762f7 tags:
``` python
from eossr.api import get_ossr_records, get_zenodo_records
from IPython.display import Markdown as md
from datetime import date
import itertools
import matplotlib.pyplot as plt
```
%% Cell type:code id:32b381a7-ff5a-4c56-919a-d47ecdbec6bd tags:
``` python
# Apply matplotlib's 'seaborn-v0_8-colorblind' style sheet to the figures drawn below.
plt.style.use('seaborn-v0_8-colorblind')
```
%% Cell type:code id:b9668e64 tags:
``` python
# Report headline, stamped with the day the notebook is executed.
title = f"## OSSR statistics generated the {date.today()}"

# Fetch the OSSR records once; every statistic below is derived from this list.
ossr_records = get_ossr_records()

# Usage counters that are summed over all records.
stats_names = ['downloads', 'unique_downloads', 'views', 'unique_views']
sum_stats = {}
for key in stats_names:
    # Each counter is passed through int() before summing.
    sum_stats[key] = sum(int(rec.data['stats'][key]) for rec in ossr_records)

# Markdown summary: record count followed by one paragraph per counter.
text = f"## There are {len(ossr_records)} records in the OSSR.\n" + "".join(
    f"{key.replace('_', ' ')}: {value}\n\n" for key, value in sum_stats.items()
)
```
%% Cell type:code id:e71a35a5-8afa-466a-a2e6-424ff8d07611 tags:
``` python
import matplotlib.pyplot as plt
from collections import Counter
from wordcloud import WordCloud
def create_pie_chart(occurrences, ax=None):
    """Draw a pie chart showing the share of each distinct value.

    Parameters
    ----------
    occurrences : iterable of hashable
        Raw values; each distinct value becomes one wedge whose size is
        the number of times the value occurs.
    ax : matplotlib axes, optional
        Axes to draw on. When omitted, the current pyplot axes are used.

    Returns
    -------
    The axes the chart was drawn on.
    """
    tally = Counter(occurrences)
    if ax is None:
        ax = plt.gca()
    # Labels and sizes are both taken from the same Counter, so they line up.
    labels = list(tally)
    sizes = list(tally.values())
    ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    return ax
def create_word_cloud(words):
    """Render a word cloud of ``words`` and return the figure holding it.

    Words are upper-cased before counting, so entries that differ only by
    case are merged into a single word.

    Parameters
    ----------
    words : iterable of str
        Words to display; repeated entries raise that word's frequency.

    Returns
    -------
    The newly created 10x6 inch pyplot figure showing the cloud with the
    axes hidden.
    """
    # Case-insensitive frequency table (Counter replaces the manual dict tally).
    word_freq = Counter(word.upper() for word in words)
    # Build the cloud directly from the frequencies on a 1000x600 white canvas.
    wordcloud = WordCloud(
        width=1000,
        height=600,
        background_color='white',
        stopwords=set(),
    ).generate_from_frequencies(word_freq)
    # Show the cloud as an image in a fresh figure, without axes or padding.
    fig = plt.figure(figsize=(10, 6), facecolor=None)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    return fig
```
%% Cell type:code id:b9992892-a857-46a4-adc9-2a18f2889e35 tags:
``` python
```
%% Cell type:code id:fe656e0b tags:
``` python
# Display the dated report title as Markdown.
md(title)
```
%% Cell type:code id:df351e32 tags:
``` python
# Display the record count and the summed usage statistics as Markdown.
md(text)
```
%% Cell type:code id:d0e84058 tags:
``` python
# Records of the whole `escape2020` Zenodo community.
escape2020_community_records = get_zenodo_records(communities='escape2020')
# Zenodo search restricted to publications, lessons and posters of the community.
url_escape2020_not_ossr = 'https://zenodo.org/communities/escape2020/search?page=1&size=20&q=&type=publication&type=lesson&type=poster'
# Community records that are not counted among the OSSR records.
n_not_in_ossr = len(escape2020_community_records) - len(ossr_records)
text = (
    f"**Note that there are also {n_not_in_ossr} records in the `escape2020` community that are not software or datasets.**\n\n"
    f"You may find them [directly on Zenodo]({url_escape2020_not_ossr})"
)
```
%% Cell type:code id:a4290b16-8e72-4fbe-96e7-758c3a113fd7 tags:
``` python
# Section heading for the license pie chart.
md("## Distribution of licenses in the OSSR")
```
%% Cell type:code id:ab7ff264-055e-4a64-b4b5-968f1e4b38b6 tags:
``` python
# One license identifier per OSSR record (duplicates kept so they can be counted).
licenses = [rec.metadata['license']['id'] for rec in ossr_records]
# Square figure so the pie is not squeezed; draw on that figure's current axes.
license_fig = plt.figure(figsize=(7, 7))
create_pie_chart(licenses, ax=license_fig.gca());
```
%% Cell type:code id:e653506a-64ee-4bd5-8c93-6cfe997d7319 tags:
``` python
# Section heading for the keyword word cloud.
md("## Cloud of OSSR keywords")
```
%% Cell type:code id:7676462b-64a0-49ab-aa59-72945a0a5024 tags:
``` python
# Flatten the keyword lists of all records into a single list;
# records without a 'keywords' entry are skipped.
keywords = list(
    itertools.chain.from_iterable(
        rec.metadata['keywords']
        for rec in ossr_records
        if 'keywords' in rec.metadata
    )
)
```
%% Cell type:code id:750351bd-f043-4f3a-a8e2-0dee8afed6ae tags:
``` python
# Draw the keyword cloud; the returned figure is kept in `fig`.
fig = create_word_cloud(keywords)
```
%% Cell type:code id:8cc6ae00-b2e3-4ee2-ba1c-373ff9cf0d6f tags:
``` python
```
%% Cell type:code id:696416cf tags:
``` python
# Display the note about `escape2020` community records that are not in the OSSR
# (`text` as last assigned in the escape2020 cell above, when cells run in order).
md(text)
```
%% Cell type:code id:ae5204cf tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment