diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index abc50b24075ff92f18832462441b969f54908035..da7f731b87b9d0caf2754aa1c2b849c34aeaa11e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,6 +1,9 @@ stages: - install - test + - build_containers + - zenodo + - deploy .junit_template: &junit_definition artifacts: @@ -35,3 +38,49 @@ test_py37: --cov-report=xml --cov-report=term <<: *junit_definition + +build_docker: + stage: build_containers + image: docker:19.03.12 + services: + - docker:19.03.12-dind + before_script: + - cat /etc/os-release # "Alpine Linux v3.12" + - apk add git + - export LAST_RELEASE=`git ls-remote --tags --refs --sort="v:refname" $CI_PROJECT_URL.git | tail -n1 | sed 's/.*\///'` + - echo $LAST_RELEASE + script: + - cd Docker + - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY + - docker build -t $CI_REGISTRY_IMAGE:$LAST_RELEASE . + - docker push $CI_REGISTRY_IMAGE:$LAST_RELEASE + only: + - tags + +deploy_zenodo: + stage: zenodo + image: gitlab-registry.in2p3.fr/escape2020/wp3/eossr:v0.1 + before_script: + - eossr-check-connection-zenodo --token $ZENODO_TOKEN --sandbox False -p $CI_PROJECT_DIR + script: + - mkdir -p build + - parse_last_release_git.sh $CI_PROJECT_NAME $CI_PROJECT_URL + - if [[ -f ./codemeta.json ]]; then cp ./codemeta.json ./build; fi + - ls ./build + + - eossr-upload-new-deposit --token $ZENODO_TOKEN --sandbox False --input-dir ./build +# - eossr-upload-new-version-deposit -t $ZENODO_TOKEN -s False -i ./build -id $ZENODO_PROJECT_ID + only: + - tags + +pages: + stage: deploy + image: python:3.7-alpine + script: + - pip install -U sphinx + - sphinx-build -b html docs public + artifacts: + paths: + - public + only: + - master diff --git a/Docker/Dockerfile b/Docker/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..58ba93feb58c6aed2e0672fd3401525a76ffa59d --- /dev/null +++ b/Docker/Dockerfile @@ -0,0 +1,7 @@ +FROM continuumio/miniconda3 + +ENV PATH=/opt/conda/bin/:${PATH} +SHELL [ 
"/bin/bash", "-c"] + +RUN conda install git pip && \ + pip --no-cache-dir install git+https://gitlab.in2p3.fr/escape2020/wp3/eossr diff --git a/README.md b/README.md index e6ab968198bdddc9baa47b9753b3bb0347bacb14..c944d8da4777c2e940b6e10c9f1115a020077ecb 100644 --- a/README.md +++ b/README.md @@ -7,4 +7,7 @@ https://gitlab.in2p3.fr/escape2020/wp3/eossr/-/commits/master) https://gitlab.in2p3.fr/escape2020/wp3/eossr/-/commits/master) -ESCAPE OSSR library \ No newline at end of file +ESCAPE OSSR library + +* Code: https://gitlab.in2p3.fr/escape2020/wp3/eossr +* Documentation: https://escape2020.pages.in2p3.fr/wp3/eossr/ \ No newline at end of file diff --git a/codemeta.json b/codemeta.json index 478f5e9a8fb117629764f888721badf574629f37..bc62fcf02a6f95879b3417699719e2073044f15a 100644 --- a/codemeta.json +++ b/codemeta.json @@ -38,7 +38,7 @@ "softwareVersion": ">=5.4.2" } ], - "keywords": [], + "keywords": ["jupyter-notebook"], "runtimePlatform": "", "downloadUrl": "", "installUrl": "", diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d4bb2cbb9eddb1bb1b4f366623044af8e4830919 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.rst b/docs/README.rst new file mode 100644 index 0000000000000000000000000000000000000000..d51cf8301a8feabeac3cff8e39ed0e6c12d06e08 --- /dev/null +++ b/docs/README.rst @@ -0,0 +1,5 @@ +README +====== + +.. literalinclude:: ../README.md + :lines: 9- \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..fdd6b31c52e73044e5413b806dc0c705a3f41ada --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,70 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +import shutil +sys.path.insert(0, os.path.abspath('..')) +notebook_dir = '../examples/notebooks/' +if not os.path.exists('examples'): + os.mkdir('examples') +[shutil.copy(notebook_dir + file, 'examples') for file in os.listdir(notebook_dir) if file.endswith('.ipynb')] + +# -- Project information ----------------------------------------------------- + +project = 'eossr' +copyright = '2021, ESCAPE WP3-OSSR developers and contributors' +author = 'Thomas Vuillaume & Enrique Garcia' + +release = '' + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.napoleon' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# Parsers +source_parsers = { + '.md': 'recommonmark.parser.CommonMarkParser', +} + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +source_suffix = ['.rst', '.md'] + + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] \ No newline at end of file diff --git a/docs/docstring.rst b/docs/docstring.rst new file mode 100644 index 0000000000000000000000000000000000000000..73fe2f0f1b40e7e18fbd522ee40816bcea925c31 --- /dev/null +++ b/docs/docstring.rst @@ -0,0 +1,22 @@ +===================== +Package Documentation +===================== + +Autogenerated documentation from docstrings. + +API module +------------- + +.. automodule:: eossr.api.zenodo + :members: + :undoc-members: + :show-inheritance: + + +Metadata module +---------------------- + +.. 
automodule:: eossr.metadata.codemeta2zenodo + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 0000000000000000000000000000000000000000..cebfa10790e2eacbedbf796293955504762af59f --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,10 @@ +Examples +======== + +Work in progress. + +.. toctree:: + :maxdepth: 0 + + examples/Harvest_zenodo.ipynb + ../examples/notebooks/Harvest_zenodo.ipynb diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..23c80d6a0c9f366bc38c13f09d13b075175f7846 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,26 @@ +.. eossr documentation master file, created by + sphinx-quickstart on Wed Sep 8 16:24:23 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to eossr's documentation! +================================= + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + README + license + docstring + examples + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/license.rst b/docs/license.rst new file mode 100644 index 0000000000000000000000000000000000000000..4466ef5ee9750bfaedd1fc3e0b18c044a7703b1c --- /dev/null +++ b/docs/license.rst @@ -0,0 +1,4 @@ +License +============= + +.. include:: ../LICENSE \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000000000000000000000000000000000000..2119f51099bf37e4fdb6071dce9f451ea44c62dd --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/eossr/api/__init__.py b/eossr/api/__init__.py index 83f132a73d5412ec414dc6bd70583840bc3bb570..6d8b2d6f1fbc9cbcd27824cb0c99b7959c9f05d6 100644 --- a/eossr/api/__init__.py +++ b/eossr/api/__init__.py @@ -1,8 +1,9 @@ import requests +from . import zenodo from .zenodo import Record, get_zenodo_records, zenodo_api_url __all__ = [ - 'Record', + 'zenodo', 'get_ossr_records' ] @@ -42,3 +43,4 @@ def get_ossr_records(search='', **kwargs): kwargs['communities'] = escape_community return get_zenodo_records(search, **kwargs) + diff --git a/eossr/api/zenodo.py b/eossr/api/zenodo.py index 079ea6b789bb9586b387686914598d0ffe24b0db..8fe6352e1f59f19d77aa8a9e6d20af2b4e7c8662 100644 --- a/eossr/api/zenodo.py +++ b/eossr/api/zenodo.py @@ -313,7 +313,8 @@ class ZenodoAPI: parse_codemeta_and_write_zenodo_metadata_file(self.path_codemeta_file, self.path_zenodo_metadata_file) else: - pass + print("\n ! NO codemeta.json file found. \n" + " Please add one to the ROOT directory of your project to ble able to perform the conversion.") def check_upload_to_zenodo(self): """ @@ -341,8 +342,7 @@ class ZenodoAPI: if self.exist_codemeta_file: self.conversion_codemeta2zenodo() else: - print("\n ! NO codemeta.json NOR .zenodo.json file found. " - "Please add one to the ROOT directory of your project.") + # No codemeta.json nor .zenodo.json sys.exit(-1) print("\n * Using the .zenodo.json file to simulate a new upload to Zenodo... 
\n") diff --git a/eossr/ci/__init__.py b/eossr/ci/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/eossr/metadata/__init__.py b/eossr/metadata/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d57dd4a8e32bc8efe60f7084b55a486a7ce932bb 100644 --- a/eossr/metadata/__init__.py +++ b/eossr/metadata/__init__.py @@ -0,0 +1 @@ +from . import codemeta2zenodo diff --git a/eossr/metadata/codemeta2zenodo.py b/eossr/metadata/codemeta2zenodo.py index 8b204844ce0df5cb36bc74de0467433c662eb03b..373c9be07bddfc837198159c1cd7f8c474c23be4 100644 --- a/eossr/metadata/codemeta2zenodo.py +++ b/eossr/metadata/codemeta2zenodo.py @@ -50,7 +50,7 @@ def parse_person_schema_property(person_property, contributor_field): # Parse correctly the contributors if contributor_field in codemeta_contributors_fields: - if contributor_field == 'provider' or contributor_field == 'publisher': + if contributor_field in ['provider', 'publisher']: zenodo_person['type'] = 'Other' else: # First letter of contributor type MUST be capitalized @@ -92,7 +92,7 @@ def add_author_metadata(zenodo_file, codemt_person_entry, person_field): full_contacts[zenodo_person['name']] = zenodo_person # then save each person by field and avoid duplicates - for person in full_contacts: + for person, value in full_contacts.items(): if person_field in codemeta_creators_fields: @@ -100,11 +100,8 @@ def add_author_metadata(zenodo_file, codemt_person_entry, person_field): if 'creators' not in zenodo_file: zenodo_file['creators'] = [] - if full_contacts[person] not in zenodo_file['creators']: + if value not in zenodo_file['creators']: zenodo_file['creators'].append(full_contacts[person]) - else: - pass # avoid duplicates - elif person_field in codemeta_contributors_fields: if 'contributors' not in zenodo_file: @@ -112,8 +109,6 @@ def add_author_metadata(zenodo_file, codemt_person_entry, person_field): if full_contacts[person] not in 
zenodo_file['contributors']: zenodo_file['contributors'].append(full_contacts[person]) - else: - pass # avoid duplicates def find_matching_metadata(codemeta_json): @@ -171,11 +166,8 @@ def find_matching_metadata(codemeta_json): if 'softwareVersion' in codemeta_json and 'version' not in codemeta_json: metadata_zenodo['version'] = codemeta_json['softwareVersion'] - elif 'version' in codemeta_json and 'softwareVersion' not in codemeta_json: - metadata_zenodo['version'] = codemeta_json['version'] else: metadata_zenodo['version'] = codemeta_json['version'] - if 'keywords' in codemeta_json: if type(codemeta_json['keywords']) == list: metadata_zenodo['keywords'] = codemeta_json['keywords'] diff --git a/eossr/metadata/generator.py b/eossr/metadata/generator.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/eossr/metadata/schema/.zenodo.json b/eossr/metadata/schema/.zenodo.json new file mode 100644 index 0000000000000000000000000000000000000000..91889f9dcf28ecf93eb08c0d432b08368314a317 --- /dev/null +++ b/eossr/metadata/schema/.zenodo.json @@ -0,0 +1,46 @@ +{ + "title": "For details check https://developers.zenodo.org/#representation.", + "upload_type": "software", + "access_right": "open", + "publication_date": "YYYY-MM-DD", + "communities": [{"identifier": "escape2020"}], + "grants": [{"id": "10.13039/501100000780::824064"}], + "creators": [ + {"name": "Name and Surname", + "affiliation": "Institute, Center", + "orcid": "0000-0000-0000-0000"} + ], + "description": "", + "license": "", + "doi": "", + "prereserve_doi": "", + "keywords": [], + "notes": "", + "related_identifiers": "", + "contributors": [{}], + "references": [], + "version": "", + "language": "", + "journal_title": "", + "journal_volume": "", + "journal_issue": "", + "journal_pages": "", + "conference_title": "", + "conference_acronym": "", + "conference_dates": "", + "conference_place": "", + "conference_url": "", + 
"conference_session": "", + "conference_session_part": "", + "imprint_publisher": "", + "imprint_isbn": "", + "imprint_place": "", + "partof_title": "", + "partof_pages": "", + "thesis_supervisors": "", + "thesis_university": "", + "subjects": "", + "locations": [{}], + "dates": [{}], + "method": "" +} \ No newline at end of file diff --git a/eossr/metadata/tests/test_codemeta2zenodo.py b/eossr/metadata/tests/test_codemeta2zenodo.py index 779360d50915c27ed6f72109f7c47270d586766e..13cbb4ff767820686022c13b4b19d75373bb17c8 100644 --- a/eossr/metadata/tests/test_codemeta2zenodo.py +++ b/eossr/metadata/tests/test_codemeta2zenodo.py @@ -2,6 +2,7 @@ import json import unittest import tempfile from os.path import dirname, realpath, join +from eossr.metadata import codemeta2zenodo SAMPLES_DIR = join(dirname(realpath(__file__)), "samples") ROOT_DIR = dirname(realpath("codemeta.json")) @@ -60,9 +61,8 @@ zenodo_entries = [ def test_Codemeta2ZenodoController(): - from eossr.metadata.codemeta2zenodo import CodeMeta2ZenodoController - converter = CodeMeta2ZenodoController() + converter = codemeta2zenodo.CodeMeta2ZenodoController() assert converter.codemeta_data == {} assert converter.zenodo_data == {} @@ -84,41 +84,37 @@ def test_Codemeta2ZenodoController(): def test_add_author_metadata(): - from eossr.metadata.codemeta2zenodo import add_author_metadata, codemeta_allowed_person_fields with open(join(SAMPLES_DIR, "codemeta_contributors_sample.json")) as f: codemeta_metadata = json.load(f) zenodo_metadata = {} - assert all(person in codemeta_metadata.keys() for person in codemeta_allowed_person_fields) + assert all(person in codemeta_metadata.keys() for person in codemeta2zenodo.codemeta_allowed_person_fields) - for person in codemeta_allowed_person_fields: - add_author_metadata(zenodo_metadata, - codemeta_metadata[person], - person - ) + for person in codemeta2zenodo.codemeta_allowed_person_fields: + codemeta2zenodo.add_author_metadata(zenodo_metadata, + codemeta_metadata[person], 
+ person + ) - assert 'creators' in zenodo_metadata.keys() + assert 'creators' in zenodo_metadata # 4 'creators' one repeated, should not be duplicated. # Maintainer and Contributor. Author and Creator are the same person assert len(zenodo_metadata['creators']) == 3 - assert 'contributors' in zenodo_metadata.keys() + assert 'contributors' in zenodo_metadata # Editor, Producer, Publisher, Provider and Sponsor assert len(zenodo_metadata['contributors']) == 5 def test_parse_person_schema_property(): - from eossr.metadata.codemeta2zenodo import \ - parse_person_schema_property, \ - codemeta_contributors_fields with open(join(SAMPLES_DIR, "codemeta_contributors_sample.json")) as f: codemeta_metadata = json.load(f) - for person in codemeta_contributors_fields: - zenodo_metadata = parse_person_schema_property(codemeta_metadata[person], - person) + for person in codemeta2zenodo.codemeta_contributors_fields: + zenodo_metadata = codemeta2zenodo.parse_person_schema_property(codemeta_metadata[person], + person) if person == 'editor': assert zenodo_metadata['type'] == 'Editor' elif person == 'producer': @@ -131,20 +127,18 @@ def test_parse_person_schema_property(): class TestConverting(unittest.TestCase): def test_sample_file_conversion(self): - from eossr.metadata.codemeta2zenodo import parse_codemeta_and_write_zenodo_metadata_file outfile = tempfile.NamedTemporaryFile(delete=True) - parse_codemeta_and_write_zenodo_metadata_file( + codemeta2zenodo.parse_codemeta_and_write_zenodo_metadata_file( join(SAMPLES_DIR, "codemeta_sample1.json"), outfile.name ) json.load(outfile) def test_root_codemeta_conversion(self): - from eossr.metadata.codemeta2zenodo import parse_codemeta_and_write_zenodo_metadata_file outfile = tempfile.NamedTemporaryFile(delete=True) - parse_codemeta_and_write_zenodo_metadata_file( + codemeta2zenodo.parse_codemeta_and_write_zenodo_metadata_file( join(ROOT_DIR, "codemeta.json"), outfile.name ) json.load(outfile) @@ -152,9 +146,8 @@ class 
TestConverting(unittest.TestCase): class TestLicense(unittest.TestCase): def test_license1(self): - from eossr.metadata.codemeta2zenodo import CodeMeta2ZenodoController - converter = CodeMeta2ZenodoController() + converter = codemeta2zenodo.CodeMeta2ZenodoController() converter.convert_license() assert 'license' not in converter.zenodo_data diff --git a/eossr/metadata/validator.py b/eossr/metadata/validator.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/examples/CI_code_snippets/1.ex_CI_build_docker_container.md b/examples/CI_code_snippets/1.ex_CI_build_docker_container.md new file mode 100644 index 0000000000000000000000000000000000000000..a7f3adc586720bb8a05068ad03868cf04bbb5b9b --- /dev/null +++ b/examples/CI_code_snippets/1.ex_CI_build_docker_container.md @@ -0,0 +1,31 @@ +# Build a Docker container during the CI process + + - Builds a Docker container during the CI process. + - Uploads the container to the GitLab container registry. + +**NOTE**. You should provide the Docker recipe (`Dockerfile`) and add it to a `Docker` directory in the root directory +of your project. + +```yaml +stages: + - build_container + + +build_docker: + stage: build_container + image: docker:19.03.12 + services: + - docker:19.03.12-dind + before_script: + - cat /etc/os-release # "Alpine Linux v3.12" + - apk add git + - export LAST_RELEASE=`git ls-remote --tags --refs --sort="v:refname" $CI_PROJECT_URL.git | tail -n1 | sed 's/.*\///'` + - echo $LAST_RELEASE + script: + - cd Docker # You should have added before your Dockerfile in this dir + - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY + - docker build -t $CI_REGISTRY_IMAGE:$LAST_RELEASE . 
+ - docker push $CI_REGISTRY_IMAGE:$LAST_RELEASE + only: + - tags +``` \ No newline at end of file diff --git a/examples/CI_code_snippets/2.ex_CI_build_singularity_image.md b/examples/CI_code_snippets/2.ex_CI_build_singularity_image.md new file mode 100644 index 0000000000000000000000000000000000000000..1fbb7299b9ff63171f40434a3f101353cd10d85c --- /dev/null +++ b/examples/CI_code_snippets/2.ex_CI_build_singularity_image.md @@ -0,0 +1,40 @@ +# Build a Singularity image during the CI process + +This example is based on [this project](https://gitlab.com/singularityhub/gitlab-ci) (https://zenodo.org/record/3834833). +To be able to include the following code snippet into your `.gitlab-ci.yml` file, you should check the License +compatibility of your project and the origin one (an example can be found [here]( +https://gitlab.in2p3.fr/escape2020/wp3/template_project_escape/-/tree/master/.singularityci)). + + +This code snippet + + - Builds a Singularity image during the CI process. + - Makes available the created artifact (the image) to be used in the next CI stages. + +**PLEASE NOTE** + - You should provide the Singularity recipe (`Singularity`) and add it to a `Singularity` directory in the +root directory of your project. + - The image will be built using **Singularity v2.6**. 
+ + +```yaml +stages: + - build_container + +build_singularity: + stage: build_container + image: singularityware/singularity:gitlab-2.6 + script: + - export IMAGE_NAME=$CI_PROJECT_NAME # Choose the name of the image + - singularity build $IMAGE_NAME.simg Singularity/Singularity + + - mkdir -p build && cp Singularity/*.simg build + - mkdir -p build && cp Singularity/Singularity* build + artifacts: + paths: + - build/Singularity.simg + - build/Singularity + only: + - tags + +``` \ No newline at end of file diff --git a/examples/CI_code_snippets/3.ex_CI_upload_new_deposit.md b/examples/CI_code_snippets/3.ex_CI_upload_new_deposit.md new file mode 100644 index 0000000000000000000000000000000000000000..76396e18bdab8496ed4592316289a62353cd436c --- /dev/null +++ b/examples/CI_code_snippets/3.ex_CI_upload_new_deposit.md @@ -0,0 +1,54 @@ +# Upload a new entry to the OSSR + + - Uses the GitLab CI to upload the current project to the ESCAPE-OSSR (The ESCAPE2020 Zenodo community). + - Note that the CI will be only triggered with the creation of a new release. + - The `codemeta.json` file - compulsory if you want to run this code - will be also added to the Zenodo entry as + a separate file. + - The `eossr-check-connection-zenodo` stage will create a first dummy upload (that will be always erased), to check + that the released code will be successfully uploaded. 
+ + +### Upload to Zenodo +```yaml +stages: + - deploy + +deploy_zenodo: + stage: deploy + image: gitlab-registry.in2p3.fr/escape2020/wp3/eossr:v0.1 + before_script: + - eossr-check-connection-zenodo --token $ZENODO_TOKEN --sandbox False -p $CI_PROJECT_DIR + script: + - mkdir -p build + - parse_last_release_git.sh $CI_PROJECT_NAME $CI_PROJECT_URL + - if [[ -f ./codemeta.json ]]; then cp ./codemeta.json ./build; fi + - ls ./build + + - eossr-upload-new-deposit -t $ZENODO_TOKEN -s False -i ./build -id $ZENODO_PROJECT_ID + only: + - tags + +``` + + +### Upload to Sandbox Zenodo +```yaml +stages: + - deploy + +deploy_zenodo: + stage: deploy + image: gitlab-registry.in2p3.fr/escape2020/wp3/eossr:v0.1 + before_script: + - eossr-check-connection-zenodo --token $SANDBOX_ZENODO_TOKEN --sandbox True -p $CI_PROJECT_DIR + script: + - mkdir -p build + - parse_last_release_git.sh $CI_PROJECT_NAME $CI_PROJECT_URL + - if [[ -f ./codemeta.json ]]; then cp ./codemeta.json ./build; fi + - ls ./build + + - eossr-upload-new-deposit -t $SANDBOX_ZENODO_TOKEN -s True -i ./build -id $ZENODO_PROJECT_ID + only: + - tags + +``` diff --git a/examples/CI_code_snippets/4.ex_CI_upload_new_version_deposit.md b/examples/CI_code_snippets/4.ex_CI_upload_new_version_deposit.md new file mode 100644 index 0000000000000000000000000000000000000000..961cdc8261610d14bfd4adb5bc69a6df69abeaf1 --- /dev/null +++ b/examples/CI_code_snippets/4.ex_CI_upload_new_version_deposit.md @@ -0,0 +1,69 @@ +# Upload a new version of an existing entry to the OSSR + + - Uses the GitLab CI to upload a **new version** of the current project to the ESCAPE-OSSR (The ESCAPE2020 Zenodo + community). + - Note that the CI will be only triggered with the creation of a new release. + - The `codemeta.json` file - compulsory if you want to run this code - will be also added to the Zenodo entry as + a separate file. 
+ - The `eossr-check-connection-zenodo` stage will create a first dummy upload (that will be always erased), to check + that the released code will be successfully uploaded. + +**NOTE**. +You should have saved the `deposit_id` of your project as a GitLab environment variable before the CI runs this stage. + +To do so: +1. Go to https://zenodo.org/deposit, +2. Click onto your just uploaded project, +3. From your browser search bar, **just** copy the number (your `deposit id`) that is included in the URL. + - ex: `https://zenodo.org/record/3884963` --> just copy `3884963`. +4. Save it as a new environment variable in your GitLab project. + * Go to your GitLab project. + * Click on `Settings` --> `CI/CD` --> `Variables` --> `Add variable` + * `KEY` = `ZENODO_PROJECT_ID` and fill the value with the deposit id. + * For Sandbox Zenodo use `KEY` = `SANDBOX_ZENODO_PROJECT_ID` + + +### Upload to Zenodo +```yaml +stages: + - deploy + +deploy_zenodo: + stage: deploy + image: gitlab-registry.in2p3.fr/escape2020/wp3/eossr:v0.1 + before_script: + - eossr-check-connection-zenodo --token $ZENODO_TOKEN --sandbox False -p $CI_PROJECT_DIR + script: + - mkdir -p build + - parse_last_release_git.sh $CI_PROJECT_NAME $CI_PROJECT_URL + - if [[ -f ./codemeta.json ]]; then cp ./codemeta.json ./build; fi + - ls ./build + + - eossr-upload-new-version-deposit -t $ZENODO_TOKEN -s False -i ./build -id $ZENODO_PROJECT_ID + only: + - tags + +``` + + +### Upload to Sandbox Zenodo +```yaml +stages: + - deploy + +deploy_zenodo: + stage: deploy + image: gitlab-registry.in2p3.fr/escape2020/wp3/eossr:v0.1 + before_script: + - eossr-check-connection-zenodo --token $SANDBOX_ZENODO_TOKEN --sandbox True -p $CI_PROJECT_DIR + script: + - mkdir -p build + - parse_last_release_git.sh $CI_PROJECT_NAME $CI_PROJECT_URL + - if [[ -f ./codemeta.json ]]; then cp ./codemeta.json ./build; fi + - ls ./build + + - eossr-upload-new-version-deposit -t $SANDBOX_ZENODO_TOKEN -s True -i ./build -id 
$SANDBOX_ZENODO_PROJECT_ID + only: + - tags + +``` diff --git a/examples/CI_code_snippets/5.ex_CI_build_image_and_upload_OSSR.md b/examples/CI_code_snippets/5.ex_CI_build_image_and_upload_OSSR.md new file mode 100644 index 0000000000000000000000000000000000000000..e392a029d94f635a56fd620d0a0b2c3f7c456ad4 --- /dev/null +++ b/examples/CI_code_snippets/5.ex_CI_build_image_and_upload_OSSR.md @@ -0,0 +1,80 @@ +# Build a Singularity image, a Docker container and upload your project and images to the OSSR + +This code snippet will: + + - Builds a Docker container during the CI process. + - Builds a Singularity image during the CI process. + - Makes available the created artifacts (both images) to be used in the next CI stages. + - Uploads the next released version of the current project, together with both images, to the ESCAPE-OSSR + (The ESCAPE2020 Zenodo community). + +Have a look before to the examples in this same directory. + +```yaml +stages: + - build_container + - deploy + +build_singularity_image: + stage: build + image: singularityware/singularity:gitlab-2.6 + script: + # You should have added before your Singularity recipe in a Singularity dir + - singularity build $IMAGE_NAME.simg Singularity/Singularity + + - mkdir -p build && cp Singularity/*.simg build + - mkdir -p build && cp Singularity/Singularity* build + artifacts: + paths: + - build/Singularity.simg + - build/Singularity + only: + - tags + +build_docker_image: + stage: build + image: docker:19.03.12 + services: + - docker:19.03.12-dind + before_script: + - cat /etc/os-release # "Alpine Linux v3.12" + - apk add git + - export LAST_RELEASE=`git ls-remote --tags --refs --sort="v:refname" $CI_PROJECT_URL.git | tail -n1 | sed 's/.*\///'` + - echo $LAST_RELEASE + script: + - cd Docker + - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY + - docker build -t $CI_REGISTRY_IMAGE:$LAST_RELEASE . 
+ - docker push $CI_REGISTRY_IMAGE:$LAST_RELEASE + + # Save docker image into a file to be uploaded in next stage + - docker save -o Docker_image_ESCAPE_template_project_$LAST_RELEASE.tar $CI_REGISTRY_IMAGE:$LAST_RELEASE + - cd .. && mkdir -p build && cp Docker/*.tar build + - ls build + artifacts: + paths: + - build/Docker_image_ESCAPE_template_project_*.tar + only: + - tags + + +deploy_zenodo: + stage: deploy + image: gitlab-registry.in2p3.fr/escape2020/wp3/eossr:v0.1 + # The `dependencies` key (and field) is added to this stage ONLY because we have created + # in previous stages some artifacts + dependencies: + - build_singularity_image + - build_docker_image + before_script: + - test_connection_zenodo --token $ZENODO_TOKEN --sandbox False -p $CI_PROJECT_DIR + script: + - mkdir -p build + - parse_last_release_git.sh $CI_PROJECT_NAME $CI_PROJECT_URL + - if [[ -f ./codemeta.json ]]; then cp ./codemeta.json ./build; fi + - ls ./build + + - upload_new_version_deposit -t $ZENODO_TOKEN -s False -i ./build -id $ZENODO_PROJECT_ID + only: + - tags +``` diff --git a/examples/notebooks/Harvest_zenodo.ipynb b/examples/notebooks/Harvest_zenodo.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..06a835b530f6a8d2f770b1846a1c9a237f0eac99 --- /dev/null +++ b/examples/notebooks/Harvest_zenodo.ipynb @@ -0,0 +1,926 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b0fcf1bf", + "metadata": {}, + "source": [ + "<h1><center> <font size=\"36\"> How to harvest metadata from Zenodo </font> </center></h1>\n", + "\n", + "---------------------\n", + "#### Notebook outline \n", + " - Zenodo OAI-PMH protocol\n", + " - Zenodo REST API\n", + " - Explore the REST API answer (payload) with the `request` library\n", + " - Using `eossr` library\n", + " - Using `PyZenodo3` library\n", + " - Pros and cons of both methods\n", + " \n", + "---------------------" + ] + }, + { + "cell_type": "markdown", + "id": "2529eacc", + "metadata": {}, + "source": [ + "## TL;DR: Pros 
and cons of each method\n", + " - Using OAI-PMH for harvesting;\n", + " + $+$ More efficient harvest:\n", + " - faster,\n", + " - designed for large and continuous queries of a repository.\n", + " + $-$ Metadata representation of files is provided by the data provider.\n", + " - Using the REST API;\n", + " + $+$ Access to the full entry/record/community information.\n", + " + $-$ Harvest not optimised for large searches." + ] + }, + { + "cell_type": "markdown", + "id": "2193adc5", + "metadata": {}, + "source": [ + "## OAI-PMH protocol" + ] + }, + { + "cell_type": "markdown", + "id": "9bb7e516", + "metadata": {}, + "source": [ + "#### - First have a look at a nice [tutorial to the protocol](https://indico.cern.ch/event/5710/sessions/108048/attachments/988151/1405129/Simeon_tutorial.pdf)." + ] + }, + { + "cell_type": "markdown", + "id": "1bcf7733", + "metadata": {}, + "source": [ + "The [OAI-PMH protocol](https://www.openarchives.org/pmh/) uses a base URL + special syntax ('verbs') to query and find metadata representation(s) of a data provider.\n", + "\n", + "In the case of zenodo the base URL is: https://zenodo.org/oai2d.\n", + "\n", + "For example; \n", + " - to retrieve all the entries (`verb=ListRecords`)\n", + " - belonging to escape2020 community (`set=user-escape2020`)\n", + " - in the OAI DataCite metadata representation (`metadataPrefix=oai_datacite`)\n", + " \n", + "https://zenodo.org/oai2d?verb=ListRecords&set=user-escape2020&metadataPrefix=oai_datacite\n", + "\n", + "\n", + "Ex2:\n", + " - To obtain a single entry (`verb=GetRecord`)\n", + " - of a certain zenodo record - identified by the entry_id (`identifier=oai:zenodo.org:4105896`)\n", + " - in the Dublin Core metadata representation (`metadataPrefix=oai_dc`)\n", + " \n", + "https://zenodo.org/oai2d?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:zenodo.org:4105896" + ] + }, + { + "cell_type": "markdown", + "id": "26eba497", + "metadata": {}, + "source": [ + "## Example with the OAI-PMH protocol: A 
python OAI-Harvester" + ] + }, + { + "cell_type": "markdown", + "id": "145835bd", + "metadata": {}, + "source": [ + "```\n", + "pip install oaiharvest\n", + "oai-harvest -h\n", + "\n", + "# Examples of usage\n", + "oai-harvest https://zenodo.org/oai2d -s \"user-escape2020\" -d oai_dc\n", + "oai-harvest https://zenodo.org/oai2d -s \"user-escape2020\" -d oai_datacite4\n", + "oai-harvest https://zenodo.org/oai2d -s \"user-escape2020\" -d datacite3\n", + "\n", + "# Example of output\n", + "$ oai-harvest https://zenodo.org/oai2d -s \"user-escape2020\" -d datacite3\n", + "$ cd datacite3\n", + "$ ls\n", + "oai:zenodo.org:1689986.oai_dc.xml oai:zenodo.org:3884963.oai_dc.xml\n", + "oai:zenodo.org:2533132.oai_dc.xml oai:zenodo.org:3967386.oai_dc.xml\n", + "oai:zenodo.org:2542652.oai_dc.xml oai:zenodo.org:4012169.oai_dc.xml\n", + "oai:zenodo.org:2542664.oai_dc.xml oai:zenodo.org:4028908.oai_dc.xml\n", + "oai:zenodo.org:3356656.oai_dc.xml oai:zenodo.org:4044010.oai_dc.xml\n", + "oai:zenodo.org:3362435.oai_dc.xml oai:zenodo.org:4055176.oai_dc.xml\n", + "oai:zenodo.org:3572655.oai_dc.xml oai:zenodo.org:4105896.oai_dc.xml\n", + "oai:zenodo.org:3614662.oai_dc.xml oai:zenodo.org:4311271.oai_dc.xml\n", + "oai:zenodo.org:3659184.oai_dc.xml oai:zenodo.org:4419866.oai_dc.xml\n", + "oai:zenodo.org:3675081.oai_dc.xml oai:zenodo.org:4601451.oai_dc.xml\n", + "oai:zenodo.org:3734091.oai_dc.xml oai:zenodo.org:4687123.oai_dc.xml\n", + "oai:zenodo.org:3743489.oai_dc.xml oai:zenodo.org:4786641.oai_dc.xml\n", + "oai:zenodo.org:3743490.oai_dc.xml oai:zenodo.org:4790629.oai_dc.xml\n", + "oai:zenodo.org:3854976.oai_dc.xml\n", + "$ cat <FILE>\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "f9ad3584", + "metadata": {}, + "source": [ + " No token is needed to fetch metadata files provided by Zenodo (the provider). 
\n", + " However, please note that the **metadata schema representation of the records is chosen by the provider!** \n", + " \n", + "Zenodo supports the following schema representations:\n", + " - `DataCite` (various versions),\n", + " - `Dublin Core`,\n", + " - `MARC21`,\n", + " - However it **does not provide** metadata under the `codemeta.json` schema." + ] + }, + { + "cell_type": "markdown", + "id": "c6a47567", + "metadata": {}, + "source": [ + "# Query Zenodo's records through its REST API" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "26424a79", + "metadata": {}, + "outputs": [], + "source": [ + "# pip install requests" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e7a84906", + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "markdown", + "id": "87f186f9", + "metadata": {}, + "source": [ + "We need to specify some parameters to narrow the search" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5ee3a192", + "metadata": {}, + "outputs": [], + "source": [ + "parameters = {'communities': 'escape2020',\n", + " 'size':100}" + ] + }, + { + "cell_type": "markdown", + "id": "e268aef2", + "metadata": {}, + "source": [ + "**NOTE** No token is needed to fetch/communicate with the REST API. \n", + "However, you would need to [create one](https://zenodo.org/account/settings/applications/) if you would like to write or publish through the API." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ddccf248", + "metadata": {}, + "outputs": [], + "source": [ + "token = ''" + ] + }, + { + "cell_type": "markdown", + "id": "4cd8011b", + "metadata": {}, + "source": [ + "## Example with the `requests` lib - How to recover all ESCAPE2020 community records?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "fde5ee19", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['aggregations', 'hits', 'links'])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "escape2020 = requests.get('https://zenodo.org/api/records', params=parameters).json()\n", + "escape2020.keys()" + ] + }, + { + "cell_type": "markdown", + "id": "9e564b25", + "metadata": {}, + "source": [ + "Let's explore the REST API payload to find the desired information." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2506740f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'access_right': {'buckets': [{'doc_count': 15, 'key': 'open'}],\n", + " 'doc_count_error_upper_bound': 0,\n", + " 'sum_other_doc_count': 0},\n", + " 'file_type': {'buckets': [{'doc_count': 7, 'key': 'zip'},\n", + " {'doc_count': 4, 'key': 'pdf'},\n", + " {'doc_count': 3, 'key': 'gz'},\n", + " {'doc_count': 2, 'key': 'json'},\n", + " {'doc_count': 1, 'key': ''},\n", + " {'doc_count': 1, 'key': 'md'},\n", + " {'doc_count': 1, 'key': 'simg'},\n", + " {'doc_count': 1, 'key': 'tar'}],\n", + " 'doc_count_error_upper_bound': 0,\n", + " 'sum_other_doc_count': 0},\n", + " 'keywords': {'buckets': [{'doc_count': 3, 'key': 'ESCAPE'},\n", + " {'doc_count': 2, 'key': 'CTA'},\n", + " {'doc_count': 2, 'key': 'python'},\n", + " {'doc_count': 1, 'key': 'AGN'},\n", + " {'doc_count': 1, 'key': 'EOSC'},\n", + " {'doc_count': 1,\n", + " 'key': 'European Open Science Cloud, ESFRI, e-Infrastructures'},\n", + " {'doc_count': 1, 'key': 'Horizon Europe'},\n", + " {'doc_count': 1, 'key': 'Interoperability'},\n", + " {'doc_count': 1, 'key': 'MWL'},\n", + " {'doc_count': 1,\n", + " 'key': 'Machine Learning, Big Data, Aapche Kafka, Gravitational Wave'}],\n", + " 'doc_count_error_upper_bound': 0,\n", + " 'sum_other_doc_count': 17},\n", + " 'type': {'buckets': [{'doc_count': 
10,\n", + " 'key': 'software',\n", + " 'subtype': {'buckets': [],\n", + " 'doc_count_error_upper_bound': 0,\n", + " 'sum_other_doc_count': 0}},\n", + " {'doc_count': 3,\n", + " 'key': 'publication',\n", + " 'subtype': {'buckets': [{'doc_count': 1, 'key': 'other'},\n", + " {'doc_count': 1, 'key': 'report'},\n", + " {'doc_count': 1, 'key': 'workingpaper'}],\n", + " 'doc_count_error_upper_bound': 0,\n", + " 'sum_other_doc_count': 0}},\n", + " {'doc_count': 1,\n", + " 'key': 'lesson',\n", + " 'subtype': {'buckets': [],\n", + " 'doc_count_error_upper_bound': 0,\n", + " 'sum_other_doc_count': 0}},\n", + " {'doc_count': 1,\n", + " 'key': 'poster',\n", + " 'subtype': {'buckets': [],\n", + " 'doc_count_error_upper_bound': 0,\n", + " 'sum_other_doc_count': 0}}],\n", + " 'doc_count_error_upper_bound': 0,\n", + " 'sum_other_doc_count': 0}}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Nice summary of the request we just made\n", + "escape2020['aggregations']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "dd36b4be", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['hits', 'total'])\n", + "15\n" + ] + } + ], + "source": [ + "# Total number of entries in the payload\n", + "print(escape2020['hits'].keys())\n", + "print(escape2020['hits']['total'])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "0550cffb", + "metadata": {}, + "outputs": [], + "source": [ + "all_entries = escape2020['hits']['hits']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "94a4e8c9", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'conceptdoi': '10.5281/zenodo.5176088',\n", + " 'conceptrecid': '5176088',\n", + " 'created': '2021-08-16T07:21:15.005975+00:00',\n", + " 'doi': '10.5281/zenodo.5176089',\n", + " 'files': [{'bucket': '409f3f8e-cb73-4a10-b718-3b8fc238a616',\n", + 
" 'checksum': 'md5:eab8fbaa4c318cbd75629eb6a7719ecb',\n", + " 'key': 'EOSC_SYMPOSIUM_2021_Report.pdf',\n", + " 'links': {'self': 'https://zenodo.org/api/files/409f3f8e-cb73-4a10-b718-3b8fc238a616/EOSC_SYMPOSIUM_2021_Report.pdf'},\n", + " 'size': 1654918,\n", + " 'type': 'pdf'}],\n", + " 'id': 5176089,\n", + " 'links': {'badge': 'https://zenodo.org/badge/doi/10.5281/zenodo.5176089.svg',\n", + " 'bucket': 'https://zenodo.org/api/files/409f3f8e-cb73-4a10-b718-3b8fc238a616',\n", + " 'conceptbadge': 'https://zenodo.org/badge/doi/10.5281/zenodo.5176088.svg',\n", + " 'conceptdoi': 'https://doi.org/10.5281/zenodo.5176088',\n", + " 'doi': 'https://doi.org/10.5281/zenodo.5176089',\n", + " 'html': 'https://zenodo.org/record/5176089',\n", + " 'latest': 'https://zenodo.org/api/records/5176089',\n", + " 'latest_html': 'https://zenodo.org/record/5176089',\n", + " 'self': 'https://zenodo.org/api/records/5176089'},\n", + " 'metadata': {'access_right': 'open',\n", + " 'access_right_category': 'success',\n", + " 'communities': [{'id': 'envri'},\n", + " {'id': 'eosc_synergy'},\n", + " {'id': 'eoscsecretariat'},\n", + " {'id': 'escape2020'},\n", + " {'id': 'expands'},\n", + " {'id': 'ni4os-europe'},\n", + " {'id': 'sshoc'}],\n", + " 'contributors': [{'affiliation': 'Trust-IT Services',\n", + " 'name': 'Ferguson, Nicholas',\n", + " 'orcid': '0000-0001-5523-6430',\n", + " 'type': 'WorkPackageLeader'}],\n", + " 'creators': [{'affiliation': 'Technopolis Group Belgium',\n", + " 'name': 'Bertacchini, Veronica'},\n", + " {'affiliation': 'Trust-IT Services',\n", + " 'name': 'Drago, Federico',\n", + " 'orcid': '0000-0002-1333-4478'},\n", + " {'affiliation': 'TU Wien',\n", + " 'name': 'Flicker, Katharina',\n", + " 'orcid': '0000-0001-6040-2798'},\n", + " {'affiliation': 'KIT', 'name': 'Gebreyesus, Netsanet'},\n", + " {'affiliation': 'GÉANT', 'name': 'Grant, Annabel'},\n", + " {'affiliation': 'CERN',\n", + " 'name': 'Jones, Bob',\n", + " 'orcid': '0000-0001-9092-4589'},\n", + " {'affiliation': 
'CSC-IT Center for Science', 'name': 'Liinamaa, Iiris'},\n", + " {'affiliation': 'CSC-IT Center for Science', 'name': 'Märkälä, Anu'},\n", + " {'affiliation': 'Athena Research Center',\n", + " 'name': 'Marinos-Kouris, Christos'},\n", + " {'affiliation': 'GO FAIR Foundation',\n", + " 'name': 'Meerman, Bert',\n", + " 'orcid': '0000-0002-0071-2660'},\n", + " {'affiliation': 'TU Wien',\n", + " 'name': 'Saurugger, Bernd',\n", + " 'orcid': '0000-0001-5730-3983'},\n", + " {'affiliation': 'Trust-IT Services',\n", + " 'name': 'Smith, Zachary',\n", + " 'orcid': '0000-0002-9984-008X'}],\n", + " 'description': '<p>The EOSC Symposium 2021 provided a key engagement opportunity for the EOSC community after the European Open Science Cloud finally entered its highly-anticipated implementation phase in 2021. Delivered online to just under 1,000 EOSC stakeholders from over 63 different countries, this was not only the largest EOSC Symposium yet, but it was also an essential opportunity for convergence and alignment on principles and priorities.</p>\\n\\n<p>The EOSC Association will play an important role in this phase. With already over 210 member and observer organisations from across Europe, the Association represents a single voice for the advocacy and representation of the broader EOSC Stakeholder community in Europe, promoting alignment of EU research policy and priorities.</p>\\n\\n<p>The Association will continuously develop the EOSC Strategic Research and Innovation Agenda (SRIA) which will influence future EOSC activities at institutional, national and EU level (including the EOSC-related work programmes in Horizon Europe). This living document will adapt to the changing EOSC ecosystem and the needs of EOSC stakeholders. 
The Association is setting up a series of Advisory Groups (AG) with Task Forces (TF) to engage with the EOSC community around priority areas, namely:</p>\\n\\n<ul>\\n\\t<li>Implementation of EOSC</li>\\n\\t<li>Metadata and Data Quality</li>\\n\\t<li>Research Careers and Curricula</li>\\n\\t<li>Sustaining EOSC</li>\\n\\t<li>Technical Challenges on EOSC</li>\\n</ul>\\n\\n<p>The Symposium was the first opportunity for the Association to present the draft charters of the Task Forces. A key objective of the event was also for the Association to understand what work has been carried out, is in progress, or is planned on the topics of the AGs and TFs. A call for contributions ran throughout May 2021, with a total of 137 applications received. Through presentations, lightning talks, and panels, over 70 community members were able to highlight key findings and recommendations for the AGs and TFs to take into consideration for their work.</p>',\n", + " 'doi': '10.5281/zenodo.5176089',\n", + " 'grants': [{'acronym': 'EOSCsecretariat.eu',\n", + " 'code': '831644',\n", + " 'funder': {'acronyms': [],\n", + " 'doi': '10.13039/501100000780',\n", + " 'links': {'self': 'https://zenodo.org/api/funders/10.13039/501100000780'},\n", + " 'name': 'European Commission'},\n", + " 'links': {'self': 'https://zenodo.org/api/grants/10.13039/501100000780::831644'},\n", + " 'program': 'H2020',\n", + " 'title': 'EOSCsecretariat.eu'}],\n", + " 'keywords': ['EOSC', 'Open Science', 'Horizon Europe', 'Interoperability'],\n", + " 'language': 'eng',\n", + " 'license': {'id': 'CC-BY-4.0'},\n", + " 'publication_date': '2021-08-10',\n", + " 'related_identifiers': [{'identifier': '10.5281/zenodo.5176088',\n", + " 'relation': 'isVersionOf',\n", + " 'scheme': 'doi'}],\n", + " 'relations': {'version': [{'count': 1,\n", + " 'index': 0,\n", + " 'is_last': True,\n", + " 'last_child': {'pid_type': 'recid', 'pid_value': '5176089'},\n", + " 'parent': {'pid_type': 'recid', 'pid_value': '5176088'}}]},\n", + " 
'resource_type': {'subtype': 'report',\n", + " 'title': 'Report',\n", + " 'type': 'publication'},\n", + " 'title': 'EOSC Symposium 2021 Report'},\n", + " 'owners': [91736],\n", + " 'revision': 8,\n", + " 'stats': {'downloads': 438.0,\n", + " 'unique_downloads': 374.0,\n", + " 'unique_views': 475.0,\n", + " 'version_downloads': 438.0,\n", + " 'version_unique_downloads': 374.0,\n", + " 'version_unique_views': 475.0,\n", + " 'version_views': 494.0,\n", + " 'version_volume': 724854084.0,\n", + " 'views': 494.0,\n", + " 'volume': 724854084.0},\n", + " 'updated': '2021-08-24T14:27:14.603504+00:00'}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The content of the first entry of the payload - It contains all the info that we can also find in Zenodo\n", + "all_entries[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0b48bcff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5176089 \t EOSC Symposium 2021 Report\n", + "5153369 \t agnpy: Modelling Active Galactic Nuclei radiative processes with python.\n", + "5093909 \t ESCAPE Data Science Summer School 2021\n", + "4923992 \t ESCAPE template project\n", + "4786641 \t ZenodoCI\n", + "4601451 \t gLike: numerical maximization of heterogeneous joint likelihood functions of a common free parameter plus nuisance parameters\n", + "4419866 \t IndexedConv/IndexedConv: v1.3\n", + "4044010 \t EOSC - a tool for enabling Open Science in Europe\n", + "3854976 \t FairRootGroup/DDS\n", + "3743489 \t ESCAPE the maze\n", + "3675081 \t ESFRI cluster projects - Position papers on expectations and planned contributions to the EOSC\n", + "3659184 \t ctapipe_io_mchdf5\n", + "3614662 \t FairRoot\n", + "3362435 \t FairMQ\n", + "3356656 \t A prototype for a real time pipeline for the detection of transient signals and their automatic classification\n" + ] + } + ], + "source": [ + "# Example to retrieve 
entries_ids and titles\n", + "for entry in all_entries:\n", + " print(f\"{entry['id']} \\t {entry['metadata']['title']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e2afd195", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5176089 \t ['EOSC', 'Open Science', 'Horizon Europe', 'Interoperability']\n", + "5153369 \t ['radiative processes', 'blazars', 'radio galaxies', 'AGN', 'jets', 'MWL', 'astropy', 'numpy', 'python']\n", + "5093909 \t ['python', 'lesson']\n", + "4923992 \t ['ESCAPE', 'jupyter-notebook']\n", + "4786641 \t ['ESCAPE']\n", + "4419866 \t ['CTA']\n", + "4044010 \t ['European Open Science Cloud, ESFRI, e-Infrastructures']\n", + "3743489 \t ['ESCAPE']\n", + "3659184 \t ['CTA']\n", + "3614662 \t ['geant4', 'c-plus-plus', 'cmake', 'reconstruction', 'vmc', 'modular', 'analysis', 'simulation']\n", + "3356656 \t ['Machine Learning, Big Data, Aapche Kafka, Gravitational Wave']\n" + ] + } + ], + "source": [ + "# Example of all the keywords within each entry\n", + "for entry in all_entries:\n", + " try:\n", + " print(f\"{entry['id']} \\t {entry['metadata']['keywords']}\")\n", + " except KeyError:\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "id": "e9007eef", + "metadata": {}, + "source": [ + "#### Let's explore a specific ESCAPE2020 entry, for example `agnpy`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "75b4de93", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['conceptdoi', 'conceptrecid', 'created', 'doi', 'files', 'id', 'links', 'metadata', 'owners', 'revision', 'stats', 'updated'])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agnpy = requests.get('https://zenodo.org/api/records/4687123', params=parameters).json()\n", + "agnpy.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "39be15f1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'access_right': 'open',\n", + " 'access_right_category': 'success',\n", + " 'communities': [{'id': 'escape2020'}],\n", + " 'creators': [{'affiliation': \"Institut de FÃsica d'Altes Energies (IFAE)\",\n", + " 'name': 'Cosimo Nigro'},\n", + " {'name': 'Julian Sitarek'},\n", + " {'affiliation': 'Minnesota State University Moorhead', 'name': 'Matt Craig'},\n", + " {'name': 'PaweÅ‚ Gliwny'},\n", + " {'affiliation': '@sourcery-ai', 'name': 'Sourcery AI'}],\n", + " 'description': '<p>In this release the major features added are:</p>\\n<ul>\\n<li><p>an exponential cutoff power-law for the electron spectra;</p>\\n</li>\\n<li><p>the possibility to compute the gamma-gamma opacity for misaligned sources (<code>viewing angle != 0</code>) for the following targets: point source behind the jet, BLR and the DT.</p>\\n</li>\\n</ul>',\n", + " 'doi': '10.5281/zenodo.4687123',\n", + " 'license': {'id': 'other-open'},\n", + " 'publication_date': '2021-04-14',\n", + " 'related_identifiers': [{'identifier': 'https://github.com/cosimoNigro/agnpy/tree/v0.0.10',\n", + " 'relation': 'isSupplementTo',\n", + " 'scheme': 'url'},\n", + " {'identifier': '10.5281/zenodo.4055175',\n", + " 'relation': 'isVersionOf',\n", + " 'scheme': 'doi'}],\n", + " 'relations': {'version': [{'count': 7,\n", + " 'index': 3,\n", + " 'is_last': False,\n", + " 
'last_child': {'pid_type': 'recid', 'pid_value': '5153369'},\n", + " 'parent': {'pid_type': 'recid', 'pid_value': '4055175'}}]},\n", + " 'resource_type': {'title': 'Software', 'type': 'software'},\n", + " 'title': 'cosimoNigro/agnpy: v0.0.10: added EPWL for electrons and off-axis absorption calculation',\n", + " 'version': 'v0.0.10'}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agnpy['metadata']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "1ee7197f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://zenodo.org/api/files/a806b549-922e-4025-9453-a5f4c0913fdd/cosimoNigro/agnpy-v0.0.10.zip\n" + ] + } + ], + "source": [ + "for file in agnpy['files']:\n", + " print(file['links']['self'])" + ] + }, + { + "cell_type": "markdown", + "id": "bb63887b", + "metadata": {}, + "source": [ + "We could do a simple `wget` of the previous URL and recover the file uploaded to Zenodo.\n", + "\n", + "Let's see an example with various files uploaded." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "16db6ee0", + "metadata": {}, + "outputs": [], + "source": [ + "ESCAPE_template = requests.get('https://zenodo.org/api/records/4790629', params=parameters).json()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "9feca5e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://zenodo.org/api/files/923a2614-a0fa-4927-bb3b-704168f3c768/codemeta.json\n", + "https://zenodo.org/api/files/923a2614-a0fa-4927-bb3b-704168f3c768/Singularity\n", + "https://zenodo.org/api/files/923a2614-a0fa-4927-bb3b-704168f3c768/Singularity.simg\n", + "https://zenodo.org/api/files/923a2614-a0fa-4927-bb3b-704168f3c768/template_project_escape-v2.1.zip\n" + ] + } + ], + "source": [ + "for file in ESCAPE_template['files']:\n", + " print(file['links']['self'])" + ] + }, + { + "cell_type": "markdown", + "id": "4070d988", + "metadata": {}, + "source": [ + "## eossr\n", + "\n", + "All these methods are implemented in the [Zenodo client](https://gitlab.in2p3.fr/escape2020/wp3/eossr/-/blob/master/eossr/api/zenodo.py) (a REST API handler) of the [eossr library](https://gitlab.in2p3.fr/escape2020/wp3/eossr). \n", + "\n", + "The library is also in charge of automating the project's uploads from GitLab to Zenodo (using GitLab CI and the REST API handler)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "6cd7f714", + "metadata": {}, + "outputs": [], + "source": [ + "# pip install https://gitlab.in2p3.fr/escape2020/wp3/eossr/-/archive/master/eossr-master.zip" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "0dbbbd64", + "metadata": {}, + "outputs": [], + "source": [ + "from eossr.api.zenodo import ZenodoAPI\n", + "z = ZenodoAPI(access_token=token, sandbox=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "2037338d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "15" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "entries = z.fetch_community_entries(community_name='escape2020', \n", + " results_per_query=100)\n", + "entries.json()['hits']['total']" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "3985383f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5176089 EOSC Symposium 2021 Report\n", + "5153369 agnpy: Modelling Active Galactic Nuclei radiative processes with python.\n", + "5093909 ESCAPE Data Science Summer School 2021\n", + "4923992 ESCAPE template project\n", + "4786641 ZenodoCI\n", + "4601451 gLike: numerical maximization of heterogeneous joint likelihood functions of a common free parameter plus nuisance parameters\n", + "4419866 IndexedConv/IndexedConv: v1.3\n", + "4044010 EOSC - a tool for enabling Open Science in Europe\n", + "3854976 FairRootGroup/DDS\n", + "3743489 ESCAPE the maze\n", + "3675081 ESFRI cluster projects - Position papers on expectations and planned contributions to the EOSC\n", + "3659184 ctapipe_io_mchdf5\n", + "3614662 FairRoot\n", + "3362435 FairMQ\n", + "3356656 A prototype for a real time pipeline for the detection of transient signals and their automatic classification\n" + ] + } + ], + "source": [ + "ids = 
z.fetch_community_entries_per_id(community_name='escape2020', \n", + " results_per_query=100)\n", + "\n", + "titles = z.fetch_community_entries_per_title(community_name='escape2020', \n", + " results_per_query=100)\n", + "\n", + "for id, title in zip(ids, titles):\n", + " print(id, title)" + ] + }, + { + "cell_type": "markdown", + "id": "595ba083", + "metadata": {}, + "source": [ + "## PyZenodo3\n", + "\n", + "Another equivalent example with the pyzenodo3 library" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "91985172", + "metadata": {}, + "outputs": [], + "source": [ + "# pip install pyzenodo3" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "7cd937b9", + "metadata": {}, + "outputs": [], + "source": [ + "import pyzenodo3\n", + "\n", + "zen = pyzenodo3.Zenodo()\n", + "records = zen.search('agnpy')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "815578d9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'conceptdoi': '10.5281/zenodo.5174757',\n", + " 'conceptrecid': '5174757',\n", + " 'created': '2021-08-10T07:43:16.545873+00:00',\n", + " 'doi': '10.5281/zenodo.5174758',\n", + " 'files': [{'bucket': '4a6752c9-f922-45f5-a6e5-82f9a016ba87',\n", + " 'checksum': 'md5:516ae662f4d8a251a7d8b9fb41007e56',\n", + " 'key': 'cosimoNigro/agnpy_paper-v0.1.0.zip',\n", + " 'links': {'self': 'https://zenodo.org/api/files/4a6752c9-f922-45f5-a6e5-82f9a016ba87/cosimoNigro/agnpy_paper-v0.1.0.zip'},\n", + " 'size': 45740,\n", + " 'type': 'zip'}],\n", + " 'id': 5174758,\n", + " 'links': {'badge': 'https://zenodo.org/badge/doi/10.5281/zenodo.5174758.svg',\n", + " 'bucket': 'https://zenodo.org/api/files/4a6752c9-f922-45f5-a6e5-82f9a016ba87',\n", + " 'conceptbadge': 'https://zenodo.org/badge/doi/10.5281/zenodo.5174757.svg',\n", + " 'conceptdoi': 'https://doi.org/10.5281/zenodo.5174757',\n", + " 'doi': 'https://doi.org/10.5281/zenodo.5174758',\n", + " 'html': 'https://zenodo.org/record/5174758',\n", + " 
'latest': 'https://zenodo.org/api/records/5174758',\n", + " 'latest_html': 'https://zenodo.org/record/5174758',\n", + " 'self': 'https://zenodo.org/api/records/5174758'},\n", + " 'metadata': {'access_right': 'open',\n", + " 'access_right_category': 'success',\n", + " 'creators': [{'affiliation': \"Institut de FÃsica d'Altes Energies (IFAE), The Barcelona Institute of Science and Technology, Campus UAB, 08193 Bellaterra (Barcelona), Spain\",\n", + " 'name': 'Cosimo Nigro',\n", + " 'orcid': '0000-0001-8375-1907'},\n", + " {'affiliation': 'University of Lodz, Faculty of Physics and Applied Informatics, Department of Astrophysics, 90-236 Lodz, Poland',\n", + " 'name': 'Julian Sitarek',\n", + " 'orcid': '0000-0002-1659-5374'},\n", + " {'affiliation': 'University of Lodz, Faculty of Physics and Applied Informatics, Department of Astrophysics, 90-236 Lodz, Poland',\n", + " 'name': 'PaweÅ‚ Gliwny',\n", + " 'orcid': '0000-0002-4183-391X'},\n", + " {'affiliation': \"Laboratoire d'Annecy de Physique des Particules, Univ. Grenoble Alpes, Univ. 
Savoie Mont Blanc, CNRS, LAPP, 74000 Annecy, France\",\n", + " 'name': 'David Sanchez'},\n", + " {'affiliation': 'Minnesota State University Moorhead, Moorhead, Minnesota, US',\n", + " 'name': 'Matthew Craig',\n", + " 'orcid': '0000-0002-4183-391X'}],\n", + " 'description': \"This repository contains the scripts to generate the figures included in the paper 'agnpy: an open-source python package modelling the radiative processes of jetted active galactic nuclei'.\",\n", + " 'doi': '10.5281/zenodo.5174758',\n", + " 'keywords': ['radiative processes',\n", + " 'blazars',\n", + " 'radio galaxies',\n", + " 'AGN',\n", + " 'jets',\n", + " 'MWL',\n", + " 'astropy',\n", + " 'numpy',\n", + " 'python'],\n", + " 'license': {'id': 'other-open'},\n", + " 'publication_date': '2021-08-10',\n", + " 'related_identifiers': [{'identifier': 'https://github.com/cosimoNigro/agnpy_paper/tree/v0.1.0',\n", + " 'relation': 'isSupplementTo',\n", + " 'scheme': 'url'},\n", + " {'identifier': '10.5281/zenodo.5174757',\n", + " 'relation': 'isVersionOf',\n", + " 'scheme': 'doi'}],\n", + " 'relations': {'version': [{'count': 1,\n", + " 'index': 0,\n", + " 'is_last': True,\n", + " 'last_child': {'pid_type': 'recid', 'pid_value': '5174758'},\n", + " 'parent': {'pid_type': 'recid', 'pid_value': '5174757'}}]},\n", + " 'resource_type': {'title': 'Software', 'type': 'software'},\n", + " 'title': 'agnpy: an open-source python package modelling the radiative processes of jetted active galactic nuclei',\n", + " 'version': '0.1.0'},\n", + " 'owners': [99841],\n", + " 'revision': 3,\n", + " 'stats': {'downloads': 0.0,\n", + " 'unique_downloads': 0.0,\n", + " 'unique_views': 12.0,\n", + " 'version_downloads': 0.0,\n", + " 'version_unique_downloads': 0.0,\n", + " 'version_unique_views': 12.0,\n", + " 'version_views': 13.0,\n", + " 'version_volume': 0.0,\n", + " 'views': 13.0,\n", + " 'volume': 0.0},\n", + " 'updated': '2021-08-10T13:48:43.185119+00:00'}" + ] + }, + "execution_count": 23, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "records[0].data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a304d374", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}