# codemeta2zenodo.py (12.46 KiB)
#!/usr/bin/env python
import json
from datetime import date
from pathlib import Path
from .zenodo import add_escape2020_community, add_escape2020_grant, validate_zenodo_metadata
# CodeMeta person properties that are stored under the Zenodo 'creators' key.
codemeta_creators_fields = [
    'author',
    'creator',
    'maintainer',
    'contributor',
]

# CodeMeta person properties that are stored under the Zenodo 'contributors' key.
codemeta_contributors_fields = [
    'editor',
    'producer',
    'publisher',
    'provider',
    'sponsor',
]

# Every person property handled by the crosswalk: creators first, then contributors.
codemeta_allowed_person_fields = [*codemeta_creators_fields, *codemeta_contributors_fields]


def parse_person_schema_property(person_property, contributor_field):
    """
    Parse a single Person Schema property into a Zenodo person entry.

    Parameters:
    --------
    person_property: dict
        a single codemeta Person (or Organization) item. The name is read
        from `familyName` / `givenName`, falling back to `name` when no
        `familyName` is given.
    contributor_field : str
        codemeta key under which the person was found. For the keys listed
        in `codemeta_contributors_fields` a Zenodo contributor `type` is
        added: 'Other' for {'provider', 'publisher'}, the capitalized key
        for the rest {'editor', 'producer', 'sponsor'}.

    Returns:
    --------
    zenodo_person: dict
        dictionary with the zenodo syntax: always `name`, plus `orcid`,
        `affiliation` and `type` when they can be derived.

    Raises:
    --------
    KeyError
        if the entry carries neither `familyName`, `name` nor `givenName`.
    """
    family_name = person_property.get('familyName')
    given_name = person_property.get('givenName')
    if family_name and given_name:
        name = f'{family_name}, {given_name}'
    else:
        # Entries described only by a full `name` (e.g. organizations or
        # collaborations) have no `familyName`; do not crash on them.
        name = family_name or person_property.get('name') or given_name
    if not name:
        raise KeyError("person entry needs at least a 'familyName' or a 'name'")

    zenodo_person = {'name': name}

    if '@id' in person_property:
        # Reformat "https://orcid.org/0000-0002-5686-2078" into the bare
        # identifier; an `@id` without the 'orcid.org/' prefix is kept as is.
        # NOTE(review): a non-ORCID `@id` is still exported as `orcid` -
        # confirm whether such values should be dropped instead.
        zenodo_person['orcid'] = person_property['@id'].split('orcid.org/')[-1]

    if 'affiliation' in person_property:
        affiliation = person_property['affiliation']
        if isinstance(affiliation, dict):
            # Organization object: only its name is exported
            affiliation = affiliation.get('name')
        # A plain string affiliation is accepted too; anything else is skipped
        if isinstance(affiliation, str):
            zenodo_person['affiliation'] = affiliation

    # Parse correctly the contributors
    if contributor_field in codemeta_contributors_fields:
        if contributor_field in ('provider', 'publisher'):
            zenodo_person['type'] = 'Other'
        else:
            # First letter of contributor type MUST be capitalized
            # (not for two words' contributor !)
            zenodo_person['type'] = contributor_field.title()

    return zenodo_person


def add_author_metadata(zenodo_file, codemt_person_entry, person_field):
    """
    Aux function to parse correctly all the authors, contributors and
    maintainers that can be found at the codemeta.json file.

    The persons are appended in place to `zenodo_file['creators']` or
    `zenodo_file['contributors']` (the list is created when missing);
    entries already present in the target list are not added twice.

    Parameters:
    --------
    zenodo_file: dict
        metadata dictionary with the zenodo syntax (modified in place)
    codemt_person_entry: list or dict
        metadata dictionary key field with the codemeta syntax; either a
        single person item or a list (or tuple) of them
    person_field: str
        codemeta key field specifying creator {author, contributor, maintainer,
        creator}, or contributors {editor, sponsor, producer, project
        manager...}. Any other value leaves `zenodo_file` untouched.
    """
    if isinstance(codemt_person_entry, (list, tuple)):
        person_entries = codemt_person_entry
    else:
        person_entries = [codemt_person_entry]

    # First create the full contact agenda by field. 'name' is the only key
    # that MUST be contained in a person_property, so it is used to collapse
    # persons listed twice within the same entry (the last occurrence wins).
    full_contacts = {}
    for person_property in person_entries:
        zenodo_person = parse_person_schema_property(person_property, person_field)
        full_contacts[zenodo_person['name']] = zenodo_person

    # The destination only depends on `person_field`: resolve it once
    if person_field in codemeta_creators_fields:
        # Contributors and maintainers in the same zenodo key
        zenodo_key = 'creators'
    elif person_field in codemeta_contributors_fields:
        zenodo_key = 'contributors'
    else:
        return

    if not full_contacts:
        # Nobody to add: do not create an empty list in the zenodo metadata
        return

    # then save each person by field and avoid duplicates
    registered = zenodo_file.setdefault(zenodo_key, [])
    for zenodo_person in full_contacts.values():
        if zenodo_person not in registered:
            registered.append(zenodo_person)


def find_matching_metadata(codemeta_json):
    """
    Build the Zenodo metadata dictionary matching a codemeta dictionary.

    Please note that the following fields are ASSUMED. If they are not
    correct, change them, or contact us otherwise.
    * "access_right": "open"
    * "language": "eng"

    Parameters:
    --------
    codemeta_json: dict
        already parsed dictionary containing the metadata of the codemeta.json
        file

    Returns:
    --------
    metadata_zenodo : dict
        dictionary containing the metadata information found at the
        codemeta.json file but written using the Zenodo syntax.

    Raises:
    --------
    ValueError
        if `softwareVersion` and `version` are both given with different values
    TypeError
        if neither `version` nor `softwareVersion` is provided
    """

    def append_related_identifiers(metadata_zenodo: dict, meta_dict: dict):
        """Append `meta_dict` to the 'related_identifiers' list, creating the list when missing."""
        if 'related_identifiers' in metadata_zenodo:
            if not isinstance(metadata_zenodo['related_identifiers'], list):
                raise TypeError(
                    f"metadata_zenodo['related_identifiers'] should be a list, "
                    f"but is {type(metadata_zenodo['related_identifiers'])}"
                )
            metadata_zenodo['related_identifiers'].append(meta_dict)
        else:
            metadata_zenodo['related_identifiers'] = [meta_dict]
        return metadata_zenodo

    metadata_zenodo = {'language': 'eng', 'access_right': 'open'}

    # A missing "@type" is handled like an unknown one instead of raising KeyError
    if codemeta_json.get("@type") == "SoftwareSourceCode":
        metadata_zenodo['upload_type'] = 'software'
    else:
        metadata_zenodo['upload_type'] = ''
        print(
            "\nCould not identify the type of schema in the `codemeta.json file`.\n"
            "Thus the 'upload_type' at the `.zenodo.json` file was left EMPTY.\n"
            "Please fill it up by yourself choosing from the following list - otherwise zenodo will NOT be able "
            "to publish your entry:\n"
            " * publication: Publication\n"
            " * poster: Poster\n"
            " * presentation: Presentation\n"
            " * dataset: Dataset\n"
            " * image: Image\n"
            " * video: Video/Audio\n"
            " * software: Software\n"
            " * lesson: Lesson\n"
            " * physicalobject: Physical object\n"
            " * other: Other\n"
        )

    if 'name' in codemeta_json:
        metadata_zenodo['title'] = codemeta_json['name']

    if 'description' in codemeta_json:
        metadata_zenodo['description'] = codemeta_json['description']

    # `softwareVersion` and `version` are interchangeable, but must agree
    if (
        'softwareVersion' in codemeta_json
        and 'version' in codemeta_json
        and codemeta_json['softwareVersion'] != codemeta_json['version']
    ):
        raise ValueError('`softwareVersion` and `version` have different values')

    if 'softwareVersion' in codemeta_json:
        metadata_zenodo['version'] = str(codemeta_json['softwareVersion'])
    elif 'version' in codemeta_json:
        metadata_zenodo['version'] = str(codemeta_json['version'])
    else:
        raise TypeError("A version must be provided in codemeta, either with `version` or `softwareVersion`")

    if 'keywords' in codemeta_json:
        keywords = codemeta_json['keywords']
        # A single keyword is wrapped so that the zenodo field is always a list
        metadata_zenodo['keywords'] = keywords if isinstance(keywords, list) else [keywords]

    if 'releaseNotes' in codemeta_json:
        metadata_zenodo['notes'] = "Release Notes: " + codemeta_json['releaseNotes']

    if 'citation' in codemeta_json:
        citation = codemeta_json['citation']
        # Zenodo `references` is an array of strings: wrap a single citation
        metadata_zenodo['references'] = [citation] if isinstance(citation, str) else citation

    if 'datePublished' in codemeta_json:
        metadata_zenodo['publication_date'] = codemeta_json['datePublished']
    else:
        # No publication date given: default to today
        metadata_zenodo['publication_date'] = str(date.today())

    # All the 'person type' allowed in the CodeMeta schema are listed in the
    # 'codemeta_allowed_person_fields' list. However, the Zenodo schema
    # does not accept certain codemeta 'person type' properties; like
    # publisher and provider, nor all the extended schema.org 'person type'
    # (actor, director, member, performer ...).
    # The crosswalk will be limited to the 'codemeta_allowed_person_fields'
    # list.
    for person_type in codemeta_allowed_person_fields:
        if person_type in codemeta_json:
            add_author_metadata(metadata_zenodo, codemeta_json[person_type], person_field=person_type)

    if 'codeRepository' in codemeta_json:
        meta_dict = {
            "scheme": "url",
            "identifier": codemeta_json['codeRepository'],
            "relation": "isDerivedFrom",
            "resource_type": metadata_zenodo['upload_type'],
        }
        metadata_zenodo = append_related_identifiers(metadata_zenodo, meta_dict)

    if 'readme' in codemeta_json:
        meta_dict = {
            "scheme": "url",
            "identifier": codemeta_json['readme'],
            "relation": "isDocumentedBy",
            "resource_type": "publication-softwaredocumentation",
        }
        metadata_zenodo = append_related_identifiers(metadata_zenodo, meta_dict)

    return metadata_zenodo


class CodeMeta2ZenodoController:
    """
    Control the conversion of a codemeta file to a zenodo file

    Attributes:
    --------
    codemeta_data: dict
        the codemeta metadata given at construction
    zenodo_data: dict
        the zenodo metadata; empty until `convert` is called
    """

    # Marker found in the URLs of SPDX license pages (http and https alike)
    _SPDX_MARKER = 'spdx.org/licenses/'
    _SPDX_PREFIXES = ('https://' + _SPDX_MARKER, 'http://' + _SPDX_MARKER)

    def __init__(self, codemeta_dict):
        # NOTE: kept as an assert for backward compatibility of the raised
        # exception type; be aware that it is skipped when running with -O.
        assert isinstance(codemeta_dict, dict)
        self.codemeta_data = codemeta_dict
        self.zenodo_data = {}

    @classmethod
    def from_file(cls, codemeta_filename):
        """Load `codemeta_filename` into the converter"""
        # JSON files are UTF-8: do not rely on the locale default encoding
        with open(codemeta_filename, encoding='utf-8') as infile:
            controller = cls(json.load(infile))
        return controller

    def convert_license(self):
        """
        Fill `zenodo_data['license']` from the codemeta `license` entry.

        The entry may be a URL string, an object carrying the URL under
        `url` or `@id`, or a list of those. The first SPDX URL found provides
        the license identifier (last path element, without a trailing
        `.html` / `.json`); when none is found the license is set to
        'other-open'. Without any `license` entry a message is printed and
        `zenodo_data` is left untouched.
        """
        record_license = self.codemeta_data.get('license', None)
        if record_license is None:
            print(
                "No license information found.\n"
                "This means a proprietary license.\n"
                "Please contact us, ESCAPE encourages Open Source Science.\n"
            )
            return

        candidates = record_license if isinstance(record_license, list) else [record_license]
        for candidate in candidates:
            if isinstance(candidate, dict):
                candidate = candidate.get('url') or candidate.get('@id')
            if not isinstance(candidate, str) or not candidate.startswith(self._SPDX_PREFIXES):
                continue
            # Keep only what follows the SPDX prefix; tolerate a trailing '/'
            remainder = candidate.split(self._SPDX_MARKER, 1)[1].strip('/')
            spdx_id = remainder.split('/')[-1]
            for suffix in ('.html', '.json'):
                if spdx_id.endswith(suffix):
                    spdx_id = spdx_id[: -len(suffix)]
            if spdx_id:
                self.zenodo_data['license'] = spdx_id
                return

        self.zenodo_data['license'] = 'other-open'

    def convert(self, validate=True):
        """
        Convert data over to zenodo format

        Parameters:
        --------
        validate: bool
            if True, `validate` is called on the converted metadata
        """
        self.zenodo_data = find_matching_metadata(self.codemeta_data)
        self.convert_license()
        if validate:
            self.validate()

    def validate(self):
        """
        Validate the zenodo data.
        """
        validate_zenodo_metadata(self.zenodo_data)

    def add_escape2020_community(self):
        """
        Add compulsory information to the .zenodo.json file:
        * zenodo community : ESCAPE2020
        """
        # Resolves to the module-level function of the same name
        add_escape2020_community(self.zenodo_data)

    def add_escape2020_grant(self):
        """
        Add compulsory information to the .zenodo.json file:
        * ESCAPE grant ID (zenodo syntax)
        """
        # Resolves to the module-level function of the same name
        add_escape2020_grant(self.zenodo_data)

    def write_zenodo(self, zenodo_filename):
        """Write `zenodo_filename` after conversion"""
        with open(zenodo_filename, 'w', encoding='utf-8') as outfile:
            json.dump(self.zenodo_data, outfile, indent=4, sort_keys=True)


def converter(codemeta_dict, add_escape2020=True):
    """
    Convert codemeta metadata into zenodo metadata

    :param codemeta_dict: dict
        codemeta metadata
    :param add_escape2020: bool
        if True, add escape2020 community and grant
    :return: dict
        zenodo metadata
    """
    controller = CodeMeta2ZenodoController(codemeta_dict)
    controller.convert()

    # Nothing else to add when the ESCAPE2020 information is not requested
    if not add_escape2020:
        return controller.zenodo_data

    controller.add_escape2020_community()
    controller.add_escape2020_grant()
    return controller.zenodo_data


def parse_codemeta_and_write_zenodo_metadata_file(codemeta_filename, outdir, add_escape2020=True, overwrite=True):
    """
    Reads the codemeta.json file and creates a new `.zenodo.json` file in outdir.
    This file contains the same information as the codemeta.json file but following the zenodo metadata schema.

    Parameters:
    --------
    codemeta_filename: str or Path
        path to the codemeta.json file
    outdir: str or Path
        path to the outdir where the file `.zenodo.json` will be created
    add_escape2020: bool
        adds escape2020 metadata in zenodo metadata file
    overwrite: bool
        overwrite existing `.zenodo.json` file in `outdir`

    Raises:
    --------
    FileExistsError
        if `.zenodo.json` already exists in `outdir` and `overwrite` is False
    """
    meta_converter = CodeMeta2ZenodoController.from_file(codemeta_filename)
    meta_converter.convert()
    if add_escape2020:
        meta_converter.add_escape2020_community()
        meta_converter.add_escape2020_grant()

    outfile = Path(outdir).joinpath('.zenodo.json')
    if outfile.exists() and not overwrite:
        raise FileExistsError(f"The file {outfile} exists. Use overwrite.")

    # Pass the full path: `outfile.name` is only the final component
    # ('.zenodo.json') and would write into the current working directory
    # instead of `outdir`.
    meta_converter.write_zenodo(outfile)