#!/usr/bin/env python
"""Crosswalk from the CodeMeta schema (codemeta.json) to the Zenodo one."""

import json
import re

# CodeMeta person properties stored under the Zenodo `creators` key.
codemeta_creators_fields = ['author', 'creator', 'maintainer', 'contributor']
# CodeMeta person properties stored under the Zenodo `contributors` key.
codemeta_contributors_fields = \
    ['editor', 'producer', 'publisher', 'provider', 'sponsor']
codemeta_allowed_person_fields = \
    codemeta_creators_fields + codemeta_contributors_fields

# Bare ORCID iD: four dash-separated groups of four characters, all digits
# except the last character, which is a checksum that may be "X".
_ORCID_PATTERN = re.compile(r'^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$')

_SPDX_URL_PREFIX = 'https://spdx.org/licenses/'


def _person_name(person_property):
    """Return the name of a person entry, as "Family, Given" when possible."""
    family_name = person_property.get('familyName')
    given_name = person_property.get('givenName')
    if family_name and given_name:
        return f'{family_name}, {given_name}'

    # Entries without a family/given split (e.g. organisations) usually only
    # carry a plain `name`.
    name = family_name or given_name or person_property.get('name')
    if not name:
        raise KeyError(
            "a person entry needs a 'familyName', 'givenName' or 'name' key"
        )
    return name


def _orcid_from_id(identifier):
    """Return the bare ORCID iD held by an `@id` value, or None."""
    if not isinstance(identifier, str):
        return None
    # Accepts both "https://orcid.org/0000-0002-5686-2078" and the bare iD.
    candidate = identifier.strip().rstrip('/').split('orcid.org/')[-1].upper()
    return candidate if _ORCID_PATTERN.match(candidate) else None


def _affiliation_name(affiliation):
    """Return an affiliation as a plain string, or None when it has none."""
    if isinstance(affiliation, str):
        return affiliation or None
    if isinstance(affiliation, dict):
        return affiliation.get('name') or None
    if isinstance(affiliation, (list, tuple)):
        names = [_affiliation_name(item) for item in affiliation]
        return ', '.join(name for name in names if name) or None
    return None


def parse_person_schema_property(person_property, contributor_field):
    """
    Convert one CodeMeta Person entry into a Zenodo person dictionary.

    Parameters
    ----------
    person_property: dict
        a single CodeMeta Person entry. It must provide at least one of
        `familyName`, `givenName` or `name`; `@id` and `affiliation` are
        optional.
    contributor_field: str
        CodeMeta key the entry was found under. Entries found under one of
        `codemeta_contributors_fields` get a Zenodo contributor `type`.

    Returns
    -------
    zenodo_person: dict
        dictionary with the `name` key and, when available, `orcid`,
        `affiliation` and `type`.

    Raises
    ------
    KeyError
        if the entry holds no usable name.
    """
    zenodo_person = {'name': _person_name(person_property)}

    # Only a real ORCID iD goes in the `orcid` field; any other `@id`
    # (blank-node id, homepage URL, ...) is left out.
    orcid = _orcid_from_id(person_property.get('@id'))
    if orcid is not None:
        zenodo_person['orcid'] = orcid

    affiliation = _affiliation_name(person_property.get('affiliation'))
    if affiliation is not None:
        zenodo_person['affiliation'] = affiliation

    if contributor_field in codemeta_contributors_fields:
        if contributor_field in ('provider', 'publisher'):
            zenodo_person['type'] = 'Other'
        else:
            # The remaining contributor fields are single words, so
            # capitalising the first letter is enough (this would NOT be
            # right for a two-word contributor type).
            zenodo_person['type'] = contributor_field.title()

    return zenodo_person


def add_author_metadata(zenodo_file, codemt_person_entry, person_field):
    """
    Add the persons of one CodeMeta key to the Zenodo metadata, in place.

    Parameters
    ----------
    zenodo_file: dict
        metadata dictionary with the zenodo syntax. It is modified in place:
        persons are appended to its `creators` or `contributors` list.
    codemt_person_entry: list or dict
        a single CodeMeta Person entry, or a list of them.
    person_field: str
        CodeMeta key the entry was found under. Keys of
        `codemeta_creators_fields` feed `creators`, keys of
        `codemeta_contributors_fields` feed `contributors`; any other key is
        ignored.
    """
    if person_field in codemeta_creators_fields:
        zenodo_key = 'creators'
    elif person_field in codemeta_contributors_fields:
        zenodo_key = 'contributors'
    else:
        return

    if isinstance(codemt_person_entry, (list, tuple)):
        person_entries = codemt_person_entry
    else:
        person_entries = [codemt_person_entry]

    # Index by name so a person listed twice under the same key is kept once
    # (the last occurrence wins).
    full_contacts = {}
    for person_property in person_entries:
        zenodo_person = parse_person_schema_property(person_property,
                                                     person_field)
        full_contacts[zenodo_person['name']] = zenodo_person

    if not full_contacts:
        # Do not create an empty `creators` / `contributors` list.
        return

    registered = zenodo_file.setdefault(zenodo_key, [])
    for zenodo_person in full_contacts.values():
        # The same person may already be there through another CodeMeta key
        # (e.g. both `author` and `maintainer`).
        if zenodo_person not in registered:
            registered.append(zenodo_person)


def find_matching_metadata(codemeta_json):
    """
    Translate an already parsed codemeta.json into the Zenodo syntax.

    Please note that the following fields are ASSUMED. If they are not
    correct, change them, or contact us otherwise.
     * "access_right": "open"
     * "language": "eng"

    Only the person properties listed in `codemeta_allowed_person_fields`
    are converted; other schema.org person types (actor, director, member,
    performer ...) are ignored.

    Parameters
    ----------
    codemeta_json: dict
        already parsed dictionary containing the metadata of the
        codemeta.json file.

    Returns
    -------
    metadata_zenodo: dict
        dictionary containing the metadata information found at the
        codemeta.json file but written using the Zenodo syntax.
    """
    metadata_zenodo = {'language': 'eng',
                       'access_right': 'open'}

    # A missing `@type` is handled like an unknown one instead of crashing.
    if codemeta_json.get("@type") == "SoftwareSourceCode":
        metadata_zenodo['upload_type'] = 'software'
    else:
        metadata_zenodo['upload_type'] = ''
        print("\nCould not identify the type of schema in the `codemeta.json file`.\n"
              "Thus the 'upload_type' at the `.zenodo.json` file was left EMPTY.\n"
              "Please fill it up by yourself choosing from the following list - otherwise zenodo will NOT be able "
              "to publish your entry:\n"
              " * publication: Publication\n"
              " * poster: Poster\n"
              " * presentation: Presentation\n"
              " * dataset: Dataset\n"
              " * image: Image\n"
              " * video: Video/Audio\n"
              " * software: Software\n"
              " * lesson: Lesson\n"
              " * physicalobject: Physical object\n"
              " * other: Other\n")

    if 'name' in codemeta_json:
        metadata_zenodo['title'] = codemeta_json['name']

    if 'description' in codemeta_json:
        metadata_zenodo['description'] = codemeta_json['description']

    # `version` wins over `softwareVersion` when both are given; the key is
    # simply left out when neither is.
    for version_key in ('version', 'softwareVersion'):
        if version_key in codemeta_json:
            metadata_zenodo['version'] = codemeta_json[version_key]
            break

    if 'keywords' in codemeta_json:
        keywords = codemeta_json['keywords']
        metadata_zenodo['keywords'] = \
            keywords if isinstance(keywords, list) else [keywords]

    if 'releaseNotes' in codemeta_json:
        metadata_zenodo['notes'] = "Release Notes: " + \
                                   codemeta_json['releaseNotes']

    if 'citation' in codemeta_json:
        citation = codemeta_json['citation']
        # Zenodo defines `references` as a list of strings, so a single
        # citation string is wrapped. Other value types are passed through
        # untouched.
        metadata_zenodo['references'] = \
            [citation] if isinstance(citation, str) else citation

    if 'datePublished' in codemeta_json:
        metadata_zenodo['publication_date'] = codemeta_json['datePublished']

    for person_type in codemeta_allowed_person_fields:
        if person_type in codemeta_json:
            add_author_metadata(metadata_zenodo,
                                codemeta_json[person_type],
                                person_field=person_type)

    return metadata_zenodo


class CodeMeta2ZenodoController:
    """Control the conversion of a codemeta file to a zenodo file"""

    def __init__(self):
        # Parsed content of the codemeta.json file.
        self.codemeta_data = {}
        # Metadata converted to the Zenodo syntax.
        self.zenodo_data = {}

    def load_codemeta(self, codemeta_filename):
        """Load `codemeta_filename` into the converter"""
        with open(codemeta_filename, encoding='utf-8') as infile:
            self.codemeta_data = json.load(infile)

    def convert_license(self):
        """
        Set the zenodo `license` from the codemeta `license` entry.

        An SPDX URL is reduced to its trailing identifier; any other license
        value is stored as 'other-open'. Without license information a
        warning is printed and no `license` key is written.
        """
        license_entry = self.codemeta_data.get('license')

        # Only one license is written: the first one of a list.
        if isinstance(license_entry, (list, tuple)):
            license_entry = license_entry[0] if license_entry else None
        # A license given as an object is looked up by its URL / identifier.
        if isinstance(license_entry, dict):
            license_entry = \
                license_entry.get('url') or license_entry.get('@id')

        if license_entry is None:
            print("No license information found.\n"
                  "This means a proprietary license.\n"
                  "Please contact us, ESCAPE encourages Open Source Science.\n")
            return

        spdx_id = ''
        if isinstance(license_entry, str) and \
                license_entry.startswith(_SPDX_URL_PREFIX):
            spdx_id = license_entry[len(_SPDX_URL_PREFIX):] \
                .strip('/').split('/')[-1]
        self.zenodo_data['license'] = spdx_id or 'other-open'

    def convert(self):
        """Convert data over to zenodo format"""
        self.zenodo_data = find_matching_metadata(self.codemeta_data)
        self.convert_license()

    def add_escape2020_community(self):
        """
        Add compulsory information to the .zenodo.json file:
         * zenodo community : ESCAPE2020
        """
        self.zenodo_data["communities"] = [{"identifier": "escape2020"}]

    def add_escape2020_grant(self):
        """
        Add compulsory information to the .zenodo.json file:
         * ESCAPE grant ID (zenodo syntax)
        """
        self.zenodo_data["grants"] = [{"id": "10.13039/501100000780::824064"}]

    def write_zenodo(self, zenodo_filename):
        """Write `zenodo_filename` after conversion"""
        with open(zenodo_filename, 'w', encoding='utf-8') as outfile:
            json.dump(self.zenodo_data, outfile, indent=4, sort_keys=True)


def parse_codemeta_and_write_zenodo_metadata_file(codemeta_filename,
                                                  zenodo_outname):
    """
    Read the codemeta.json file and create a new `.zenodo.json` file.

    The written file holds the information found in the codemeta.json file,
    expressed WITH THE ZENODO SYNTAX, plus the ESCAPE2020 community and
    grant entries.

    Parameters
    ----------
    codemeta_filename: str or Path
        path to the codemeta.json file
    zenodo_outname: str or Path
        path and name to the zenodo metadata json file
        NOT TO BE CHANGED. The file must be named `.zenodo.json` and be
        stored in the root directory of the library.
    """
    converter = CodeMeta2ZenodoController()
    converter.load_codemeta(codemeta_filename)
    converter.convert()
    converter.add_escape2020_community()
    converter.add_escape2020_grant()
    converter.write_zenodo(zenodo_outname)
#!/usr/bin/env python
"""Command line tool creating a `.zenodo.json` file from a codemeta.json."""

import argparse
import sys
from pathlib import Path

# Answers understood by `query_yes_no`. These are the values formerly
# accepted through `distutils.util.strtobool` (a module removed from the
# standard library in Python 3.12), plus "ye".
_YES_ANSWERS = frozenset({'y', 'ye', 'yes', 't', 'true', 'on', '1'})
_NO_ANSWERS = frozenset({'n', 'no', 'f', 'false', 'off', '0'})


def query_yes_no(question, default="yes"):
    """
    Ask a yes/no question on the standard output and return the answer.

    The question is asked again until a valid answer is read from the
    standard input.

    :param question: str
        question to the user
    :param default: str - "yes", "no" or None
        presumed answer if the user just hits <Enter>.
        "yes" or "no" will set a default answer for the user
        None will require a clear answer from the user

    :return: bool - True for "yes", False for "no"

    :raises ValueError: if `default` is not "yes", "no" or None
    """
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError(f"invalid default answer: '{default}'")

    while True:
        sys.stdout.write(question + prompt)
        sys.stdout.flush()
        choice = input().strip().lower()

        if choice == '' and default is not None:
            return default == "yes"
        if choice in _YES_ANSWERS:
            return True
        if choice in _NO_ANSWERS:
            return False

        sys.stdout.write("Please respond with 'yes' or 'no' "
                         "(or 'y' or 'n').\n")


def query_continue(question, default="no"):
    """
    Ask a question and if the answer is no, exit the program.
    Calls `query_yes_no`.

    :param question: str
    :param default: str

    :return answer: bool - answer from query_yes_no (always True, since a
        negative answer exits the program)
    """
    answer = query_yes_no(question, default=default)
    if not answer:
        sys.exit("Program stopped by user")
    return answer


def main():
    """
    Convert the codemeta.json file given on the command line.

    The `.zenodo.json` file is written next to the input file. The user is
    asked for confirmation before an existing `.zenodo.json` is overwritten.
    """
    # Imported here so that the interactive helpers above stay usable
    # without the conversion library. The crosswalk lives in the
    # `eossr.metadata.codemeta2zenodo` module of this package.
    from eossr.metadata.codemeta2zenodo import \
        parse_codemeta_and_write_zenodo_metadata_file

    parser = argparse.ArgumentParser(
        description="Converts a metadata descriptive files from the the CodeMeta to the Zenodo schema. "
                    "Creates a .zenodo.json file from a codemeta.json file."
    )

    parser.add_argument(
        '--input_codemeta_file', '-i', type=str,
        dest='codemeta_file',
        help='Path to a codemeta.json file',
        required=True
    )

    args = parser.parse_args()

    codemeta_file = Path(args.codemeta_file)

    # Check if file exists and it is named as it should
    if not codemeta_file.exists():
        print("\n\tThe input file doest not exists. Exiting.")
        sys.exit(-1)

    well_named = codemeta_file.name.startswith('codemeta') and \
        codemeta_file.name.endswith('.json')
    if not well_named:
        print(f"\n\t{codemeta_file.name} either does not starts with the `codemeta` prefix or "
              f"either it does not finishes with a `.json` suffix. Exiting")
        sys.exit(-1)

    directory_codemeta = codemeta_file.parent.absolute()
    zenodo_metadata_file = directory_codemeta / '.zenodo.json'

    # Check overwrite zenodo file if exists
    if zenodo_metadata_file.exists():
        query_continue(
            f"\nThe {zenodo_metadata_file.name} file already exists."
            f"\nIf you continue you will overwrite the file with the metadata found in the {codemeta_file.name} file. "
            f"\n\nAre you sure ?")

    # Parse the codemeta.json file and create the .zenodo.json file
    parse_codemeta_and_write_zenodo_metadata_file(
        codemeta_file,
        zenodo_metadata_file
    )
    print("\nConversion codemeta2zenodo done.\n")


if __name__ == "__main__":
    main()