"""store_tools.factory

Factory functions building the interface to a publication store
(``build_store``) and the record objects (``build_record``), together with
the helpers completing a raw record with conference data or affiliation
keys before it is upcast.

"""
import re

from datetime import datetime

from .base import (CDS,
                   INS,
                   is_conference,
                   is_institute,
                   is_thesis,
                   MSG_INV_CONF,
                   MSG_INV_CONF_KEY,
                   MSG_NO_CONF,
                   MSG_NO_SHELF,
                   REG_CONF)
from .exception import CdsException
from .inveniostore import InvenioStore
from .recordconf import RecordConf
from .recordinst import RecordInst
from .recordpubli import RecordPubli
from .recordthesis import RecordThesis

# Patterns capturing the content of the subfields ``$$t`` and ``$$u`` in the
# text dump of a MARC field. Raw strings are required so that ``\$`` and
# ``\w`` reach the regex engine instead of being (invalid) string escapes.
REX_T = r"\$\$t([\w, ]+)"
REX_U = r"\$\$u([\w, ]+)"


def add_affiliation_keys(recjson):
    """Add the affiliation keys to the record describing an institute:

        * The XML record contains the affiliation keys used by
          inspirehep.net. They are located in the field 110__u and
          110__t (future).
        * The JSON record does not contains this information.
        * This tool add the affiliation keys to the JSON record.
          They are located:

            +----------------+-------------------------------+
            | field (limbra) | subfield                      |
            +----------------+-------------------------------+
            | corporate_note | identifier, future_identifier |
            +----------------+-------------------------------+

    Args:
        recjson (dict):
            record data (MarcJSON). It is modified in place: the field
            ``corporate_note`` is created or replaced.

    Raises:
        AttributeError:
            when the subfield ``$$u`` or ``$$t`` is not found in the text
            returned by the store (``re.search`` yields ``None``).

    """
    store = InvenioStore("inspirehep", shelf="institutions")
    url = f"https://old.inspirehep.net/record/{recjson['recid']}"

    # ask only for the field 110 as text since the keys are absent from JSON
    rep = store.interrogate(url, ot="110", of="txt")
    txt = rep.text

    recjson["corporate_note"] = {
        "identifier": re.search(REX_U, txt).group(1),
        "future_identifier": re.search(REX_T, txt).group(1)}


def _format_conference_dates(start, end):
    """Build the human readable dates of a conference, e.g. 6-12 Dec 2010.

    Args:
        start (str):
            opening date formatted as ``YYYY-MM-DD``.

        end (str):
            closing date formatted as ``YYYY-MM-DD``.

    Returns:
        str:
            ``6-12 Dec 2010`` when both dates are in the same month of the
            same year, ``29 Nov - 3 Dec 2010`` otherwise.

    """
    ds = datetime.strptime(start, "%Y-%m-%d")
    de = datetime.strptime(end, "%Y-%m-%d")

    # The day comes from the ``day`` attribute rather than from the
    # strftime directive "%-d", which is a platform specific extension.
    # The year is part of the comparison so that the same month in two
    # different years is not collapsed into a single month.
    if (ds.year, ds.month) == (de.year, de.month):
        return f"{ds.day}-{de.day} {ds:%b %Y}"

    return f"{ds.day} {ds:%b} - {de.day} {de:%b %Y}"


def add_conference_data(recjson):
    """Add the conference data to the recjson.

    Note:
        Encoding of conference information depends on the store.
        It adds the following field and subfield::

            +---------------+-----------------------------------------------+
            | field         | subfield                                      |
            +---------------+-----------------------------------------------+
            | meeting_name  | closing_date, coference_code, country, date,  |
            |               | location, meeting, opening_date, year         |
            | meeting_note  | recid, url                                    |
            +---------------+-----------------------------------------------+

    Args:
        recjson (dict):
            record data (MarcJSON). It is modified in place.

    Note:
        * Fields are not added when there is no conference identifier
          and no conference key in the recjson.
        * Fields are not added either when the retrieval of the conference
          raises a CdsException.
        * The method CheckAndFix.is_conference will identify that case.

    """
    # ........................................................................
    #
    # Retrieve conference identifier and the host
    #   - the algorithm depend on the store
    #   - for cds use aleph_linking_page
    #   - for inspire use publication_info.cnum
    #
    conf_id, conf_key, host = None, None, None

    if "aleph_linking_page" in recjson:
        di = recjson["aleph_linking_page"]
        conf_id = di["sysno"]
        conf_key = di.get("up_link", None)
        host = "cds.cern.ch"

    elif "publication_info" in recjson:
        # a single entry is a dictionary, several entries are a list
        data = recjson["publication_info"]
        data = (data if isinstance(data, list) else [data])

        for di in data:
            if "cnum" in di:
                conf_key = di["cnum"]
                host = "inspirehep.net"
                break

    if conf_id is None and conf_key is None:
        return

    # ........................................................................
    #
    # Get conference data
    #   - the identifier is preferred to the key
    #
    if conf_id is not None:
        conf_id = (conf_id if isinstance(conf_id, int) else int(conf_id))
        kwargs = {"conf_id": conf_id}
    else:
        kwargs = {"key": conf_key}

    try:
        confjson = get_conference_data(host, **kwargs)

    except CdsException:
        # best effort: the record is left without conference data
        return

    # ........................................................................
    #
    # Add conference data to the recjson (cds.cern.ch)
    #
    if host in CDS:

        # extract the conference url
        #   - information is in confjson[url]
        #   - in most of the case it is a dictionary
        #   - when it is a list take the first entry which is for the
        #     home page while the second one is for the proceeding (cds 2270940)
        #   - in other case the url is not defined (cds 2258914)
        confurl = ""
        if "url" in confjson:
            obj = confjson["url"]
            confurl = (obj["url"] if isinstance(obj, dict) else obj[0]["url"])

        recjson["meeting_name"] = confjson["meeting_name"]

        recjson["meeting_note"] = {"recid": confjson["recid"],
                                   "url": confurl}

    # ........................................................................
    #
    # Add conference data to the recjson (inspirehep.net)
    #
    elif host in INS:

        # location of the conference: first address with a country
        address = \
            [el for el in confjson["addresses"] if el.get("country")][0]

        # date of the conference 6-12 Dec 2010
        start, end = confjson["opening_date"], confjson["closing_date"]
        sdate = _format_conference_dates(start, end)

        # URL of the conference (take the first value)
        urls = confjson.get("urls")
        if urls is None:
            url = ""
        elif isinstance(urls, list) and urls:
            url = urls[0]["value"]
        else:
            url = "???"

        # add
        recjson["meeting_name"] = [{
            "closing_date": end,
            "coference_code": confjson["cnum"],
            "country": address["country_code"],
            "date": sdate,
            "location": f"{address['cities'][0]}, {address['country']}",
            "meeting": confjson["titles"][0]["title"],
            "opening_date": start,
            "year": confjson["opening_date"][:4]}]

        recjson["meeting_note"] = {
            "recid": confjson["control_number"],
            "url": url}


def build_record(recjson):
    """Transform a JSON object into a record

    Conference records are completed with the conference data and institute
    records with the affiliation keys before being upcast.

    Args:
        recjson (dict):
            record data in a JSON format.

    Return
        Record:
            either RecordConf, RecordInst, RecordPubli or RecordThesis

    """
    if is_conference(recjson):
        add_conference_data(recjson)
        upcast_record = RecordConf(recjson)

    elif is_institute(recjson):
        add_affiliation_keys(recjson)
        upcast_record = RecordInst(recjson)

    elif is_thesis(recjson):
        upcast_record = RecordThesis(recjson)

    else:
        upcast_record = RecordPubli(recjson)

    return upcast_record


def build_store(host=None, shelf=None):
    """Return the interface to the publication store.

    Args:
        host (str):
            possible values are ``cds``, ``cds.cern.ch``,``inspirehep``
            or ``inspirehep.net``

        shelf (str):
            section of the store containing records. It depends on the host.
            Possible values are ``None``, ``literature``, ``conferences``
            and ``institutions``

            +----------------+--------------+-----------------------------+
            | host           | shelf        | base API                    |
            +----------------+--------------+-----------------------------+
            | cds.cern.ch    | None         | https://cds.cern.ch/        |
            +----------------+--------------+-----------------------------+
            | inspirehep.net | None         | https://old.inspirehep.net/ |
            | inspirehep.net | literature   | https://old.inspirehep.net/ |
            | inspirehep.net | conferences  | https://inspirehep.net/     |
            | inspirehep.net | institutions | https://old.inspirehep.net/ |
            +----------------+--------------+-----------------------------+

            For a CDS host the shelf is passed to the store as it is,
            without any check.

    Returns:
        InvenioStore

    Raises:
        CdsException:
            the pair host, shelf is not one of the supported combinations.

    """
    if host in CDS:
        store = InvenioStore(
            host="cds.cern.ch",
            api_record="https://cds.cern.ch/record",
            api_search="https://cds.cern.ch/search",
            shelf=shelf)

    elif host in INS and shelf in (None, "literature", "institutions"):
        store = InvenioStore(
            host="old.inspirehep.net",
            api_record="https://old.inspirehep.net/record",
            api_search="https://old.inspirehep.net/search",
            shelf=shelf)

    elif host in INS and shelf in ("conferences",):
        store = InvenioStore(
            host="inspirehep.net",
            api_record="https://inspirehep.net/api/conferences",
            api_search="https://inspirehep.net/api/conferences/?q=",
            shelf=shelf)

    else:
        raise CdsException(MSG_NO_SHELF % (shelf, host))

    return store


def get_conference_data(host, conf_id=None, key=None):
    """Get the conference data identified by its id or key.

    Args:
        host (str):
            name of the store, one of the values accepted by
            ``build_store``.

        conf_id (int):
            the conference identifier in the store.
            This is the preferred way.

        key (str):
            the conference key in the store.

    Returns:
        dict:
            The conference data (MarcJSON), or ``None`` when neither
            ``conf_id`` nor ``key`` is given.

    Raises:
        CdsException:
            - conference record with a wrong identifier
            - conference key with an invalid format (inspirehep.net)
            - conference not found
            - host not supported (raised by ``build_store``)

    """
    store = build_store(host, shelf="conferences")

    # ........................................................................
    #
    # search by id in cds.cern.ch
    #
    if conf_id is not None and host in CDS:

        recjson = store.get_record(conf_id)

        if recjson["recid"] != conf_id:
            raise CdsException(MSG_INV_CONF)

        return recjson

    # ........................................................................
    #
    # search by key in cds.cern.ch
    #
    if key is not None and host in CDS:

        # the search can return several candidates,
        # keep the first one having the requested conference key
        ids = store.get_ids(p=key)

        for conf_id in ids:
            recjson = store.get_record(conf_id)
            if match_conference_key(recjson, key):
                return recjson

        raise CdsException(MSG_NO_CONF)

    # ........................................................................
    #
    # search by id in inspirehep.net
    #
    if conf_id is not None and host in INS:

        obj = store.get_record(conf_id)

        # the identifier is compared as a string
        if obj["id"] != str(conf_id):
            raise CdsException(MSG_INV_CONF)

        return obj["metadata"]

    # ........................................................................
    #
    # search by key in inspirehep.net
    #
    if key is not None and host in INS:

        # the key is normalised by replacing "/" with "-" before validation
        key = key.replace("/", "-")
        if not REG_CONF.match(key):
            raise CdsException(MSG_INV_CONF_KEY)

        obj = store.search(f"cnum:{key}")

        # an empty result (IndexError) means that the conference is unknown,
        # exactly as a malformed one (KeyError, TypeError)
        try:
            recjson = obj[0]["metadata"]

        except (IndexError, KeyError, TypeError) as err:
            raise CdsException(MSG_NO_CONF) from err

        if recjson["cnum"] != key:
            raise CdsException(MSG_NO_CONF)

        return recjson

    # neither an identifier nor a key
    return None


def match_conference_key(recjson, conf_key):
    """Return ``True`` when the record corresponds to a conference
    identified by its key.

    Args:
        recjson (dict):
            record formatted MarcJSON. The field ``meeting_name`` is either
            a dictionary (single entry) or a list of dictionaries.

        conf_key (str):
            conference key

    Returns
        bool:
            ``True`` when one entry of ``meeting_name`` has a subfield
            ``coference_code`` equal to the key.

    """
    if "meeting_name" not in recjson:
        return False

    # a single entry is a dictionary: iterating over it would yield its keys
    # instead of the entry itself
    meetings = recjson["meeting_name"]
    if isinstance(meetings, dict):
        meetings = [meetings]

    subfield = "coference_code"
    return any(subfield in di and di[subfield] == conf_key
               for di in meetings)