""" invenio_tools.factory """ import requests from base import (is_conference, is_institute, is_thesis, MSG_INV_CONF, MSG_INV_CONF_KEY, MSG_NO_CONF, MSG_NO_CONF_ID_KEY, REG_CONF, REG_OAI) from exception import CdsException from inveniostore import InvenioStore from recordconf import RecordConf from recordinst import RecordInst from recordpubli import RecordPubli from recordthesis import RecordThesis def add_affiliation_keys(recjson): """A the affiliation keys to the record describing an institute: * The XML record contains the affiliation keys used by inspirehep.net. They are located in the field 110__u and 110__t (future). * The JSON record does not contains this information. * This tool add the affiliation keys to the JSON record. They are located: +----------------+------------------------------------+ | field (limbra) | subfield | +----------------+------------------------------------+ | corporate_note | identifier, futur_identifier, name | +----------------+------------------------------------+ Args recjson (dict): record data (MarcJSON) """ url = "https://inspirehep.net/record/%i" % recjson[u"recid"] rep = requests.get(url, params={"ot": "110", "of": "txt"}) # decode the string: '000recid 110__ $$aXXX$$bYYY$$tZZZ\n' txt = rep.content.replace("\n", "") li = txt[txt.find("$"):].split("$$") di = {} for el in li: if len(el) == 0: continue di[el[0:1]] = el[1:] recjson[u"corporate_note"] = {u"identifier": di["u"], u"future_identifier": di["t"]} def add_conference_data(recjson): """Add the conference data to the recjson. It adds the following field and subfield:: +---------------+-----------------------------------------------+ | field | subfield | +---------------+-----------------------------------------------+ | meeting_name | closing_date, coference_code, country, date, | | | location, opening_date, year | | meeting_note | recid, url | +---------------+-----------------------------------------------+ Args: recjson (dict): record data (MarcJSON) Note: * Fields are not added when there is no conference identifier and no conference key in the recjson. * The method CheckAndFix.is_conference will identify that case. """ # ........................................................................ # # Retrieve conference identifier and the host # - the algorithm depend on the store # - for cds use aleph_linking_page # - for inspire use publication_info.cnum # conf_id, conf_key, host = None, None, None if u"aleph_linking_page" in recjson: di = recjson[u"aleph_linking_page"] conf_id = di[u"sysno"] conf_key = di[u"up_link"] host = "cds.cern.ch" elif u"publication_info" in recjson: data = recjson[u"publication_info"] data = (data if isinstance(data, list) else [data]) for di in data: if u"cnum" in di: conf_key = di[u"cnum"] host = "inspirehep.net" break if conf_id is None and conf_key is None: return # ........................................................................ # # Get conference data # # get the data if conf_id is not None: conf_id = (conf_id if isinstance(conf_id, int) else int(conf_id)) confjson = get_conference_data(host, conf_id=conf_id) else: confjson = get_conference_data(host, key=conf_key) # # extract the conference url # * information is in confjson[url] # * in most of the case it is a dictionary # * it happen that it is a list. The first entry is for the conference # home page while the second one is for the proceeding (cds 2270940) # - in other case the url is not defined (cds 2258914) confurl = u"" if u"url" in confjson: obj = confjson[u"url"] confurl = (obj[u"url"] if isinstance(obj, dict) else obj[0][u"url"]) # ........................................................................ # # Add conference data to the recjson # recjson[u"meeting_name"] = confjson[u"meeting_name"] recjson[u"meeting_note"] = {u"recid": confjson[u"recid"], u"url": confurl} def build_record(recjson): """Transform a JSON object into a record Args: recjson (dict): record data in a JSON format. Return Record: either RecordConf, RecordInst, RecodPubli or RecordThesis Raises: """ if is_conference(recjson): add_conference_data(recjson) upcast_record = RecordConf(recjson) elif is_institute(recjson): add_affiliation_keys(recjson) upcast_record = RecordInst(recjson) elif is_thesis(recjson): upcast_record = RecordThesis(recjson) else: upcast_record = RecordPubli(recjson) return upcast_record def get_conference_data(host, conf_id=None, key=None): """Get the conference data identified by its id or key. Args: host (unicode): possible values are ``cds.cern.ch`` or ``inspirehep.net``. conf_id (int): the conference identifier in the store. This is the preferred way. key (unicode): the conference key in the store. Returns: dict: The conference data (MarcJSON). Raises: CdsException: - conference record with a wrong identifier - conference not found """ cds = InvenioStore(host) # ........................................................................ # # search by id # if conf_id is not None: recjson = cds.get_record(conf_id) if recjson["recid"] != conf_id: raise CdsException(MSG_INV_CONF) return recjson # ........................................................................ # # search by key in cds.cern.ch # if key is not None and host == "cds.cern.ch": ids = cds.get_ids(p=key) for conf_id in ids: recjson = cds.get_record(conf_id) if match_conference_key(recjson, key): return recjson raise CdsException(MSG_NO_CONF) # ........................................................................ # # search by key in inspirehep.net # if key is not None and host == "inspirehep.net": key = key.replace("/", "-") if not REG_CONF.match(key): raise CdsException(MSG_INV_CONF_KEY) ids = cds.get_ids(cc="Conferences", p="111__g:%s" % key) for conf_id in ids: recjson = cds.get_record(conf_id) if match_conference_key(recjson, key): return recjson raise CdsException(MSG_NO_CONF) def match_conference_key(recjson, conf_key): """Return ``True`` when the record corresponds to a conference identified by its key. Args: recjson (dict): record formatted MarcJSON. conf_key (unicode): conference key Returns bool: """ if u"meeting_name" in recjson: for di in recjson[u"meeting_name"]: subfield = u"coference_code" if subfield in di and di[subfield] == conf_key: return True return False