# -*- coding: utf-8 -*-
""" invenio_tools.record

"""
import pprint

from base import OAI, OAI_URL, REG_OAI
from __builtin__ import isinstance


class Record(dict):
    """The base class for MARC record.

    A dictionary with the following structure::

        record[field][subfield] = value
        record[field][subfield] = [val1, val2, ....]
        record[field] = [dict1(subfield1=..., subfield2=...),
                         dict2(subfield1=..., subfield2=...), ...]

    In the MARC standard, the ``field`` is a string containing at least
    three digits while the ``subfield`` is a letter. The type of the
    ``field`` is unicode and ``subfield`` is string.

    The class comes with a collection of methods to extract the record
    information masking the ``field`` and the ``subfield`` codification.

    The relation between methods and MARC fields are the following::

        +---------------+--------+------------+
        |               | CDS    | INSPIREHEP |
        +---------------+--------+------------+
        | id            | 001    |            |
        | primary oai   | 0248 a | 909CO o    |
        | secondary oai | 035 a  | 035 a      |
        +---------------+--------+------------+

    """

    def __init__(self, *args):
        """Build the record from the positional arguments accepted by
        :class:`dict`.

        """
        dict.__init__(self, *args)

        # private cache, filled by the method host()
        self.__host = None

        # meta data
        # the authors of my institutes signing the record
        # string containing a list of name separated by a comma
        self.my_authors = u""

    def _get(self, field, subfield, force_list=False):
        """Get the value associated to the key ``field`` and ``subfield``.

        Args:
            field (unicode): typical values are ``u"001"``, ``u"700"``,
                ``u"909CO"``, ....
            subfield (str): typical values are ``"a"``, ``"b"``, ....
            force_list (bool): always return a *list* when ``True``.

        Returns:
            unicode or list: empty string / list when not defined.

        """
        val = u''
        entry = self.get(field)

        # Only dictionaries are indexed by subfield. Testing the type
        # explicitly avoids the substring test (and the TypeError which
        # follows) when the field contains a plain string.
        if isinstance(entry, dict):
            if subfield in entry:
                val = entry[subfield]

        elif isinstance(entry, list):
            val = [el[subfield] for el in entry
                   if isinstance(el, dict) and subfield in el]

        if force_list and not isinstance(val, list):
            val = [val] if val else []

        return val

    def _oai_url(self, value):
        """Build the Open Archive Initiative URL.

        Args:
            value (unicode): OAI identifier, *e.g.* ``oai:host:id``

        Returns:
            unicode: the pattern of the string is ``http://host/record/id``.
                The string is empty when it is not defined or when the
                value is not well formed.

        """
        match = REG_OAI.match(value)
        if match:
            return OAI_URL % (match.group(1), match.group(2))

        return u""

    def debug(self):
        """Print the record structure on the standard output.

        """
        pprint.pprint(self)

    def host(self):
        """The store housing the record.

        Returns:
            unicode: ``inspirehep.net`` or ``cds.cern.ch`` or an empty
                string when not defined.

        """
        # The value is computed once and cached in self.__host.
        # An undefined host is not cached, it is evaluated at each call.
        if self.__host is not None:
            return self.__host

        val = self.primary_oai()
        if not val:
            return u''

        match = REG_OAI.match(val)
        if match:
            self.__host = match.group(1)
            return self.__host

        return u''

    def id(self):
        """The id of the record in the store.

        Returns:
            unicode: the unique id of the record in the store

        Raises:
            KeyError: when the field ``001`` is not defined.

        """
        return self[u"001"]

    def oai(self):
        """The Open Archive Initiative identifier(s).

        Returns:
            unicode: the primary and secondary OAI identifier are separated
                by a comma. The pattern of the identifier is
                ``oai:host:id`` or an empty string when it is not defined.

        """
        oai_1 = self.primary_oai()
        if not oai_1:
            return u""

        oai_2 = self.secondary_oai()
        if not oai_2:
            return oai_1

        return u"%s, %s" % (oai_1, oai_2)

    def oai_url(self):
        """The Open Archive Initiative identifier URL(s).

        Returns:
            unicode: the primary and secondary URLs are separated by a
                comma. The pattern of the URL is ``http://host/record/id``
                or an empty string when it is not defined or when the OAI
                is not well formed.

        """
        oai_url_1 = self.primary_oai_url()
        if not oai_url_1:
            return u""

        oai_url_2 = self.secondary_oai_url()
        if not oai_url_2:
            return oai_url_1

        return u"%s, %s" % (oai_url_1, oai_url_2)

    def primary_oai(self):
        """The primary Open Archive Initiative identifier.

        The primary OAI identifier corresponds to the record of the store.
        It contains the value return by the method :meth:`.id`.

        Returns:
            unicode: the pattern of the string is ``oai:host:id``.
                It is an empty string when not defined

        """
        # the location of the OAI information depends on the store
        # cds: (0248, a), inspirehep: (909CO, o)
        if u"0248" in self:
            field, subfield = u"0248", "a"

        elif u"909CO" in self:
            field, subfield = u"909CO", "o"

        else:
            return u""

        # standard case
        value = self._get(field, subfield)

        # in some case OAI is a list
        # select the OAI corresponding to the current ID.
        # The separator is part of the comparison in order to avoid
        # matching the id 45 with oai:host:12345.
        if isinstance(value, list):
            tail = u":" + self.id()
            for el in value:
                if el.endswith(tail):
                    return el

            return u""

        return value

    def primary_oai_url(self):
        """The Open Archive Initiative URL for the primary OAI.

        Note:
            A record can be deleted and replaced by a new one.
            In that case the OAI is not changed but the record
            has a new *id* and new *URL* which is return by this method.

        Returns:
            unicode: the pattern of the string is ``http://host/record/id``.
                The string is empty when it is not defined or when the OAI
                is not well formed.

        Raises:
            KeyError: when the field ``001`` is not defined.

        """
        oai = self.primary_oai()
        rec_id = self.id()

        # the OAI identifies the current record
        if oai.endswith(u":" + rec_id):
            return self._oai_url(oai)

        # the OAI points to a former id: build the URL from the host and
        # the current id. Without a host (OAI missing or not well formed)
        # the URL can not be built.
        host = self.host()
        if not host:
            return u""

        return OAI_URL % (host, rec_id)

    def secondary_oai(self):
        """The secondary OAI identifier.

        If the current store is *cds.cern.ch*, the secondary OAI identifier
        corresponds to the record in the other store, *inspirehep.net*,
        and vice versa.

        Returns:
            unicode: the pattern of the string is ``oai:host:id``.
                It is an empty string when not defined

        """
        host = self.host()

        if host == u"cds.cern.ch":
            invenio_key = u"inspire"
            secondary_host = u"inspirehep.net"

        elif host == u"inspirehep.net":
            invenio_key = u"cds"
            secondary_host = u"cds.cern.ch"

        else:
            return u""

        if u"035" not in self:
            return u""

        # the field 035 is either a dictionary or a list of dictionaries
        values = self[u"035"]
        if isinstance(values, dict):
            values = [values]

        elif not isinstance(values, list):
            return u""

        # subfield 9 names the other store, subfield a contains its id
        for di in values:
            if "9" in di and "a" in di and di["9"].lower() == invenio_key:
                return OAI % (secondary_host, di["a"])

        return u""

    def secondary_oai_url(self):
        """The Open Archive Initiative URL for the secondary OAI.

        Returns:
            unicode: the pattern of the string is ``http://host/record/id``.
                The string is empty when it is not defined or when the OAI
                is not well formed.

        """
        return self._oai_url(self.secondary_oai())

    def sysno(self):
        """The content of the MARC field ``970 a``.

        Note:
            Presumably the system number of the record, as suggested by
            the name of the method -- to be confirmed.

        Returns:
            unicode or list: empty string when not defined.

        """
        return self._get(u"970", "a")