# -*- coding: utf-8 -*-
""" invenio_tools.base

Shared constants, regular expressions and MARC record predicates.

"""
import re


ARXIV = "arXiv"
ARXIV_PDF = "http://arxiv.org/pdf/"

MSG_NO_CONF = "Reject no conference information"
MSG_NO_THESIS = "Reject no thesis information"

OAI_URL = "http://%s/record/%s"

# raw string: "\d" and "\." are not valid escapes in a plain string literal
REG_ARXIV_NUMBER = re.compile(r"\d+\.\d+")

# Author names are encoded as:
#     Family, L
#     Family, P L
#     Family, M -H
#     Family Name, J
#     Family-Name, J
#     Family, F Name
#     Family, First
# To avoid dealing with unicode characters,
# look for non-empty strings with \S
REG_AUTHOR = re.compile(r"(.+), (\S+)( |\-)*(\S+)*")

REG_OAI = re.compile(r"oai:([a-z\.]+):([\d]+)")
REG_YEAR = re.compile(r"(\d{4})")

THESIS_DIR = u"dir."


def is_conference(record):
    """True when the record describes a publication related to a conference.

    Args:
        record (Record): MARC record. It has to support the ``in`` operator
            on field codes and to provide the methods ``host()`` and
            ``_get(field, subfield)``.

    Return:
        bool: true when the MARC record describes a publication
            related to a conference.

    """
    # the conference field is the unambiguous signature
    if u"111" in record:
        return True

    # try with the conference key
    # the location of this value depends on the store:
    # cds.cern.ch (962, n) and inspirehep.net (773, w).
    if record.host().startswith("cds"):
        field, subfield = u"962", "n"
    else:
        field, subfield = u"773", "w"

    return len(record._get(field, subfield)) > 0


def is_institute(record):
    """True when the record describes an institute.

    The field 980 is either a single dictionary of subfields or a list
    of such dictionaries, e.g.::

        u'980': [
            {'b': [u'CK90', u'HEP200', u'PDGLIST', u'PPF', u'TOP500', u'WEB']},
            {'a': u'INSTITUTION'},
            {'a': u'CORE'}
        ]

    The record is an institute when one of the subfields ``a``
    is equal to ``INSTITUTION``.

    Args:
        record (Record): MARC record, accessed like a dictionary
            keyed by field code.

    Return:
        bool: true when the MARC record describes an institute

    """
    if u"980" not in record:
        return False

    entries = record[u"980"]

    # a lone dictionary is handled as a list with one element
    if isinstance(entries, dict):
        entries = [entries]
    elif not isinstance(entries, list):
        return False

    # dict.get is available in python 2 and 3 (iteritems is python 2 only)
    for entry in entries:
        if isinstance(entry, dict) and entry.get("a") == u"INSTITUTION":
            return True

    return False


def is_thesis(record):
    """True when the record describes a thesis.

    Args:
        record (Record): MARC record providing the method
            ``_get(field, subfield, force_list=True)``.

    Return:
        bool: true when the MARC record describes a thesis, namely
            when ``THESIS`` appears in one of the 980 $a values.

    """
    values = record._get(u"980", "a", force_list=True)

    # the values are merged so that a single search covers all of them
    merged = ", ".join(values)
    return 'THESIS' in merged