""" store_tools.base

Constants, message templates, regular expressions and record predicates
shared by the store tools.

"""
import re


ARXIV = "arXiv"
ARXIV_PDF = "http://arxiv.org/pdf/"

# identifiers used to recognise each store
CDS = ("cds", "cds.cern.ch")
INS = ("inspirehep", "inspirehep.net")

MSG_INV_CONF = "Reject invalid conference information"
MSG_INV_CONF_KEY = "Reject invalid conference key"
MSG_NO_CONF = "Reject no conference information"
MSG_NO_CONF_ID_KEY = "Reject no conference identifier and key"
MSG_NO_COUNTRY = "Reject invalid country"
MSG_NO_HOST = "Reject no host information in record"
MSG_NO_PUBLISHER = "Reject invalid publisher"
MSG_NO_SHELF = "No shelf %s for store %s"
MSG_NO_THESIS = "Reject no thesis information"
MSG_WELL_FORMED_COLLABORATION = "Reject collaboration is not well formed"

# %-templates
OAI = "oai:%s:%s"
OAI_URL = "http://%s/record/%s"

# raw string: "\d" and "\." are not valid escapes in a normal literal
REG_ARXIV_NUMBER = re.compile(r"\d+\.\d+")

# names are encoded "Family, First" where First can be "First-Second"
# many variants are possible with initials, dots, ...
#   group(1) is the family name
#   group(2) is the part of the first name before the separator (" ", "-")
#   group(3) is the part of the first name after the separator (" ", "-")
REG_AUTHOR = re.compile(r"^([\w\- ]+), (\w+)\.?[\- ]*(\w+)*\.?$", re.UNICODE)

REG_DATE = re.compile(r"(\d{4}-\d{2}-\d{2})")

# raw string for the same reason as REG_ARXIV_NUMBER
REG_CONF = re.compile(r"^C\d+-\d+-\d+(?:\.\d+)?$")

REG_OAI = re.compile(r"oai:([a-z\.]+):([\d]+)")
REG_YEAR = re.compile(r"(\d{4})")

THESIS_DIR = "dir."


def _has_primary_collection(recjson, value):
    """True when one of the record collections has the given primary value.

    The ``collection`` entry of the record is either a single item or
    a list of items. An item matches when it contains the key ``primary``
    and the associated value is equal to ``value``.

    Args:
        recjson (dict): record associated to a publication or to an institute.
        value (str): the primary collection to look for.

    Return:
        bool: ``True`` when at least one collection item matches,
        ``False`` otherwise, including when the record has no
        ``collection`` entry.

    """
    if "collection" not in recjson:
        return False

    data = recjson["collection"]
    if not isinstance(data, list):
        # a single collection is not wrapped in a list
        data = [data]

    return any("primary" in di and di["primary"] == value for di in data)


def is_conference(recjson):
    """True when the record describes a publication related to a conference.

    The checks are applied in this order, the first match wins:

    * a collection item with the primary value ``ConferencePaper``
    * the key ``aleph_linking_page`` in the record
    * a ``subject`` entry with a ``term`` equal to ``Talk``
    * a ``publication_info`` entry containing ``cnum``

    Args:
        recjson (dict): record associated to a publication or to an institute.

    Return:
        bool: ``True`` when the record describes a publication related to
        a conference.

    """
    # ConferencePaper in collection
    # find proceeding in both stores
    if _has_primary_collection(recjson, "ConferencePaper"):
        return True

    # try to identify talk in cds
    # look for a conference key or for a subject equal to Talk
    if "aleph_linking_page" in recjson:
        return True

    if "subject" in recjson:
        subject = recjson["subject"]
        if "term" in subject and subject["term"] == "Talk":
            return True

    # try to identify talk in inspirehep
    # look for a conference key
    return (
        "publication_info" in recjson
        and "cnum" in recjson["publication_info"]
    )


def is_institute(recjson):
    """True when the record describes an institute.

    Args:
        recjson (dict): record associated to a publication or to an institute.

    Return:
        bool: ``True`` when a collection item of the record has the primary
        value ``INSTITUTION``.

    """
    # INSTITUTION in collection
    return _has_primary_collection(recjson, "INSTITUTION")


def is_thesis(recjson):
    """True when the record describes a thesis.

    Args:
        recjson (dict): record associated to a publication or to an institute.

    Return:
        bool: ``True`` when a collection item of the record has the primary
        value ``THESIS``.

    """
    # THESIS in collection
    if _has_primary_collection(recjson, "THESIS"):
        return True