""" store_tools.base

Constants, compiled regular expressions and record predicates.

"""
import re

6 7
# arXiv label and base URL of the arXiv PDF files
ARXIV = "arXiv"
ARXIV_PDF = "http://arxiv.org/pdf/"

# rejection messages
MSG_INV_CONF = "Reject invalid conference information"
MSG_INV_CONF_KEY = "Reject invalid conference key"
MSG_NO_CONF = "Reject no conference information"
MSG_NO_CONF_ID_KEY = "Reject no conference identifier and key"
MSG_NO_COUNTRY = "Reject invalid country"
MSG_NO_HOST = "Reject no host information in record"
MSG_NO_PUBLISHER = "Reject invalid publisher"
MSG_NO_THESIS = "Reject no thesis information"
MSG_WELL_FORMED_COLLABORATION = "Reject collaboration is not well formed"

# %-format templates taking two values
# presumably (host, record id) -- TODO confirm against the callers
OAI = "oai:%s:%s"
OAI_URL = "http://%s/record/%s"
21

22
# digits, a dot, then digits (e.g. "1501.00001")
# raw string: "\d" and "\." are not valid escapes in a plain string literal
REG_ARXIV_NUMBER = re.compile(r"\d+\.\d+")
23

24 25 26 27 28 29
# Author names are encoded as "Family, First", where the first name can be
# compound ("First-Second"). Many variants are possible: initials, dots, ...
#   group(1) is the family name
#   group(2) is the part of the first name before the separator (" ", "-")
#   group(3) is the part of the first name after the separator (" ", "-"),
#            or None when the first name has a single part
# The last group is optional ("?") rather than repeated ("*"): repeating a
# group that itself ends with "+" backtracks exponentially on strings that
# do not match, while accepting exactly the same names.
REG_AUTHOR = re.compile(r"^([\w\- ]+), (\w+)\.?[\- ]*(\w+)?\.?$", re.UNICODE)
30

LE GAC Renaud's avatar
LE GAC Renaud committed
31 32
# ISO-like date YYYY-MM-DD, captured in group(1)
REG_DATE = re.compile(r"(\d{4}-\d{2}-\d{2})")

# conference key: "C" followed by three dash-separated digit runs and an
# optional ".digits" suffix; the whole string has to match
# raw string: "\d" and "\." are not valid escapes in a plain string literal
REG_CONF = re.compile(r"^C\d+-\d+-\d+(?:\.\d+)?$")

# OAI identifier: group(1) is made of lower case letters and dots,
# group(2) of digits
REG_OAI = re.compile(r"oai:([a-z\.]+):([\d]+)")

# four consecutive digits, captured in group(1)
REG_YEAR = re.compile(r"(\d{4})")

THESIS_DIR = "dir."
37

38

39
def is_conference(recjson):
    """True when the record describes a publication related to a conference.

    Three indicators are tried in turn, the first one found wins:

        * a collection whose ``primary`` value is ``ConferencePaper``;
        * an ``aleph_linking_page`` field, or a ``subject`` whose ``term``
          is ``Talk``;
        * a ``cnum`` entry in the ``publication_info`` field.

    Args:
        recjson (dict):
            record associated to a publication or to an institute.

    Return:
        bool:
            ``True`` when the record describes a publication related
            to a conference.

    """
    # ConferencePaper in collection
    # find proceeding in both stores
    if "collection" in recjson:
        data = recjson["collection"]

        # the field holds either a single entry or a list of entries
        if not isinstance(data, list):
            data = [data]

        value = "ConferencePaper"
        if any("primary" in di and di["primary"] == value for di in data):
            return True

    # try to identify talk in cds
    # look for a conference key or for a subject equal to Talk
    if "aleph_linking_page" in recjson:
        return True

    if "subject" in recjson:
        subject = recjson["subject"]
        if "term" in subject and subject["term"] == "Talk":
            return True

    # try to identify talk in inspirehep
    # look for a conference key
    return (
        "publication_info" in recjson
        and "cnum" in recjson["publication_info"]
    )
85 86


87
def is_institute(recjson):
    """True when the record describes an institute.

    The record is an institute when one of its collections has the
    ``primary`` value ``INSTITUTION``.

    Args:
        recjson (dict):
            record associated to a publication or to an institute.

    Return:
        bool:
            ``True`` when the record describes an institute.

    """
    # INSTITUTION in collection
    if "collection" not in recjson:
        return False

    data = recjson["collection"]

    # the field holds either a single entry or a list of entries
    if not isinstance(data, list):
        data = [data]

    value = "INSTITUTION"
    return any("primary" in di and di["primary"] == value for di in data)
111 112


113
def is_thesis(recjson):
    """True when the record describes a thesis.

    The record is a thesis when one of its collections has the
    ``primary`` value ``THESIS``.

    Args:
        recjson (dict):
            record associated to a publication or to an institute.

    Return:
        bool: ``True`` when the record describes a thesis.

    """
    # THESIS in collection
    if "collection" not in recjson:
        return False

    data = recjson["collection"]

    # the field holds either a single entry or a list of entries
    if not isinstance(data, list):
        data = [data]

    value = "THESIS"

    # always a bool, never an implicit None, when no collection matches
    return any("primary" in di and di["primary"] == value for di in data)