"""invenio_tools.factory

Helper functions which complete a MarcJSON record with conference or
affiliation data and build the matching record object.

"""

import requests

from base import (is_conference,
                  is_institute,
                  is_thesis,
                  MSG_INV_CONF,
                  MSG_INV_CONF_KEY,
                  MSG_NO_CONF,
                  MSG_NO_CONF_ID_KEY,
                  REG_CONF,
                  REG_OAI)

from exception import CdsException
from inveniostore import InvenioStore
from recordconf import RecordConf
from recordinst import RecordInst
from recordpubli import RecordPubli
from recordthesis import RecordThesis


def add_affiliation_keys(recjson):
    """Add the affiliation keys to the record describing an institute:

        * The XML record contains the affiliation keys used by inspirehep.net.
          They are located in the field 110__u and 110__t (future).

        * The JSON record does not contain this information.

        * This tool adds the affiliation keys to the JSON record.
          They are located:

             +----------------+------------------------------------+
             | field (limbra) | subfield                           |
             +----------------+------------------------------------+
             | corporate_note | identifier, future_identifier      |
             +----------------+------------------------------------+

    The keys are fetched from ``https://inspirehep.net/record/<recid>``
    and the record is modified in place.

    Args:
        recjson (dict): record data (MarcJSON). It must contain ``recid``.

    Raises:
        KeyError: the reply does not contain the subfield ``u`` or ``t``.

    """
    url = "https://inspirehep.net/record/%i" % recjson[u"recid"]
    rep = requests.get(url, params={"ot": "110", "of": "txt"})

    # decode the string: '000recid 110__ $$aXXX$$bYYY$$tZZZ\n'
    # Work on the decoded text: ``rep.content`` is a byte string and cannot
    # be combined with text arguments on Python 3.
    txt = rep.text.replace("\n", "")

    # Everything before the first "$$" is the record prefix. A reply
    # without any "$$" yields no subfield at all instead of a bogus entry.
    subfields = {}
    for chunk in txt.split("$$")[1:]:
        if chunk:
            # first character is the subfield code, the rest is its value
            subfields[chunk[0]] = chunk[1:]

    recjson[u"corporate_note"] = {u"identifier": subfields["u"],
                                  u"future_identifier": subfields["t"]}


def add_conference_data(recjson):
    """Add the conference data to the recjson.

    It adds the following field and subfield::

         +---------------+-----------------------------------------------+
         | field         | subfield                                      |
         +---------------+-----------------------------------------------+
         | meeting_name  | closing_date, coference_code, country, date,  |
         |               | location, opening_date, year                  |
         | meeting_note  | recid, url                                    |
         +---------------+-----------------------------------------------+

    The record is modified in place.

    Args:
        recjson (dict): record data (MarcJSON)

    Raises:
        CdsException: propagated from ``get_conference_data``.

    Note:
        * Fields are not added when there is no conference identifier and
          no conference key in the recjson.
        * The method CheckAndFix.is_conference will identify that case.
        * ``coference_code`` is spelled as the key is spelled in the data.

    """
    conf_id, conf_key, host = _find_conference_reference(recjson)

    if conf_id is None and conf_key is None:
        return

    # the identifier is the preferred way to retrieve the conference
    if conf_id is not None:
        if not isinstance(conf_id, int):
            conf_id = int(conf_id)
        confjson = get_conference_data(host, conf_id=conf_id)

    else:
        confjson = get_conference_data(host, key=conf_key)

    # resolve the url first so that recjson is untouched if it fails
    confurl = _extract_conference_url(confjson)

    recjson[u"meeting_name"] = confjson[u"meeting_name"]
    recjson[u"meeting_note"] = {u"recid": confjson[u"recid"], u"url": confurl}


def _find_conference_reference(recjson):
    """Return ``(conf_id, conf_key, host)`` locating the conference.

    The algorithm depends on the store:
        - for cds use ``aleph_linking_page`` (``sysno`` and ``up_link``)
        - for inspire use the first ``publication_info`` entry with ``cnum``

    All three values are ``None`` when the record refers to no conference.

    """
    if u"aleph_linking_page" in recjson:
        link = recjson[u"aleph_linking_page"]
        return link[u"sysno"], link[u"up_link"], "cds.cern.ch"

    if u"publication_info" in recjson:
        infos = recjson[u"publication_info"]
        if not isinstance(infos, list):
            infos = [infos]

        for info in infos:
            if u"cnum" in info:
                return None, info[u"cnum"], "inspirehep.net"

    return None, None, None


def _extract_conference_url(confjson):
    """Return the conference home page url or an empty string.

        * information is in confjson[url]
        * in most of the cases it is a dictionary
        * it happens that it is a list. The first entry is for the conference
          home page while the second one is for the proceeding (cds 2270940)
        * in other cases the url is not defined (cds 2258914)

    """
    if u"url" not in confjson:
        return u""

    entry = confjson[u"url"]
    if isinstance(entry, dict):
        return entry[u"url"]

    # an empty list is handled like a missing url
    if entry:
        return entry[0][u"url"]

    return u""


def build_record(recjson):
    """Transform a JSON object into a record.

    The record type is selected with ``is_conference``, ``is_institute``
    and ``is_thesis``, tested in that order. Conference records are first
    completed by ``add_conference_data`` and institute records by
    ``add_affiliation_keys``; both modify ``recjson`` in place.

    Args:
        recjson (dict):
            record data in a JSON format.

    Returns:
        Record:
            either RecordConf, RecordInst, RecordPubli or RecordThesis

    Raises:
        CdsException:
            propagated from ``add_conference_data`` when the conference
            data cannot be retrieved.

    """
    if is_conference(recjson):
        add_conference_data(recjson)
        return RecordConf(recjson)

    if is_institute(recjson):
        add_affiliation_keys(recjson)
        return RecordInst(recjson)

    if is_thesis(recjson):
        return RecordThesis(recjson)

    # anything else is handled as a publication
    return RecordPubli(recjson)


def get_conference_data(host, conf_id=None, key=None):
    """Get the conference data identified by its id or key.

    Args:
        host (unicode):
            possible values are ``cds.cern.ch`` or ``inspirehep.net``.

        conf_id (int):
            the conference identifier in the store.
            This is the preferred way: when it is given, ``key`` is ignored.

        key (unicode):
            the conference key in the store. For ``inspirehep.net`` any
            ``/`` is replaced by ``-`` before the key is checked and used.

    Returns:
        dict:
            The conference data (MarcJSON).
            ``None`` when neither ``conf_id`` nor ``key`` is given, or when
            ``key`` is given for a host other than the two listed above.

    Raises:
        CdsException:
            - conference record with a wrong identifier (MSG_INV_CONF)
            - conference key not matching REG_CONF (MSG_INV_CONF_KEY)
            - conference not found (MSG_NO_CONF)

    """
    cds = InvenioStore(host)

    # ........................................................................
    #
    # search by id
    #
    if conf_id is not None:
        recjson = cds.get_record(conf_id)
        if recjson["recid"] != conf_id:
            raise CdsException(MSG_INV_CONF)
        return recjson

    if key is None:
        return None

    # ........................................................................
    #
    # search by key: select the candidate records, the query depends on
    # the store
    #
    if host == "cds.cern.ch":
        ids = cds.get_ids(p=key)

    elif host == "inspirehep.net":
        key = key.replace("/", "-")

        if not REG_CONF.match(key):
            raise CdsException(MSG_INV_CONF_KEY)

        ids = cds.get_ids(cc="Conferences", p="111__g:%s" % key)

    else:
        return None

    # ........................................................................
    #
    # keep the first candidate carrying the requested key
    #
    for candidate_id in ids:
        candidate = cds.get_record(candidate_id)

        if match_conference_key(candidate, key):
            return candidate

    raise CdsException(MSG_NO_CONF)


def match_conference_key(recjson, conf_key):
    """Return ``True`` when the record corresponds to a conference identified
    by its key.

    The key is compared to the subfield ``coference_code`` (spelled as in
    the data) of every ``meeting_name`` entry.

    Args:
        recjson (dict):
            record formatted MarcJSON.

        conf_key (unicode):
            conference key

    Returns:
        bool:
            ``True`` when at least one ``meeting_name`` entry carries the
            key, ``False`` otherwise or when there is no ``meeting_name``.

    """
    if u"meeting_name" not in recjson:
        return False

    # a single meeting may be stored as a dictionary instead of a list
    meetings = recjson[u"meeting_name"]
    if isinstance(meetings, dict):
        meetings = [meetings]

    subfield = u"coference_code"
    return any(subfield in di and di[subfield] == conf_key
               for di in meetings)