Commit 280a0bfe authored by LE GAC Renaud
Browse files

Upgrade InvenioStore to add the concept of shelf.

parent c9b8d605
......@@ -33,7 +33,7 @@ from .recordpubli import RecordPubli
from .recordthesis import RecordThesis
def load_record(host, record_id):
def load_record(host, record_id, shelf=None):
"""Helper function to load a single record from an invenio store.
Args:
......@@ -44,6 +44,23 @@ def load_record(host, record_id):
record_id (int):
the record identifier in the store
shelf (str):
section of the store containing records. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
and ``institutions``
+----------------+--------------+-----------------------------+
| host | shelf | base API |
+----------------+--------------+-----------------------------+
| cds.cern.ch | None | https://cds.cern.ch/ |
+----------------+--------------+-----------------------------+
| inspirehep.net | None | https://old.inspirehep.net/ |
| inspirehep.net | literature | https://old.inspirehep.net/ |
| inspirehep.net | conferences | https://inspirehep.net/ |
| inspirehep.net | institutions | https://old.inspirehep.net/ |
+----------------+--------------+-----------------------------+
Returns:
Record:
either RecordPubli, RecordInst, RecordConf or RecordThesis.
......@@ -55,6 +72,6 @@ def load_record(host, record_id):
* no JSON object could be decoded.
"""
store = InvenioStore(host)
store = InvenioStore(host, shelf=shelf)
recjson = store.get_record(record_id)
return build_record(recjson)
......@@ -13,9 +13,14 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
"d1y", "d1m", "d1d", "d2", "d2y", "d2m", "d2d", "dt",
"verbose", "ap", "ln", "ec")
# accepted aliases for each store (a host is matched with ``host in CDS``)
CDS = ("cds", "cds.cern.ch")
INS = ("inspirehep", "inspirehep.net")
# error messages; those containing %s are filled with the % operator
MSG_HTTP_DECODE = "Fail to decode HTTP response"
MSG_HTTP_ERROR = "HTTP Error"
MSG_NO_IDS = "Invalid list of record identifiers"
MSG_NO_SHELF = "No shelf %s for store %s"
# placeholders are, in order: method name, store host, shelf
MSG_NOT_IMPLEMENTED = "Method '%s' not implemented for store '%s' and shelf '%s'"
MSG_WRONG_KEYWORD = "Invalid keyword argument"
# maximum number of identifiers to be collected at once.
......@@ -30,23 +35,61 @@ class InvenioStore(object):
* a list of identifier satisfying search criteria.
* a record identified by its id.
"""
def __init__(self, host="cds.cern.ch"):
"""
Args:
host (str):
possible values are ``cds.cern.ch`` or ``inspirehep.net``.
possible values are ``cds``, ``cds.cern.ch``, ``inspirehep``
or ``inspirehep.net``
shelf (str):
section of the store. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
and ``institutions``
+----------------+--------------+-----------------------------+
| host | shelf | base API |
+----------------+--------------+-----------------------------+
| cds.cern.ch | None | https://cds.cern.ch/ |
+----------------+--------------+-----------------------------+
| inspirehep.net | None | https://old.inspirehep.net/ |
| inspirehep.net | literature | https://old.inspirehep.net/ |
| inspirehep.net | conferences | https://inspirehep.net/ |
| inspirehep.net | institutions | https://old.inspirehep.net/ |
+----------------+--------------+-----------------------------+
"""
self._host = host
def __init__(self, host="cds", shelf=None):
self._shelf = shelf
self._url = None
# base url for the API
if host in CDS and shelf is None:
api_search = "https://cds.cern.ch/search"
api_record = "https://cds.cern.ch/record"
host = "cds.cern.ch"
elif host in INS and shelf in (None, "literature", "institutions"):
api_search = "https://old.inspirehep.net/search"
api_record = "https://old.inspirehep.net/record"
host = "old.inspirehep.net"
elif host in INS and shelf in ("conferences",):
api_search = None
api_record = "https://inspirehep.net/api/conferences"
host = "inspirehep.net"
else:
raise CdsException(MSG_NO_SHELF % (shelf, host))
# start a session, a persistent connection with the server
# let the session handle the number of retry
session = requests.Session()
session.mount(f"http://{host}", HTTPAdapter(max_retries=3))
session.mount(f"https://{host}", HTTPAdapter(max_retries=3))
self._api_search = api_search
self._api_record = api_record
self._host = host
self._session = session
def __del__(self):
......@@ -59,7 +102,8 @@ class InvenioStore(object):
Args:
url (str):
URL string, *e.g.*::
the URL string depends on the store and on the invenio
version which is running, *e.g.*::
* ``https://cds.cern.ch/record/123456/of=recjson``
* ``https://cds.cern.ch/search?of=id&....``
......@@ -72,7 +116,7 @@ class InvenioStore(object):
The keyword arguments are those of the invenio web interface.
Details are in https://inspirehep.net/help/hacking/search-engine-api
Examples how to use the invenio API:
Examples how to use the old invenio API:
https://inspirehep.net/info/hep/api?ln=fr#json_fnames
List of keyword in the JSON record:
......@@ -300,12 +344,6 @@ class InvenioStore(object):
"""
self._url = url
# FIXME March 30, 2020:
# * new version of inspirehep.net
# * API not yet ready
# * recommend to use old.inspirehep.net
url = url.replace("//inspirehep.net", "//old.inspirehep.net")
r = self._session.get(url, timeout=timeout, params=kwargs)
r.raise_for_status()
......@@ -397,6 +435,11 @@ class InvenioStore(object):
* not well formed list of ids.
"""
host = self._host
if host != "old.inspirehep.net":
msg = MSG_NOT_IMPLEMENTED % ("get_ids", host, self._shelf)
raise CdsException(msg)
for k in kwargs:
if k not in CDS_SEARCH_KEYS:
raise CdsException(MSG_WRONG_KEYWORD, k)
......@@ -418,8 +461,7 @@ class InvenioStore(object):
while scan:
kwargs["jrec"] += N_IDS
url = "https://%s/search" % self._host
rep = self.interogate(url, timeout=30, **kwargs)
rep = self.interogate(self._api_search, timeout=30, **kwargs)
try:
li = rep.json()
......@@ -458,10 +500,13 @@ class InvenioStore(object):
* no JSON object could be decoded.
"""
self._try = 0
url = "%s/%s" % (self._api_record, rec_id)
url = "https://%s/record/%s" % (self._host, rec_id)
rep = self.interogate(url, timeout=30, of="recjson")
kwargs = {}
if self._host in ("cds.cern.ch", "old.inspirehep.net"):
kwargs = {"of": "recjson"}
rep = self.interogate(url, timeout=30, **kwargs)
try:
li = rep.json()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment