Commit 280a0bfe authored by LE GAC Renaud
Browse files

Upgrade InvenioStore to add the concept of shelf.

parent c9b8d605
...@@ -33,7 +33,7 @@ from .recordpubli import RecordPubli ...@@ -33,7 +33,7 @@ from .recordpubli import RecordPubli
from .recordthesis import RecordThesis from .recordthesis import RecordThesis
def load_record(host, record_id): def load_record(host, record_id, shelf=None):
"""Helper function to load a single record from an invenio store. """Helper function to load a single record from an invenio store.
Args: Args:
...@@ -44,6 +44,23 @@ def load_record(host, record_id): ...@@ -44,6 +44,23 @@ def load_record(host, record_id):
record_id (int): record_id (int):
the record identifier in the store the record identifier in the store
shelf (str):
section of the store containing records. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
and ``institutions``
+----------------+--------------+-----------------------------+
| host | shelf | base API |
+----------------+--------------+-----------------------------+
| cds.cern.ch | None | https://cds.cern.ch/ |
+----------------+--------------+-----------------------------+
| inspirehep.net | None | https://old.inspirehep.net/ |
| inspirehep.net | literature | https://old.inspirehep.net/ |
| inspirehep.net | conferences | https://inspirehep.net/ |
| inspirehep.net | institutions | https://old.inspirehep.net/ |
+----------------+--------------+-----------------------------+
Returns: Returns:
Record: Record:
either RecordPubli, RecordInst, RecordConf or RecordThesis. either RecordPubli, RecordInst, RecordConf or RecordThesis.
...@@ -55,6 +72,6 @@ def load_record(host, record_id): ...@@ -55,6 +72,6 @@ def load_record(host, record_id):
* no JSON object could be decoded. * no JSON object could be decoded.
""" """
store = InvenioStore(host) store = InvenioStore(host, shelf=shelf)
recjson = store.get_record(record_id) recjson = store.get_record(record_id)
return build_record(recjson) return build_record(recjson)
...@@ -13,9 +13,14 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp", ...@@ -13,9 +13,14 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
"d1y", "d1m", "d1d", "d2", "d2y", "d2m", "d2d", "dt", "d1y", "d1m", "d1d", "d2", "d2y", "d2m", "d2d", "dt",
"verbose", "ap", "ln", "ec") "verbose", "ap", "ln", "ec")
# Accepted spellings of the ``host`` argument selecting the cds.cern.ch store.
CDS = ("cds", "cds.cern.ch")
# Accepted spellings of the ``host`` argument selecting the inspirehep.net store.
INS = ("inspirehep", "inspirehep.net")
MSG_HTTP_DECODE = "Fail to decode HTTP response" MSG_HTTP_DECODE = "Fail to decode HTTP response"
MSG_HTTP_ERROR = "HTTP Error" MSG_HTTP_ERROR = "HTTP Error"
MSG_NO_IDS = "Invalid list of record identifiers" MSG_NO_IDS = "Invalid list of record identifiers"
# Error-message templates filled in with the ``%`` operator.
# MSG_NO_SHELF expects the tuple (shelf, host).
MSG_NO_SHELF = "No shelf %s for store %s"
# MSG_NOT_IMPLEMENTED expects the tuple (method name, host, shelf).
# Each placeholder must be a full ``%s`` conversion: a bare ``%`` followed by
# a quote makes the ``%`` operator raise ValueError instead of building the
# message.
MSG_NOT_IMPLEMENTED = (
    "Method '%s' not implemented for store '%s' and shelf '%s'"
)
MSG_WRONG_KEYWORD = "Invalid keyword argument" MSG_WRONG_KEYWORD = "Invalid keyword argument"
# maximum number of identifiers to be collected at once. # maximum number of identifiers to be collected at once.
...@@ -30,23 +35,61 @@ class InvenioStore(object): ...@@ -30,23 +35,61 @@ class InvenioStore(object):
* a list of identifier satisfying search criteria. * a list of identifier satisfying search criteria.
* a record identified by its id. * a record identified by its id.
Args:
host (str):
possible values are ``cds``, ``cds.cern.ch``, ``inspirehep``
or ``inspirehep.net``
shelf (str):
section of the store. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
and ``institutions``
+----------------+--------------+-----------------------------+
| host | shelf | base API |
+----------------+--------------+-----------------------------+
| cds.cern.ch | None | https://cds.cern.ch/ |
+----------------+--------------+-----------------------------+
| inspirehep.net | None | https://old.inspirehep.net/ |
| inspirehep.net | literature | https://old.inspirehep.net/ |
| inspirehep.net | conferences | https://inspirehep.net/ |
| inspirehep.net | institutions | https://old.inspirehep.net/ |
+----------------+--------------+-----------------------------+
""" """
def __init__(self, host="cds.cern.ch"): def __init__(self, host="cds", shelf=None):
"""
Args:
host (str):
possible values are ``cds.cern.ch`` or ``inspirehep.net``.
""" self._shelf = shelf
self._host = host
self._url = None self._url = None
# base url for the API
if host in CDS and shelf is None:
api_search = "https://cds.cern.ch/search"
api_record = "https://cds.cern.ch/record"
host = "cds.cern.ch"
elif host in INS and shelf in (None, "literature", "institutions"):
api_search = "https://old.inspirehep.net/search"
api_record = "https://old.inspirehep.net/record"
host = "old.inspirehep.net"
elif host in INS and shelf in ("conferences",):
api_search = None
api_record = "https://inspirehep.net/api/conferences"
host = "inspirehep.net"
else:
raise CdsException(MSG_NO_SHELF % (shelf, host))
# start a session, a persistent connection with the server # start a session, a persistent connection with the server
# let the session handle the number of retry # let the session handle the number of retry
session = requests.Session() session = requests.Session()
session.mount(f"http://{host}", HTTPAdapter(max_retries=3)) session.mount(f"https://{host}", HTTPAdapter(max_retries=3))
self._api_search = api_search
self._api_record = api_record
self._host = host
self._session = session self._session = session
def __del__(self): def __del__(self):
...@@ -59,7 +102,8 @@ class InvenioStore(object): ...@@ -59,7 +102,8 @@ class InvenioStore(object):
Args: Args:
url (str): url (str):
URL string, *e.g.*:: the URL string depends on the store and on the invenio
version which is running, *e.g.*::
* ``https://cds.cern.ch/record/123456/of=recjson`` * ``https://cds.cern.ch/record/123456/of=recjson``
* ``https://cds.cern.ch/search?of=id&.... * ``https://cds.cern.ch/search?of=id&....
...@@ -72,7 +116,7 @@ class InvenioStore(object): ...@@ -72,7 +116,7 @@ class InvenioStore(object):
The keyword arguments are those of the invenio web interface. The keyword arguments are those of the invenio web interface.
Details are in https://inspirehep.net/help/hacking/search-engine-api Details are in https://inspirehep.net/help/hacking/search-engine-api
Examples how to use the invenio API: Examples how to use the old invenio API:
https://inspirehep.net/info/hep/api?ln=fr#json_fnames https://inspirehep.net/info/hep/api?ln=fr#json_fnames
List of keyword in the JSON record: List of keyword in the JSON record:
...@@ -300,12 +344,6 @@ class InvenioStore(object): ...@@ -300,12 +344,6 @@ class InvenioStore(object):
""" """
self._url = url self._url = url
# FIXME March 30, 2020:
# * new version of inspirehep.net
# * API not yet ready
# * recommend to use old.inspirehep.net
url = url.replace("//inspirehep.net", "//old.inspirehep.net")
r = self._session.get(url, timeout=timeout, params=kwargs) r = self._session.get(url, timeout=timeout, params=kwargs)
r.raise_for_status() r.raise_for_status()
...@@ -397,6 +435,11 @@ class InvenioStore(object): ...@@ -397,6 +435,11 @@ class InvenioStore(object):
* not well formed list of ids. * not well formed list of ids.
""" """
host = self._host
if host != "old.inspirehep.net":
msg = MSG_NOT_IMPLEMENTED % ("get_ids", host, self._shelf)
raise CdsException(msg)
for k in kwargs: for k in kwargs:
if k not in CDS_SEARCH_KEYS: if k not in CDS_SEARCH_KEYS:
raise CdsException(MSG_WRONG_KEYWORD, k) raise CdsException(MSG_WRONG_KEYWORD, k)
...@@ -418,8 +461,7 @@ class InvenioStore(object): ...@@ -418,8 +461,7 @@ class InvenioStore(object):
while scan: while scan:
kwargs["jrec"] += N_IDS kwargs["jrec"] += N_IDS
url = "https://%s/search" % self._host rep = self.interogate(self._api_search, timeout=30, **kwargs)
rep = self.interogate(url, timeout=30, **kwargs)
try: try:
li = rep.json() li = rep.json()
...@@ -458,10 +500,13 @@ class InvenioStore(object): ...@@ -458,10 +500,13 @@ class InvenioStore(object):
* no JSON object could be decoded. * no JSON object could be decoded.
""" """
self._try = 0 url = "%s/%s" % (self._api_record, rec_id)
kwargs = {}
if self._host in ("cds.cern.ch", "old.inspirehep.net"):
kwargs = {"of": "recjson"}
url = "https://%s/record/%s" % (self._host, rec_id) rep = self.interogate(url, timeout=30, **kwargs)
rep = self.interogate(url, timeout=30, of="recjson")
try: try:
li = rep.json() li = rep.json()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment