Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 280a0bfe authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Upgrade InvenioStore to add the concept of shelf.

parent c9b8d605
......@@ -33,7 +33,7 @@ from .recordpubli import RecordPubli
from .recordthesis import RecordThesis
def load_record(host, record_id):
def load_record(host, record_id, shelf=None):
"""Helper function to load a single record from an invenio store.
Args:
......@@ -44,6 +44,23 @@ def load_record(host, record_id):
record_id (int):
the record identifier in the store
shelf (str):
section of the store containing records. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
and ``institutions``
+----------------+--------------+-----------------------------+
| host | shelf | base API |
+----------------+--------------+-----------------------------+
| cds.cern.ch | None | https://cds.cern.ch/ |
+----------------+--------------+-----------------------------+
| inspirehep.net | None | https://old.inspirehep.net/ |
| inspirehep.net | literature | https://old.inspirehep.net/ |
| inspirehep.net | conferences | https://inspirehep.net/ |
| inspirehep.net | institutions | https://old.inspirehep.net/ |
+----------------+--------------+-----------------------------+
Returns:
Record:
either RecordPubli, RecordInst, RecordConf or RecordThesis.
......@@ -55,6 +72,6 @@ def load_record(host, record_id):
* no JSON object could be decoded.
"""
store = InvenioStore(host)
store = InvenioStore(host, shelf=shelf)
recjson = store.get_record(record_id)
return build_record(recjson)
......@@ -13,9 +13,14 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
"d1y", "d1m", "d1d", "d2", "d2y", "d2m", "d2d", "dt",
"verbose", "ap", "ln", "ec")
# Accepted aliases for each store; the constructor of the store maps them
# onto the real host name.
CDS = ("cds", "cds.cern.ch")
INS = ("inspirehep", "inspirehep.net")

# Error messages. Those containing ``%s`` placeholders are meant to be
# filled with the printf-style ``%`` operator and a tuple of arguments.
MSG_HTTP_DECODE = "Fail to decode HTTP response"
MSG_HTTP_ERROR = "HTTP Error"
MSG_NO_IDS = "Invalid list of record identifiers"
# arguments: (shelf, store)
MSG_NO_SHELF = "No shelf %s for store %s"
# arguments: (method name, store, shelf)
# NOTE: the placeholders must be ``%s``; a bare ``%`` followed by a quote
# raises ``ValueError: unsupported format character`` at formatting time.
MSG_NOT_IMPLEMENTED = "Method '%s' not implemented for store '%s' and shelf '%s'"
MSG_WRONG_KEYWORD = "Invalid keyword argument"
# maximum number of identifiers to be collected at once.
......@@ -30,23 +35,61 @@ class InvenioStore(object):
* a list of identifiers satisfying search criteria.
* a record identified by its id.
"""
def __init__(self, host="cds.cern.ch"):
"""
Args:
host (str):
possible values are ``cds.cern.ch`` or ``inspirehep.net``.
possible values are ``cds``, ``cds.cern.ch``, ``inspirehep``
or ``inspirehep.net``
shelf (str):
section of the store. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
and ``institutions``
+----------------+--------------+-----------------------------+
| host | shelf | base API |
+----------------+--------------+-----------------------------+
| cds.cern.ch | None | https://cds.cern.ch/ |
+----------------+--------------+-----------------------------+
| inspirehep.net | None | https://old.inspirehep.net/ |
| inspirehep.net | literature | https://old.inspirehep.net/ |
| inspirehep.net | conferences | https://inspirehep.net/ |
| inspirehep.net | institutions | https://old.inspirehep.net/ |
+----------------+--------------+-----------------------------+
"""
self._host = host
def __init__(self, host="cds", shelf=None):
self._shelf = shelf
self._url = None
# base url for the API
if host in CDS and shelf is None:
api_search = "https://cds.cern.ch/search"
api_record = "https://cds.cern.ch/record"
host = "cds.cern.ch"
elif host in INS and shelf in (None, "literature", "institutions"):
api_search = "https://old.inspirehep.net/search"
api_record = "https://old.inspirehep.net/record"
host = "old.inspirehep.net"
elif host in INS and shelf in ("conferences",):
api_search = None
api_record = "https://inspirehep.net/api/conferences"
host = "inspirehep.net"
else:
raise CdsException(MSG_NO_SHELF % (shelf, host))
# start a session, a persistent connection with the server
# let the session handle the number of retry
session = requests.Session()
session.mount(f"http://{host}", HTTPAdapter(max_retries=3))
session.mount(f"https://{host}", HTTPAdapter(max_retries=3))
self._api_search = api_search
self._api_record = api_record
self._host = host
self._session = session
def __del__(self):
......@@ -59,7 +102,8 @@ class InvenioStore(object):
Args:
url (str):
URL string, *e.g.*::
the URL string depends on the store and on the invenio
version which is running, *e.g.*::
* ``https://cds.cern.ch/record/123456/of=recjson``
* ``https://cds.cern.ch/search?of=id&....
......@@ -72,7 +116,7 @@ class InvenioStore(object):
The keyword arguments are those of the invenio web interface.
Details are in https://inspirehep.net/help/hacking/search-engine-api
Examples how to use the invenio API:
Examples of how to use the old invenio API:
https://inspirehep.net/info/hep/api?ln=fr#json_fnames
List of keyword in the JSON record:
......@@ -300,12 +344,6 @@ class InvenioStore(object):
"""
self._url = url
# FIXME March 30, 2020:
# * new version of inspirehep.net
# * API not yet ready
# * recommend to use old.inspirehep.net
url = url.replace("//inspirehep.net", "//old.inspirehep.net")
r = self._session.get(url, timeout=timeout, params=kwargs)
r.raise_for_status()
......@@ -397,6 +435,11 @@ class InvenioStore(object):
* not well formed list of ids.
"""
host = self._host
if host != "old.inspirehep.net":
msg = MSG_NOT_IMPLEMENTED % ("get_ids", host, self._shelf)
raise CdsException(msg)
for k in kwargs:
if k not in CDS_SEARCH_KEYS:
raise CdsException(MSG_WRONG_KEYWORD, k)
......@@ -418,8 +461,7 @@ class InvenioStore(object):
while scan:
kwargs["jrec"] += N_IDS
url = "https://%s/search" % self._host
rep = self.interogate(url, timeout=30, **kwargs)
rep = self.interogate(self._api_search, timeout=30, **kwargs)
try:
li = rep.json()
......@@ -458,10 +500,13 @@ class InvenioStore(object):
* no JSON object could be decoded.
"""
self._try = 0
url = "%s/%s" % (self._api_record, rec_id)
url = "https://%s/record/%s" % (self._host, rec_id)
rep = self.interogate(url, timeout=30, of="recjson")
kwargs = {}
if self._host in ("cds.cern.ch", "old.inspirehep.net"):
kwargs = {"of": "recjson"}
rep = self.interogate(url, timeout=30, **kwargs)
try:
li = rep.json()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment