Commit 0d4a55c1 authored by LE GAC Renaud
Browse files

Update InvenioStore to add the method get_field.

parent 3b82d484
...@@ -98,43 +98,51 @@ class InvenioStore(object): ...@@ -98,43 +98,51 @@ class InvenioStore(object):
if getattr(self, "_session", None) is not None: if getattr(self, "_session", None) is not None:
self._session.close() self._session.close()
def get_field(self, rec_id, fieldname):
    """Retrieve the value of one field for the record identified by
    its *record id*.

    The record is requested through ``interrogate`` in the ``recjson``
    output format, with the ``ot`` keyword argument set to *fieldname*.

    Args:
        rec_id (int):
            record identifier in the store.

        fieldname (str):
            name of the field in the JSON record.

    Returns:
        * the value stored under *fieldname* (*e.g.* int or str)
        * None when the reply is not a list containing exactly one
          record, or when that record does not contain the field

    Raises:
        CdsException::

            * method is not implemented for all store, shelf pairs.
              It works for those relying on cds.cern.ch and
              old.inspirehep.net
            * JSON object could not be decoded.

    Note:
        Errors raised by ``interrogate`` (*e.g.* an HTTP error status)
        are not caught here and propagate to the caller.
        NOTE(review): confirm whether they should be converted into
        CdsException.

    """
    host = self._host
    if host not in ("cds.cern.ch", "old.inspirehep.net"):
        msg = MSG_NOT_IMPLEMENTED % ("get_field", host, self._shelf)
        raise CdsException(msg)

    url = "%s/%s" % (self._api_record, rec_id)
    rep = self.interrogate(url, timeout=60, of="recjson", ot=fieldname)

    try:
        obj = rep.json()
    except ValueError:
        raise CdsException(MSG_HTTP_DECODE)

    # Only a reply holding exactly one JSON record is meaningful. Use a
    # tolerant lookup so that a missing field yields None, as documented,
    # instead of raising KeyError.
    if isinstance(obj, list) and len(obj) == 1 and isinstance(obj[0], dict):
        return obj[0].get(fieldname)

    return None
def get_ids(self, **kwargs): def get_ids(self, **kwargs):
"""Return a list of *record id* matching search criteria. """Return a list of *record id* matching search criteria.
...@@ -155,7 +163,9 @@ class InvenioStore(object): ...@@ -155,7 +163,9 @@ class InvenioStore(object):
Raises: Raises:
CdsException:: CdsException::
* Method not implemented for the selected store, shelf pair; * Method not implemented for all store, shelf pairs.
It works for those relying on cds.cern.ch and
old.inspirehep.net
* keyword argument is invalid; * keyword argument is invalid;
* the server return an HTTP error; * the server return an HTTP error;
* JSON object can't be decoded; * JSON object can't be decoded;
...@@ -224,7 +234,7 @@ class InvenioStore(object): ...@@ -224,7 +234,7 @@ class InvenioStore(object):
CdsException:: CdsException::
* the server return an HTTP error. * the server return an HTTP error.
* no JSON object could be decoded. * JSON object could not be decoded.
* more than one record * more than one record
""" """
...@@ -250,6 +260,44 @@ class InvenioStore(object): ...@@ -250,6 +260,44 @@ class InvenioStore(object):
raise CdsException(MSG_HTTP_DECODE) raise CdsException(MSG_HTTP_DECODE)
def interrogate(self, url, timeout=10, **kwargs):
    """Send an HTTP GET request to the store and return its response.

    The requested *url* is kept in ``self._url`` before the request is
    sent through ``self._session``.

    NOTE(review): the request is presumably retried by the session when
    the service is not available; the retry policy is not configured in
    this method -- confirm where the session is created.

    Args:
        url (str):
            the URL string depends on the store and on the invenio
            version which is running, *e.g.*:

            * ``https://cds.cern.ch/record/123456``
            * ``https://cds.cern.ch/search``

        timeout (float):
            timeout for the HTTP request

    Keyword Args:
        The keyword arguments are sent as the parameters of the request.
        They are those of the invenio search engine and they depend on
        the version of invenio.

        See https://gitlab.in2p3.fr/limbra/limbra/-/blob/master/modules/invenio_tools/README.md
        for more information.

    Returns:
        requests.Response:

    Raises:
        RequestException:
            something went wrong within the HTTP dialog

    """
    self._url = url

    response = self._session.get(url, params=kwargs, timeout=timeout)
    # Turn an HTTP error status into an exception for the caller.
    response.raise_for_status()

    return response
def last_search_url(self): def last_search_url(self):
""" """
Returns: Returns:
......
...@@ -106,11 +106,7 @@ def test_get_record_ins_institutions_01009(): ...@@ -106,11 +106,7 @@ def test_get_record_ins_institutions_01009():
def test_interrogate_citations_01010(): def test_interrogate_citations_01010():
# old inspirehep interface (new one not yet available) # old inspirehep interface (new one not yet available)
url = "https://old.inspirehep.net/record/1319638" store = InvenioStore("inspirehep.net", shelf="literature")
kwargs = dict(of="recjson", ot="number_of_citations") value = store.get_field(1319638, "number_of_citations")
store = InvenioStore("inspirehep.net")
rep = store.interrogate(url, timeout=60, **kwargs)
lst = rep.json()
assert str(lst[0]["number_of_citations"]).isdigit() assert str(value).isdigit()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment