Commit 56f65a7e authored by LE GAC Renaud
Browse files

Update InvenioStore to use BaseStore, limited to cds.cern.ch and...

Update InvenioStore to use BaseStore, limited to cds.cern.ch and old.inspirehep.net and Add draft InspirehepStore
parent 936b5436
""" inspirehepstore.py
"""
from .basestore import BaseStore, MSG_HTTP_DECODE, MSG_INVALID_RESPONSE
from .exception import CdsException
class InspirehepStore(BaseStore):
    """Interface to the inspirehep store, release v2.

    Note:
        More information on the API in
        https://github.com/inspirehep/rest-api-doc

    Args:
        host (str):
            possible values are ``inspirehep.net``

        max_retries (int):
            HTTP requests are performed several times in case of failure.
            Define the maximum number of trials.

        shelf (str):
            section of the store containing records.
            Possible values are ``literature``, ``conferences``
            and ``institutions``

    """

    def __init__(self,
                 host=None,
                 max_retries=3,
                 shelf=None):

        # Forward the shelf *name* to the base class (not the instance).
        super().__init__(api_record=f"https://{host}/api/{shelf}",
                         api_search=f"https://{host}/api/{shelf}/?q=",
                         host=host,
                         max_retries=max_retries,
                         shelf=shelf)

    def get_field(self, rec_id, fieldname):
        """Retrieve the field value for the record identified by
        its *record id*.

        Note:
            Draft: not implemented yet, the method currently
            returns ``None``.

        Args:
            rec_id (int):
                record identifier in the store.

            fieldname (str):
                name of the field in the JSON record.

        Returns:
            * int
            * str
            * None when the field is not found

        Raises:
            CdsException:
                * the server returns an HTTP error.
                * JSON object could not be decoded.

        """

    def get_ids(self, **kwargs):
        """Return a list of *record id* matching search criteria.

        Note:
            Draft: not implemented yet, the method currently
            returns ``None``.

        Keyword Args:
            The keyword arguments are those of the inspirehep search engine.
            More information in
            https://inspirehep.net/help/knowledge-base/inspire-paper-search/

        Returns:
            list:
                * A list of numbers.
                * The list is empty when the request failed on the server.

        Raises:
            CdsException:
                * keyword argument is invalid;
                * the server returns an HTTP error;
                * JSON object can't be decoded;
                * not well formed list of ids.

        """

    def get_record(self, rec_id):
        """Retrieve a record defined by its *record id*.

        Note:
            Draft: not implemented yet, the method currently
            returns ``None``.

        Args:
            rec_id (int):
                record identifier in the store.

        Returns:
            dict:
                the record data (recjson).

        Raises:
            CdsException:
                * the server returns an HTTP error.
                * JSON object could not be decoded.
                * more than one record

        """

    def interrogate(self, url, timeout=10, **kwargs):
        """Interrogate the store using the *URL*.

        The request is delegated to the base class implementation.

        Args:
            url (str):
                the URL to query.

            timeout (float):
                timeout for the HTTP request

        Keyword Args:
            The keyword arguments are those of the inspirehep search engine.
            More information in
            https://inspirehep.net/help/knowledge-base/inspire-paper-search/

        Returns:
            requests.Response:

        Raises:
            RequestException:
                something went wrong within the HTTP dialog

        """
        # A docstring-only override would return None and break search(),
        # which calls .json() on the value returned here.
        return super().interrogate(url, timeout=timeout, **kwargs)

    def last_search_url(self):
        """
        Returns:
            str:
                the URL used in the last search.

        """
        return self._url

    def search(self, query, **kwargs):
        """Return a list of *JSON record* matching search criteria.

        Args:
            query (str):
                Use the syntax of the web interface or elasticsearch one.

        Keyword Args:
            elasticsearch keywords

        Returns:
            * list of JSON records

        Raises:
            CdsException:
                * JSON object could not be decoded.
                * the response does not have the expected structure.

        """
        url = f"{self._api_search}{query}"
        rep = self.interrogate(url, timeout=30, **kwargs)

        try:
            obj = rep.json()
        except ValueError as err:
            raise CdsException(MSG_HTTP_DECODE) from err

        # the response is a dict with 3 keys: 'hits', 'links', 'sort_options'
        # the hits section is a dict with 2 keys: hits (list), total (int)
        try:
            return obj["hits"]["hits"]
        except (KeyError, TypeError) as err:
            raise CdsException(MSG_INVALID_RESPONSE) from err
""" store_tools.inveniostore
"""
import requests
from .basestore import (BaseStore,
MSG_HTTP_DECODE,
MSG_NO_IDS,
MSG_INVALID_RESPONSE)
from .exception import CdsException
from requests.adapters import HTTPAdapter
from builtins import getattr
CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
"rm", "of", "ot", "as", "p1", "f1", "m1", "op1", "p2", "f2",
......@@ -14,11 +14,7 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
"d1y", "d1m", "d1d", "d2", "d2y", "d2m", "d2d", "dt",
"verbose", "ap", "ln", "ec")
MSG_HTTP_DECODE = "Fail to decode HTTP response"
MSG_HTTP_ERROR = "HTTP Error"
MSG_INVALID_RESPONSE = "Invalid response"
MSG_NO_IDS = "Invalid list of record identifiers"
MSG_NOT_IMPLEMENTED = "Method '%s' not implemented for store %s and shelf %s"
MSG_INVALID_HOST = "Invalid host"
MSG_WRONG_KEYWORD = "Invalid keyword argument"
# maximum number of identifiers to be collected at once.
......@@ -26,74 +22,43 @@ MSG_WRONG_KEYWORD = "Invalid keyword argument"
N_IDS = 200
class InvenioStore(object):
"""Class to dialogue with `invenio <http://invenio-software.org/>`_ store.
class InvenioStore(BaseStore):
"""Interface to invenio store cds.cern.ch and old.inspirehep.net.
The class provides methods to request:
* a list of identifier satisfying search criteria.
* a record identified by its id.
Note:
In March 2020, inspirehep migrates to new API.
It is recommended to use InspirehepStore.
Args:
host (str):
possible values are ``cds``, ``cds.cern.ch``,``inspirehep``
or ``inspirehep.net``
api_search (str):
api_record (str):
possible values are ``cds.cern.ch`` and ``old.inspirehep.net``
max_retries (int):
HTTP requests are performed several time in case of failure.
Define the maximum number of trial
shelf (str):
section of the store containing records. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
and ``institutions``
+----------------+--------------+-----------------------------+
| host | shelf | base API |
+----------------+--------------+-----------------------------+
| cds.cern.ch | None | https://cds.cern.ch/ |
+----------------+--------------+-----------------------------+
| inspirehep.net | None | https://old.inspirehep.net/ |
| inspirehep.net | literature | https://old.inspirehep.net/ |
| inspirehep.net | conferences | https://inspirehep.net/ |
| inspirehep.net | institutions | https://old.inspirehep.net/ |
+----------------+--------------+-----------------------------+
Raises:
CdsException:
* invalid host
"""
def __init__(self,
api_record=None,
api_search=None,
host=None,
max_retries=3,
shelf=None):
self._api_search = api_search
self._api_record = api_record
self._host = host
self._shelf = shelf
self._url = None
# start a session, a persistent connection with the server
# let the session handle the number of retry
session = requests.Session()
session.mount(f"https://{host}", HTTPAdapter(max_retries=max_retries))
self._session = session
def __del__(self):
    """Close the HTTP session, if one was attached to the instance."""
    # The attribute may be missing, hence the defensive lookup.
    session = getattr(self, "_session", None)
    if session is None:
        return
    session.close()
max_retries=3):
if host not in ("cds.cern.ch", "old.inspirehep.net"):
raise CdsException(MSG_INVALID_HOST)
super().__init__(api_record=f"https://{host}/record",
api_search=f"https://{host}/search",
host=host,
max_retries=max_retries)
def get_field(self, rec_id, fieldname):
"""Retrieve the field value for the record identified by
its *record id*.
Note:
The method is implemented for store, shelf pairs
relying on ``cds.cern.ch`` and ``old.inspirehep.net``.
Args:
rec_id (int):
record identifier in the store.
......@@ -102,24 +67,15 @@ class InvenioStore(object):
name of the field in the JSON record.
Returns:
* int
* str
* value
* None when the field is not found
Raises:
CdsException:
* method is not implemented for all store, shelf pairs.
It works for those relying on cds.cern.ch and
old.inspirehep.net
* the server return an HTTP error.
* JSON object could not be decoded.
"""
host = self._host
if host not in ("cds.cern.ch", "old.inspirehep.net"):
msg = MSG_NOT_IMPLEMENTED % ("get_field", host, self._shelf)
raise CdsException(msg)
url = f"{self._api_record}/{rec_id}"
rep = self.interrogate(url, timeout=60, of="recjson", ot=fieldname)
......@@ -138,10 +94,6 @@ class InvenioStore(object):
def get_ids(self, **kwargs):
"""Return a list of *record id* matching search criteria.
Note:
The method is implemented for store, shelf pairs
relying on ``cds.cern.ch`` and ``old.inspirehep.net``.
Keyword Args:
The keyword arguments are those of the invenio search engine and
......@@ -157,21 +109,12 @@ class InvenioStore(object):
Raises:
CdsException::
* Method not implemented for all store, shelf pairs.
It works for those relying on cds.cern.ch and
old.inspirehep.net
* keyword argument is invalid;
* the server return an HTTP error;
* JSON object can't be decoded;
* not well formed list of ids.
"""
host = self._host
if host not in ("cds.cern.ch", "old.inspirehep.net"):
msg = MSG_NOT_IMPLEMENTED % ("get_ids", host, self._shelf)
raise CdsException(msg)
for k in kwargs:
if k not in CDS_SEARCH_KEYS:
raise CdsException(MSG_WRONG_KEYWORD, k)
......@@ -227,19 +170,13 @@ class InvenioStore(object):
Raises:
CdsException:
* the server return an HTTP error.
* JSON object could not be decoded.
* more than one record
"""
url = f"{self._api_record}/{rec_id}"
kwargs = {}
if self._host in ("cds.cern.ch", "old.inspirehep.net"):
kwargs = {"of": "recjson"}
rep = self.interrogate(url, timeout=30, **kwargs)
rep = self.interrogate(url, timeout=30, of="recjson")
try:
obj = rep.json()
......@@ -255,53 +192,6 @@ class InvenioStore(object):
raise CdsException(MSG_INVALID_RESPONSE)
def interrogate(self, url, timeout=10, **kwargs):
    """Send a GET request to the store and return the response.

    The *url* is recorded in ``self._url`` before the request is issued
    through ``self._session``. Keyword arguments are sent as the query
    parameters of the request.

    Args:
        url (str):
            the URL string depends on the store and on the invenio
            version which is running, *e.g.*
            ``https://cds.cern.ch/search?of=id&...``

        timeout (float):
            timeout for the HTTP request

    Keyword Args:
        The keyword arguments are those of the invenio search engine and
        they depend on the version of invenio.
        See https://gitlab.in2p3.fr/limbra/limbra/-/blob/master/modules/store_tools/README.md
        for more information.

    Returns:
        requests.Response:

    Raises:
        RequestException:
            something went wrong within the HTTP dialog

    """
    self._url = url

    response = self._session.get(url, params=kwargs, timeout=timeout)
    response.raise_for_status()

    return response
def last_search_url(self):
    """Give back the URL stored in ``self._url``.

    Returns:
        str:
            the URL used in the last search.

    """
    url = self._url
    return url
def search(self, query, **kwargs):
"""Return a list of *JSON record* matching search criteria.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment