Commit f871c52c authored by LE GAC Renaud
Browse files

Update InvenioStore to add the method search.

parent 0d4a55c1
...@@ -19,6 +19,7 @@ INS = ("inspirehep", "inspirehep.net") ...@@ -19,6 +19,7 @@ INS = ("inspirehep", "inspirehep.net")
MSG_HTTP_DECODE = "Fail to decode HTTP response" MSG_HTTP_DECODE = "Fail to decode HTTP response"
MSG_HTTP_ERROR = "HTTP Error" MSG_HTTP_ERROR = "HTTP Error"
MSG_INVALID_RESPONSE = "Invalid response"
MSG_NO_IDS = "Invalid list of record identifiers" MSG_NO_IDS = "Invalid list of record identifiers"
MSG_NO_SHELF = "No shelf %s for store %s" MSG_NO_SHELF = "No shelf %s for store %s"
MSG_NOT_IMPLEMENTED = "Method '%s' not implemented for store %s and shelf %s" MSG_NOT_IMPLEMENTED = "Method '%s' not implemented for store %s and shelf %s"
...@@ -76,7 +77,7 @@ class InvenioStore(object): ...@@ -76,7 +77,7 @@ class InvenioStore(object):
host = "old.inspirehep.net" host = "old.inspirehep.net"
elif host in INS and shelf in ("conferences",): elif host in INS and shelf in ("conferences",):
api_search = None api_search = "https://inspirehep.net/api/conferences/?q="
api_record = "https://inspirehep.net/api/conferences" api_record = "https://inspirehep.net/api/conferences"
host = "inspirehep.net" host = "inspirehep.net"
...@@ -102,6 +103,10 @@ class InvenioStore(object): ...@@ -102,6 +103,10 @@ class InvenioStore(object):
"""Retrieve the field value for the record identified by """Retrieve the field value for the record identified by
its *record id*. its *record id*.
Note:
The method is implemented for store, shelf pairs
relying on ``cds.cern.ch`` and ``old.inspirehep.net``.
Args: Args:
rec_id (int): rec_id (int):
record identifier in the store. record identifier in the store.
...@@ -115,7 +120,7 @@ class InvenioStore(object): ...@@ -115,7 +120,7 @@ class InvenioStore(object):
* None when the field is not found * None when the field is not found
Raises: Raises:
CdsException:: CdsException:
* method is not implemented for all store, shelf pairs. * method is not implemented for all store, shelf pairs.
It works for those relying on cds.cern.ch and It works for those relying on cds.cern.ch and
...@@ -129,7 +134,7 @@ class InvenioStore(object): ...@@ -129,7 +134,7 @@ class InvenioStore(object):
msg = MSG_NOT_IMPLEMENTED % ("get_field", host, self._shelf) msg = MSG_NOT_IMPLEMENTED % ("get_field", host, self._shelf)
raise CdsException(msg) raise CdsException(msg)
url = "%s/%s" % (self._api_record, rec_id) url = f"{self._api_record}/{rec_id}"
rep = self.interrogate(url, timeout=60, of="recjson", ot=fieldname) rep = self.interrogate(url, timeout=60, of="recjson", ot=fieldname)
...@@ -147,6 +152,10 @@ class InvenioStore(object): ...@@ -147,6 +152,10 @@ class InvenioStore(object):
def get_ids(self, **kwargs): def get_ids(self, **kwargs):
"""Return a list of *record id* matching search criteria. """Return a list of *record id* matching search criteria.
Note:
The method is implemented for store, shelf pairs
relying on ``cds.cern.ch`` and ``old.inspirehep.net``.
Keyword Args: Keyword Args:
The keyword arguments are those of the invenio search engine and The keyword arguments are those of the invenio search engine and
...@@ -231,14 +240,14 @@ class InvenioStore(object): ...@@ -231,14 +240,14 @@ class InvenioStore(object):
the record data (recjson). the record data (recjson).
Raises: Raises:
CdsException:: CdsException:
* the server return an HTTP error. * the server return an HTTP error.
* JSON object could not be decoded. * JSON object could not be decoded.
* more than one record * more than one record
""" """
url = "%s/%s" % (self._api_record, rec_id) url = f"{self._api_record}/{rec_id}"
kwargs = {} kwargs = {}
if self._host in ("cds.cern.ch", "old.inspirehep.net"): if self._host in ("cds.cern.ch", "old.inspirehep.net"):
...@@ -258,7 +267,7 @@ class InvenioStore(object): ...@@ -258,7 +267,7 @@ class InvenioStore(object):
if isinstance(obj, list) and len(obj) == 1: if isinstance(obj, list) and len(obj) == 1:
return obj[0] return obj[0]
raise CdsException(MSG_HTTP_DECODE) raise CdsException(MSG_INVALID_RESPONSE)
def interrogate(self, url, timeout=10, **kwargs): def interrogate(self, url, timeout=10, **kwargs):
"""Interrogate the store using the *URL*. """Interrogate the store using the *URL*.
...@@ -306,3 +315,47 @@ class InvenioStore(object): ...@@ -306,3 +315,47 @@ class InvenioStore(object):
""" """
return self._url return self._url
def search(self, query, **kwargs):
    """Return a list of *JSON record* matching search criteria.

    Note:
        The method is implemented for store, shelf pairs
        relying on ``inspirehep.net``.

    Args:
        query (str):
            query for the inspirehep store.
            Use the syntax of the web interface or elasticsearch one.

    Keyword Args:
        elasticsearch keywords, forwarded unchanged to ``interrogate``.

    Returns:
        list: the JSON records found in the ``hits`` / ``hits`` section
        of the response.

    Raises:
        CdsException:
            * no search URL is configured for the store, shelf pair.
            * the server returns an HTTP error.
            * JSON object could not be decoded.
            * the response does not have the expected structure.

    """
    # Without a search end point the URL would be built from ``None``;
    # fail early with the same message used by the other methods.
    if self._api_search is None:
        msg = MSG_NOT_IMPLEMENTED % ("search", self._host, self._shelf)
        raise CdsException(msg)

    url = f"{self._api_search}{query}"
    rep = self.interrogate(url, timeout=30, **kwargs)

    try:
        obj = rep.json()
    except ValueError as err:
        raise CdsException(MSG_HTTP_DECODE) from err

    # the response is a dict with 3 keys: 'hits', 'links', 'sort_options'
    # the hits section is a dict with 2 keys: hits (list), total (int)
    try:
        return obj["hits"]["hits"]
    except (KeyError, TypeError) as err:
        raise CdsException(MSG_INVALID_RESPONSE) from err
...@@ -104,9 +104,16 @@ def test_get_record_ins_institutions_01009(): ...@@ -104,9 +104,16 @@ def test_get_record_ins_institutions_01009():
assert isinstance(recjson, dict) assert isinstance(recjson, dict)
def test_interrogate_citations_01010(): def test_get_field_ins_01010():
# old inspirehep interface (new one not yet available) # old inspirehep interface (new one not yet available)
store = InvenioStore("inspirehep.net", shelf="literature") store = InvenioStore("inspirehep.net", shelf="literature")
value = store.get_field(1319638, "number_of_citations") value = store.get_field(1319638, "number_of_citations")
assert str(value).isdigit() assert str(value).isdigit()
def test_search_ins_01011():
    """Search the conferences shelf of inspirehep.net by conference number."""
    # new inspirehep interface for conferences
    cnum = "C10-12-06"
    store = InvenioStore("inspirehep.net", shelf="conferences")

    records = store.search("cnum:C10-12-06")

    # exactly one conference is expected, carrying the requested number
    assert len(records) == 1
    assert records[0]["metadata"]["cnum"] == cnum
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment