Commit 2b1514f3 authored by LE GAC Renaud
Browse files

Update inveniostore to drop httplib and rely on the requests module instead.

parent e4ac21dd
""" invenio_tools.inveniostore
"""
import http.client
import json
import re
import requests
import time
from .exception import CdsException
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError, HTTPError
CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
......@@ -47,10 +48,13 @@ class InvenioStore(object):
"""
self._host = host
self._url = None
self._try = 0
# start a session, a persistent connection with the server
self._session = requests.Session()
# let the session handle the number of retry
session = requests.Session()
session.mount(f"http://{host}", HTTPAdapter(max_retries=3))
self._session = session
def __del__(self):
# close the session
......@@ -64,30 +68,25 @@ class InvenioStore(object):
params (dict): parameters to be send with the URL
Returns:
unicode: the HTTP response
str: the HTTP response
Raises:
CdsException: when the server return an HTTP error
five consecutive time.
CdsException:
when the server returns connection or HTTP error.
"""
self._url = url
self._try += 1
r = self._session.get(url, params=params)
code = r.status_code
data = r.content
try:
r = self._session.get(url, params=params)
# the server is busy or return error wait one minute an retry.
# the number of trial is limited to 5
if code == http.client.SERVICE_UNAVAILABLE or code != http.client.OK:
if self._try == 5:
raise CdsException("%s %s" % (MSG_HTTP_ERROR, code))
except ConnectionError as ce:
raise CdsException(str(ce))
time.sleep(60)
self.interogate(url)
except HTTPError as he:
raise CdsException(str(he))
return data
return r.text
def get_ids(self, **kwargs):
"""Return a list of *record id* matching search criteria.
......@@ -162,14 +161,13 @@ class InvenioStore(object):
# Therefore to recuperate the complete list of ids we have to get them
# by block of 200. The later is the maximum value allowed by cds.
# We use the parameter rg and jrec to steer the scan.
# Thy have no effect on inspirehep.net.
# They have no effect on inspirehep.net.
kwargs["of"] = "id"
kwargs["rg"] = N_IDS
kwargs["jrec"] = -N_IDS
while scan:
self._try = 0
kwargs["jrec"] += N_IDS
url = "http://%s/search" % self._host
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment