Commit f19b63aa authored by LE GAC Renaud
Browse files

Update modules and tests to add and use store_tools.factory.build_store

parent bb229004
...@@ -24,7 +24,7 @@ from .base import (ARXIV, ...@@ -24,7 +24,7 @@ from .base import (ARXIV,
from .exception import (CdsException, from .exception import (CdsException,
RecordException) RecordException)
from .factory import build_record from .factory import build_record, build_store
from .inveniostore import InvenioStore from .inveniostore import InvenioStore
from .record import Record from .record import Record
from .recordconf import RecordConf from .recordconf import RecordConf
...@@ -72,6 +72,6 @@ def load_record(host, record_id, shelf=None): ...@@ -72,6 +72,6 @@ def load_record(host, record_id, shelf=None):
* no JSON object could be decoded. * no JSON object could be decoded.
""" """
store = InvenioStore(host, shelf=shelf) store = build_store(host, shelf=shelf)
recjson = store.get_record(record_id) recjson = store.get_record(record_id)
return build_record(recjson) return build_record(recjson)
...@@ -6,6 +6,9 @@ import re ...@@ -6,6 +6,9 @@ import re
ARXIV = "arXiv" ARXIV = "arXiv"
ARXIV_PDF = "http://arxiv.org/pdf/" ARXIV_PDF = "http://arxiv.org/pdf/"
CDS = ("cds", "cds.cern.ch")
INS = ("inspirehep", "inspirehep.net")
MSG_INV_CONF = "Reject invalid conference information" MSG_INV_CONF = "Reject invalid conference information"
MSG_INV_CONF_KEY = "Reject invalid conference key" MSG_INV_CONF_KEY = "Reject invalid conference key"
MSG_NO_CONF = "Reject no conference information" MSG_NO_CONF = "Reject no conference information"
...@@ -13,6 +16,7 @@ MSG_NO_CONF_ID_KEY = "Reject no conference identifier and key" ...@@ -13,6 +16,7 @@ MSG_NO_CONF_ID_KEY = "Reject no conference identifier and key"
MSG_NO_COUNTRY = "Reject invalid country" MSG_NO_COUNTRY = "Reject invalid country"
MSG_NO_HOST = "Reject no host information in record" MSG_NO_HOST = "Reject no host information in record"
MSG_NO_PUBLISHER = "Reject invalid publisher" MSG_NO_PUBLISHER = "Reject invalid publisher"
MSG_NO_SHELF = "No shelf %s for store %s"
MSG_NO_THESIS = "Reject no thesis information" MSG_NO_THESIS = "Reject no thesis information"
MSG_WELL_FORMED_COLLABORATION = "Reject collaboration is not well formed" MSG_WELL_FORMED_COLLABORATION = "Reject collaboration is not well formed"
......
...@@ -3,17 +3,20 @@ ...@@ -3,17 +3,20 @@
""" """
import re import re
from .base import (is_conference, from .base import (CDS,
INS,
is_conference,
is_institute, is_institute,
is_thesis, is_thesis,
MSG_INV_CONF, MSG_INV_CONF,
MSG_INV_CONF_KEY, MSG_INV_CONF_KEY,
MSG_NO_CONF, MSG_NO_CONF,
MSG_NO_SHELF,
REG_CONF) REG_CONF)
from datetime import datetime from datetime import datetime
from .exception import CdsException from .exception import CdsException
from .inveniostore import CDS, INS, InvenioStore from .inveniostore import InvenioStore
from .recordconf import RecordConf from .recordconf import RecordConf
from .recordinst import RecordInst from .recordinst import RecordInst
from .recordpubli import RecordPubli from .recordpubli import RecordPubli
...@@ -220,12 +223,67 @@ def build_record(recjson): ...@@ -220,12 +223,67 @@ def build_record(recjson):
return upcast_record return upcast_record
def build_store(host=None, shelf=None):
    """Build the interface to a publication store for a host and a shelf.

    The pair (host, shelf) selects the host name and the base URLs
    handed over to ``InvenioStore``:

    +----------------+--------------+-----------------------------+
    | host           | shelf        | base API                    |
    +----------------+--------------+-----------------------------+
    | cds.cern.ch    | any          | https://cds.cern.ch/        |
    +----------------+--------------+-----------------------------+
    | inspirehep.net | None         | https://old.inspirehep.net/ |
    | inspirehep.net | literature   | https://old.inspirehep.net/ |
    | inspirehep.net | conferences  | https://inspirehep.net/     |
    | inspirehep.net | institutions | https://old.inspirehep.net/ |
    +----------------+--------------+-----------------------------+

    Args:
        host (str):
            one of the aliases listed in ``CDS`` or ``INS``.
            Any other value, including the default ``None``, is rejected.

        shelf (str):
            section of the store containing records.
            It is forwarded unchanged to ``InvenioStore``.
            For a host in ``CDS`` the shelf is not checked. For a host
            in ``INS`` it has to be ``None``, ``literature``,
            ``conferences`` or ``institutions``.

    Returns:
        InvenioStore

    Raises:
        CdsException:
            when the (host, shelf) pair matches none of the cases above

    """
    # cds.cern.ch: a single API whatever the shelf is
    if host in CDS:
        return InvenioStore(
            host="cds.cern.ch",
            api_record="https://cds.cern.ch/record",
            api_search="https://cds.cern.ch/search",
            shelf=shelf)

    if host in INS:
        # these shelves go to old.inspirehep.net
        if shelf in (None, "literature", "institutions"):
            return InvenioStore(
                host="old.inspirehep.net",
                api_record="https://old.inspirehep.net/record",
                api_search="https://old.inspirehep.net/search",
                shelf=shelf)

        # conferences go to the inspirehep.net REST API
        if shelf in ("conferences",):
            return InvenioStore(
                host="inspirehep.net",
                api_record="https://inspirehep.net/api/conferences",
                api_search="https://inspirehep.net/api/conferences/?q=",
                shelf=shelf)

    # unknown host or unsupported shelf for that host
    raise CdsException(MSG_NO_SHELF % (shelf, host))
def get_conference_data(host, conf_id=None, key=None): def get_conference_data(host, conf_id=None, key=None):
"""Get the conference data identified by its id or key. """Get the conference data identified by its id or key.
Args: Args:
host (str): host (str):
possible values are ``cds``, ``cds.cern.ch``, ``inspirehep`` possible values are ``cds``, ``cds.cern.ch``, ``inspirehep``
or ``inspirehep.net``. or ``inspirehep.net``.
conf_id (int): conf_id (int):
...@@ -244,14 +302,14 @@ def get_conference_data(host, conf_id=None, key=None): ...@@ -244,14 +302,14 @@ def get_conference_data(host, conf_id=None, key=None):
- conference not found - conference not found
""" """
cds = InvenioStore(host, shelf="conferences") store = build_store(host, shelf="conferences")
# ........................................................................ # ........................................................................
# #
# search by id in cds.cern.ch # search by id in cds.cern.ch
# #
if conf_id is not None and host in CDS: if conf_id is not None and host in CDS:
recjson = cds.get_record(conf_id) recjson = store.get_record(conf_id)
if recjson["recid"] != conf_id: if recjson["recid"] != conf_id:
raise CdsException(MSG_INV_CONF) raise CdsException(MSG_INV_CONF)
return recjson return recjson
...@@ -261,10 +319,10 @@ def get_conference_data(host, conf_id=None, key=None): ...@@ -261,10 +319,10 @@ def get_conference_data(host, conf_id=None, key=None):
# search by key in cds.cern.ch # search by key in cds.cern.ch
# #
if key is not None and host in CDS: if key is not None and host in CDS:
ids = cds.get_ids(p=key) ids = store.get_ids(p=key)
for conf_id in ids: for conf_id in ids:
recjson = cds.get_record(conf_id) recjson = store.get_record(conf_id)
if match_conference_key(recjson, key): if match_conference_key(recjson, key):
return recjson return recjson
...@@ -276,7 +334,7 @@ def get_conference_data(host, conf_id=None, key=None): ...@@ -276,7 +334,7 @@ def get_conference_data(host, conf_id=None, key=None):
# search by id in inspirehep.net # search by id in inspirehep.net
# #
if conf_id is not None and host in INS: if conf_id is not None and host in INS:
obj = cds.get_record(conf_id) obj = store.get_record(conf_id)
if obj["id"] != str(conf_id): if obj["id"] != str(conf_id):
raise CdsException(MSG_INV_CONF) raise CdsException(MSG_INV_CONF)
return obj["metadata"] return obj["metadata"]
...@@ -291,7 +349,7 @@ def get_conference_data(host, conf_id=None, key=None): ...@@ -291,7 +349,7 @@ def get_conference_data(host, conf_id=None, key=None):
if not REG_CONF.match(key): if not REG_CONF.match(key):
raise CdsException(MSG_INV_CONF_KEY) raise CdsException(MSG_INV_CONF_KEY)
obj = cds.search(f"cnum:{key}") obj = store.search(f"cnum:{key}")
try: try:
recjson = obj[0]["metadata"] recjson = obj[0]["metadata"]
......
...@@ -14,14 +14,10 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp", ...@@ -14,14 +14,10 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
"d1y", "d1m", "d1d", "d2", "d2y", "d2m", "d2d", "dt", "d1y", "d1m", "d1d", "d2", "d2y", "d2m", "d2d", "dt",
"verbose", "ap", "ln", "ec") "verbose", "ap", "ln", "ec")
CDS = ("cds", "cds.cern.ch")
INS = ("inspirehep", "inspirehep.net")
MSG_HTTP_DECODE = "Fail to decode HTTP response" MSG_HTTP_DECODE = "Fail to decode HTTP response"
MSG_HTTP_ERROR = "HTTP Error" MSG_HTTP_ERROR = "HTTP Error"
MSG_INVALID_RESPONSE = "Invalid response" MSG_INVALID_RESPONSE = "Invalid response"
MSG_NO_IDS = "Invalid list of record identifiers" MSG_NO_IDS = "Invalid list of record identifiers"
MSG_NO_SHELF = "No shelf %s for store %s"
MSG_NOT_IMPLEMENTED = "Method '%s' not implemented for store %s and shelf %s" MSG_NOT_IMPLEMENTED = "Method '%s' not implemented for store %s and shelf %s"
MSG_WRONG_KEYWORD = "Invalid keyword argument" MSG_WRONG_KEYWORD = "Invalid keyword argument"
...@@ -42,6 +38,12 @@ class InvenioStore(object): ...@@ -42,6 +38,12 @@ class InvenioStore(object):
possible values are ``cds``, ``cds.cern.ch``,``inspirehep`` possible values are ``cds``, ``cds.cern.ch``,``inspirehep``
or ``inspirehep.net`` or ``inspirehep.net``
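api_search (str):
base URL used to search the store, e.g. ``https://cds.cern.ch/search``
api_record (str):
base URL used to fetch a record, e.g. ``https://cds.cern.ch/record``
max_retries (int):
value passed as ``max_retries`` to the ``HTTPAdapter`` mounted on the session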
shelf (str): shelf (str):
section of the store containing records. It depends on the host. section of the store containing records. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences`` Possible values are ``None``, ``literature``, ``conferences``
...@@ -60,38 +62,23 @@ class InvenioStore(object): ...@@ -60,38 +62,23 @@ class InvenioStore(object):
""" """
def __init__(self, host="cds", shelf=None): def __init__(self,
api_record=None,
api_search=None,
host=None,
max_retries=3,
shelf=None):
self._api_search = api_search
self._api_record = api_record
self._host = host
self._shelf = shelf self._shelf = shelf
self._url = None self._url = None
# base url for the API
if host in CDS:
api_search = "https://cds.cern.ch/search"
api_record = "https://cds.cern.ch/record"
host = "cds.cern.ch"
elif host in INS and shelf in (None, "literature", "institutions"):
api_search = "https://old.inspirehep.net/search"
api_record = "https://old.inspirehep.net/record"
host = "old.inspirehep.net"
elif host in INS and shelf in ("conferences",):
api_search = "https://inspirehep.net/api/conferences/?q="
api_record = "https://inspirehep.net/api/conferences"
host = "inspirehep.net"
else:
raise CdsException(MSG_NO_SHELF % (shelf, host))
# start a session, a persistent connection with the server # start a session, a persistent connection with the server
# let the session handle the number of retry # let the session handle the number of retry
session = requests.Session() session = requests.Session()
session.mount(f"https://{host}", HTTPAdapter(max_retries=3)) session.mount(f"https://{host}", HTTPAdapter(max_retries=max_retries))
self._api_search = api_search
self._api_record = api_record
self._host = host
self._session = session self._session = session
def __del__(self): def __del__(self):
......
...@@ -6,19 +6,19 @@ Test methods of the class InvenioStore ...@@ -6,19 +6,19 @@ Test methods of the class InvenioStore
import pytest import pytest
from store_tools.exception import CdsException from store_tools.exception import CdsException
from store_tools.inveniostore import InvenioStore from store_tools.factory import build_store
def test_constructor_exception_01001(): def test_constructor_exception_01001():
with pytest.raises(CdsException): with pytest.raises(CdsException):
InvenioStore("inspirehep", shelf="foo") build_store("inspirehep", shelf="foo")
def test_get_ids_cds_01002(): def test_get_ids_cds_01002():
"""Check the list of record ids for LHCb articles published in 2015. """Check the list of record ids for LHCb articles published in 2015.
""" """
store = InvenioStore("cds.cern.ch") store = build_store("cds.cern.ch")
kwargs = { kwargs = {
"f1": "year", "f1": "year",
...@@ -48,7 +48,7 @@ def test_get_ids_ins_01003(): ...@@ -48,7 +48,7 @@ def test_get_ids_ins_01003():
"""Check the list of record ids for LHCb articles published in 2010. """Check the list of record ids for LHCb articles published in 2010.
""" """
store = InvenioStore("inspirehep.net") store = build_store("inspirehep.net")
query = "find cn lhcb and tc p and not tc c and date 2010" query = "find cn lhcb and tc p and not tc c and date 2010"
...@@ -65,48 +65,48 @@ def test_get_ids_ins_01003(): ...@@ -65,48 +65,48 @@ def test_get_ids_ins_01003():
def test_get_ids_exception_01004(): def test_get_ids_exception_01004():
store = InvenioStore("inspirehep.net", shelf="conferences") store = build_store("inspirehep.net", shelf="conferences")
with pytest.raises(CdsException) as e_info: with pytest.raises(CdsException) as e_info:
store.get_ids(p="find cn lhcb") store.get_ids(p="find cn lhcb")
def test_get_record_cds_01005(): def test_get_record_cds_01005():
store = InvenioStore("cds.cern.ch") store = build_store("cds.cern.ch")
recjson = store.get_record(1951625) recjson = store.get_record(1951625)
assert isinstance(recjson, dict) assert isinstance(recjson, dict)
def test_get_record_ins_none_01006(): def test_get_record_ins_none_01006():
# old inspirehep interface # old inspirehep interface
store = InvenioStore("inspirehep.net", shelf=None) store = build_store("inspirehep.net", shelf=None)
recjson = store.get_record(1319638) recjson = store.get_record(1319638)
assert isinstance(recjson, dict) assert isinstance(recjson, dict)
def test_get_record_ins_literature_01007(): def test_get_record_ins_literature_01007():
# old inspirehep interface (new one not yet available) # old inspirehep interface (new one not yet available)
store = InvenioStore("inspirehep.net", shelf="literature") store = build_store("inspirehep.net", shelf="literature")
recjson = store.get_record(1319638) recjson = store.get_record(1319638)
assert isinstance(recjson, dict) assert isinstance(recjson, dict)
def test_get_record_ins_conferences_01008(): def test_get_record_ins_conferences_01008():
# new inspirehep interface for conferences # new inspirehep interface for conferences
store = InvenioStore("inspirehep.net", shelf="conferences") store = build_store("inspirehep.net", shelf="conferences")
recjson = store.get_record(980401) recjson = store.get_record(980401)
assert recjson["metadata"]["cnum"] == "C10-12-06" assert recjson["metadata"]["cnum"] == "C10-12-06"
def test_get_record_ins_institutions_01009(): def test_get_record_ins_institutions_01009():
# old inspirehep interface (new one not yet available) # old inspirehep interface (new one not yet available)
store = InvenioStore("inspirehep.net", shelf="institutions") store = build_store("inspirehep.net", shelf="institutions")
recjson = store.get_record(902989) recjson = store.get_record(902989)
assert isinstance(recjson, dict) assert isinstance(recjson, dict)
def test_get_field_ins_01010(): def test_get_field_ins_01010():
# old inspirehep interface (new one not yet available) # old inspirehep interface (new one not yet available)
store = InvenioStore("inspirehep.net", shelf="literature") store = build_store("inspirehep.net", shelf="literature")
value = store.get_field(1319638, "number_of_citations") value = store.get_field(1319638, "number_of_citations")
assert str(value).isdigit() assert str(value).isdigit()
...@@ -114,6 +114,6 @@ def test_get_field_ins_01010(): ...@@ -114,6 +114,6 @@ def test_get_field_ins_01010():
def test_search_ins_01011(): def test_search_ins_01011():
# new inspirehep interface for conferences # new inspirehep interface for conferences
store = InvenioStore("inspirehep.net", shelf="conferences") store = build_store("inspirehep.net", shelf="conferences")
obj = store.search("cnum:C10-12-06") obj = store.search("cnum:C10-12-06")
assert len(obj) == 1 and obj[0]["metadata"]["cnum"] == "C10-12-06" assert len(obj) == 1 and obj[0]["metadata"]["cnum"] == "C10-12-06"
...@@ -12,9 +12,9 @@ from store_tools.base import (is_conference, ...@@ -12,9 +12,9 @@ from store_tools.base import (is_conference,
from store_tools.factory import (add_affiliation_keys, from store_tools.factory import (add_affiliation_keys,
add_conference_data, add_conference_data,
build_record, build_record,
build_store,
get_conference_data) get_conference_data)
from store_tools.inveniostore import InvenioStore
from store_tools.recordconf import RecordConf from store_tools.recordconf import RecordConf
from store_tools.recordinst import RecordInst from store_tools.recordinst import RecordInst
from store_tools.recordpubli import RecordPubli from store_tools.recordpubli import RecordPubli
...@@ -79,7 +79,7 @@ def test_get_conference_data_ins_02002(): ...@@ -79,7 +79,7 @@ def test_get_conference_data_ins_02002():
def test_add_conference_data_cds_02003(): def test_add_conference_data_cds_02003():
store = InvenioStore("cds.cern.ch") store = build_store("cds.cern.ch")
recjson = store.get_record(1411352) recjson = store.get_record(1411352)
add_conference_data(recjson) add_conference_data(recjson)
...@@ -98,7 +98,7 @@ def test_add_conference_data_cds_02004(): ...@@ -98,7 +98,7 @@ def test_add_conference_data_cds_02004():
# #
# EXCEPTION # EXCEPTION
# #
store = InvenioStore("cds.cern.ch") store = build_store("cds.cern.ch")
# no conference URL # no conference URL
recjson = store.get_record(2258914) recjson = store.get_record(2258914)
...@@ -114,7 +114,7 @@ def test_add_conference_data_cds_02004(): ...@@ -114,7 +114,7 @@ def test_add_conference_data_cds_02004():
def test_add_conference_data_ins_02005(): def test_add_conference_data_ins_02005():
store = InvenioStore("inspirehep.net", shelf="literature") store = build_store("inspirehep.net", shelf="literature")
recjson = store.get_record(1089237) recjson = store.get_record(1089237)
add_conference_data(recjson) add_conference_data(recjson)
...@@ -130,7 +130,7 @@ def test_add_conference_data_ins_02005(): ...@@ -130,7 +130,7 @@ def test_add_conference_data_ins_02005():
def test_conference_cds_02006(): def test_conference_cds_02006():
store = InvenioStore("cds.cern.ch") store = build_store("cds.cern.ch")
recjson = store.get_record(1411352) recjson = store.get_record(1411352)
assert is_conference(recjson) assert is_conference(recjson)
...@@ -143,7 +143,7 @@ def test_conference_cds_02006(): ...@@ -143,7 +143,7 @@ def test_conference_cds_02006():
def test_conference_ins_02007(): def test_conference_ins_02007():
store = InvenioStore("inspirehep.net", shelf="literature") store = build_store("inspirehep.net", shelf="literature")
recjson = store.get_record(1276938) recjson = store.get_record(1276938)
assert is_conference(recjson) assert is_conference(recjson)
...@@ -156,7 +156,7 @@ def test_conference_ins_02007(): ...@@ -156,7 +156,7 @@ def test_conference_ins_02007():
def test_talk_cds_02008(): def test_talk_cds_02008():
store = InvenioStore("cds.cern.ch") store = build_store("cds.cern.ch")
recjson = store.get_record(2239092) recjson = store.get_record(2239092)
assert is_conference(recjson) assert is_conference(recjson)
...@@ -173,7 +173,7 @@ def test_talk_cds_02008(): ...@@ -173,7 +173,7 @@ def test_talk_cds_02008():
# #
def test_add_affiliation_keys_ins_02009(): def test_add_affiliation_keys_ins_02009():
"""CPPM""" """CPPM"""
store = InvenioStore("inspirehep.net", shelf="institutions") store = build_store("inspirehep.net", shelf="institutions")
recjson = store.get_record(902989) recjson = store.get_record(902989)
add_affiliation_keys(recjson) add_affiliation_keys(recjson)
...@@ -185,7 +185,7 @@ def test_add_affiliation_keys_ins_02009(): ...@@ -185,7 +185,7 @@ def test_add_affiliation_keys_ins_02009():
def test_institute_ins_02010(): def test_institute_ins_02010():
"""CPPM""" """CPPM"""
store = InvenioStore("inspirehep.net", shelf="institutions") store = build_store("inspirehep.net", shelf="institutions")
recjson = store.get_record(902989) recjson = store.get_record(902989)
assert not is_conference(recjson) assert not is_conference(recjson)
...@@ -203,7 +203,7 @@ def test_institute_ins_02010(): ...@@ -203,7 +203,7 @@ def test_institute_ins_02010():
def test_article_cds_02011(): def test_article_cds_02011():
"""Precision luminosity measurements at LHCb""" """Precision luminosity measurements at LHCb"""
store = InvenioStore("cds.cern.ch") store = build_store("cds.cern.ch")
recjson = store.get_record(1951625) recjson = store.get_record(1951625)
assert not is_conference(recjson) assert not is_conference(recjson)
...@@ -217,7 +217,7 @@ def test_article_cds_02011(): ...@@ -217,7 +217,7 @@ def test_article_cds_02011():