Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
280a0bfe
Commit
280a0bfe
authored
Apr 15, 2020
by
LE GAC Renaud
Browse files
Upgrade InvenioStore to add the concept of shelf.
parent
c9b8d605
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
85 additions
and
23 deletions
+85
-23
modules/invenio_tools/__init__.py
modules/invenio_tools/__init__.py
+19
-2
modules/invenio_tools/inveniostore.py
modules/invenio_tools/inveniostore.py
+66
-21
No files found.
modules/invenio_tools/__init__.py
View file @
280a0bfe
...
@@ -33,7 +33,7 @@ from .recordpubli import RecordPubli
...
@@ -33,7 +33,7 @@ from .recordpubli import RecordPubli
from
.recordthesis
import
RecordThesis
from
.recordthesis
import
RecordThesis
def
load_record
(
host
,
record_id
):
def
load_record
(
host
,
record_id
,
shelf
=
None
):
"""Helper function to load a single record from an invenio store.
"""Helper function to load a single record from an invenio store.
Args:
Args:
...
@@ -44,6 +44,23 @@ def load_record(host, record_id):
...
@@ -44,6 +44,23 @@ def load_record(host, record_id):
record_id (int):
record_id (int):
the record identifier in the store
the record identifier in the store
shelf (str):
section of the store containing records. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
and ``institutions``
+----------------+--------------+-----------------------------+
| host | shelf | base API |
+----------------+--------------+-----------------------------+
| cds.cern.ch | None | https://cds.cern.ch/ |
+----------------+--------------+-----------------------------+
| inspirehep.net | None | https://old.inspirehep.net/ |
| inspirehep.net | literature | https://old.inspirehep.net/ |
| inspirehep.net | conferences | https://inspirehep.net/ |
| inspirehep.net | institutions | https://old.inspirehep.net/ |
+----------------+--------------+-----------------------------+
Returns:
Returns:
Record:
Record:
either RecordPubli, RecordInst, RecordConf or RecordThesis.
either RecordPubli, RecordInst, RecordConf or RecordThesis.
...
@@ -55,6 +72,6 @@ def load_record(host, record_id):
...
@@ -55,6 +72,6 @@ def load_record(host, record_id):
* no JSON object could be decoded.
* no JSON object could be decoded.
"""
"""
store
=
InvenioStore
(
host
)
store
=
InvenioStore
(
host
,
shelf
=
shelf
)
recjson
=
store
.
get_record
(
record_id
)
recjson
=
store
.
get_record
(
record_id
)
return
build_record
(
recjson
)
return
build_record
(
recjson
)
modules/invenio_tools/inveniostore.py
View file @
280a0bfe
...
@@ -13,9 +13,14 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
...
@@ -13,9 +13,14 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
"d1y"
,
"d1m"
,
"d1d"
,
"d2"
,
"d2y"
,
"d2m"
,
"d2d"
,
"dt"
,
"d1y"
,
"d1m"
,
"d1d"
,
"d2"
,
"d2y"
,
"d2m"
,
"d2d"
,
"dt"
,
"verbose"
,
"ap"
,
"ln"
,
"ec"
)
"verbose"
,
"ap"
,
"ln"
,
"ec"
)
CDS
=
(
"cds"
,
"cds.cern.ch"
)
INS
=
(
"inspirehep"
,
"inspirehep.net"
)
MSG_HTTP_DECODE
=
"Fail to decode HTTP response"
MSG_HTTP_DECODE
=
"Fail to decode HTTP response"
MSG_HTTP_ERROR
=
"HTTP Error"
MSG_HTTP_ERROR
=
"HTTP Error"
MSG_NO_IDS
=
"Invalid list of record identifiers"
MSG_NO_IDS
=
"Invalid list of record identifiers"
MSG_NO_SHELF
=
"No shelf %s for store %s"
MSG_NOT_IMPLEMENTED
=
"Method '%' not implemented for store '%' and shelf '%'<"
MSG_WRONG_KEYWORD
=
"Invalid keyword argument"
MSG_WRONG_KEYWORD
=
"Invalid keyword argument"
# maximum number of identifiers to be collected at once.
# maximum number of identifiers to be collected at once.
...
@@ -30,23 +35,61 @@ class InvenioStore(object):
...
@@ -30,23 +35,61 @@ class InvenioStore(object):
* a list of identifier satisfying search criteria.
* a list of identifier satisfying search criteria.
* a record identified by its id.
* a record identified by its id.
"""
def
__init__
(
self
,
host
=
"cds.cern.ch"
):
"""
Args:
Args:
host (str):
host (str):
possible values are ``cds.cern.ch`` or ``inspirehep.net``.
possible values are ``cds``, ``cds.cern.ch``, ``inspirehep``
or ``inspirehep.net``
shelf (str):
section of the store. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
and ``institutions``
+----------------+--------------+-----------------------------+
| host | shelf | base API |
+----------------+--------------+-----------------------------+
| cds.cern.ch | None | https://cds.cern.ch/ |
+----------------+--------------+-----------------------------+
| inspirehep.net | None | https://old.inspirehep.net/ |
| inspirehep.net | literature | https://old.inspirehep.net/ |
| inspirehep.net | conferences | https://inspirehep.net/ |
| inspirehep.net | institutions | https://old.inspirehep.net/ |
+----------------+--------------+-----------------------------+
"""
"""
self
.
_host
=
host
def
__init__
(
self
,
host
=
"cds"
,
shelf
=
None
):
self
.
_shelf
=
shelf
self
.
_url
=
None
self
.
_url
=
None
# base url for the API
if
host
in
CDS
and
shelf
is
None
:
api_search
=
"https://cds.cern.ch/search"
api_record
=
"https://cds.cern.ch/record"
host
=
"cds.cern.ch"
elif
host
in
INS
and
shelf
in
(
None
,
"literature"
,
"institutions"
):
api_search
=
"https://old.inspirehep.net/search"
api_record
=
"https://old.inspirehep.net/record"
host
=
"old.inspirehep.net"
elif
host
in
INS
and
shelf
in
(
"conferences"
,):
api_search
=
None
api_record
=
"https://inspirehep.net/api/conferences"
host
=
"inspirehep.net"
else
:
raise
CdsException
(
MSG_NO_SHELF
%
(
shelf
,
host
))
# start a session, a persistent connection with the server
# start a session, a persistent connection with the server
# let the session handle the number of retry
# let the session handle the number of retry
session
=
requests
.
Session
()
session
=
requests
.
Session
()
session
.
mount
(
f
"http://
{
host
}
"
,
HTTPAdapter
(
max_retries
=
3
))
session
.
mount
(
f
"http
s
://
{
host
}
"
,
HTTPAdapter
(
max_retries
=
3
))
self
.
_api_search
=
api_search
self
.
_api_record
=
api_record
self
.
_host
=
host
self
.
_session
=
session
self
.
_session
=
session
def
__del__
(
self
):
def
__del__
(
self
):
...
@@ -59,7 +102,8 @@ class InvenioStore(object):
...
@@ -59,7 +102,8 @@ class InvenioStore(object):
Args:
Args:
url (str):
url (str):
URL string, *e.g.*::
the URL string depends on the store and on the invenio
version which is running, *e.g.*::
* ``https://cds.cern.ch/record/123456/of=recjson``
* ``https://cds.cern.ch/record/123456/of=recjson``
* ``https://cds.cern.ch/search?of=id&....
* ``https://cds.cern.ch/search?of=id&....
...
@@ -72,7 +116,7 @@ class InvenioStore(object):
...
@@ -72,7 +116,7 @@ class InvenioStore(object):
The keyword arguments are those of the invenio web interface.
The keyword arguments are those of the invenio web interface.
Details are in https://inspirehep.net/help/hacking/search-engine-api
Details are in https://inspirehep.net/help/hacking/search-engine-api
Examples how to use the invenio API:
Examples how to use the
old
invenio API:
https://inspirehep.net/info/hep/api?ln=fr#json_fnames
https://inspirehep.net/info/hep/api?ln=fr#json_fnames
List of keyword in the JSON record:
List of keyword in the JSON record:
...
@@ -300,12 +344,6 @@ class InvenioStore(object):
...
@@ -300,12 +344,6 @@ class InvenioStore(object):
"""
"""
self
.
_url
=
url
self
.
_url
=
url
# FIXME March 30, 2020:
# * new version of inspirehep.net
# * API not yet ready
# * recommend to use old.inspirehep.net
url
=
url
.
replace
(
"//inspirehep.net"
,
"//old.inspirehep.net"
)
r
=
self
.
_session
.
get
(
url
,
timeout
=
timeout
,
params
=
kwargs
)
r
=
self
.
_session
.
get
(
url
,
timeout
=
timeout
,
params
=
kwargs
)
r
.
raise_for_status
()
r
.
raise_for_status
()
...
@@ -397,6 +435,11 @@ class InvenioStore(object):
...
@@ -397,6 +435,11 @@ class InvenioStore(object):
* not well formed list of ids.
* not well formed list of ids.
"""
"""
host
=
self
.
_host
if
host
!=
"old.inspirehep.net"
:
msg
=
MSG_NOT_IMPLEMENTED
%
(
"get_ids"
,
host
,
self
.
_shelf
)
raise
CdsException
(
msg
)
for
k
in
kwargs
:
for
k
in
kwargs
:
if
k
not
in
CDS_SEARCH_KEYS
:
if
k
not
in
CDS_SEARCH_KEYS
:
raise
CdsException
(
MSG_WRONG_KEYWORD
,
k
)
raise
CdsException
(
MSG_WRONG_KEYWORD
,
k
)
...
@@ -418,8 +461,7 @@ class InvenioStore(object):
...
@@ -418,8 +461,7 @@ class InvenioStore(object):
while
scan
:
while
scan
:
kwargs
[
"jrec"
]
+=
N_IDS
kwargs
[
"jrec"
]
+=
N_IDS
url
=
"https://%s/search"
%
self
.
_host
rep
=
self
.
interogate
(
self
.
_api_search
,
timeout
=
30
,
**
kwargs
)
rep
=
self
.
interogate
(
url
,
timeout
=
30
,
**
kwargs
)
try
:
try
:
li
=
rep
.
json
()
li
=
rep
.
json
()
...
@@ -458,10 +500,13 @@ class InvenioStore(object):
...
@@ -458,10 +500,13 @@ class InvenioStore(object):
* no JSON object could be decoded.
* no JSON object could be decoded.
"""
"""
self
.
_try
=
0
url
=
"%s/%s"
%
(
self
.
_api_record
,
rec_id
)
url
=
"https://%s/record/%s"
%
(
self
.
_host
,
rec_id
)
kwargs
=
{}
rep
=
self
.
interogate
(
url
,
timeout
=
30
,
of
=
"recjson"
)
if
self
.
_host
in
(
"cds.cern.ch"
,
"old.inspirehep.net"
):
kwargs
=
{
"of"
:
"recjson"
}
rep
=
self
.
interogate
(
url
,
timeout
=
30
,
**
kwargs
)
try
:
try
:
li
=
rep
.
json
()
li
=
rep
.
json
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment