Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
f19b63aa
Commit
f19b63aa
authored
Jan 05, 2021
by
LE GAC Renaud
Browse files
Update modules and tests to add and use store_tools.factory.build_store
parent
bb229004
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
119 additions
and
70 deletions
+119
-70
modules/store_tools/__init__.py
modules/store_tools/__init__.py
+2
-2
modules/store_tools/base.py
modules/store_tools/base.py
+4
-0
modules/store_tools/factory.py
modules/store_tools/factory.py
+67
-9
modules/store_tools/inveniostore.py
modules/store_tools/inveniostore.py
+16
-29
tests/basis/test_01_InvenioStore.py
tests/basis/test_01_InvenioStore.py
+12
-12
tests/basis/test_02_record_factory.py
tests/basis/test_02_record_factory.py
+12
-12
tests/basis/test_03_Record.py
tests/basis/test_03_Record.py
+3
-3
tests/basis/test_09_Automaton.py
tests/basis/test_09_Automaton.py
+3
-3
No files found.
modules/store_tools/__init__.py
View file @
f19b63aa
...
...
@@ -24,7 +24,7 @@ from .base import (ARXIV,
from
.exception
import
(
CdsException
,
RecordException
)
from
.factory
import
build_record
from
.factory
import
build_record
,
build_store
from
.inveniostore
import
InvenioStore
from
.record
import
Record
from
.recordconf
import
RecordConf
...
...
@@ -72,6 +72,6 @@ def load_record(host, record_id, shelf=None):
* no JSON object could be decoded.
"""
store
=
InvenioS
tore
(
host
,
shelf
=
shelf
)
store
=
build_s
tore
(
host
,
shelf
=
shelf
)
recjson
=
store
.
get_record
(
record_id
)
return
build_record
(
recjson
)
modules/store_tools/base.py
View file @
f19b63aa
...
...
@@ -6,6 +6,9 @@ import re
ARXIV
=
"arXiv"
ARXIV_PDF
=
"http://arxiv.org/pdf/"
CDS
=
(
"cds"
,
"cds.cern.ch"
)
INS
=
(
"inspirehep"
,
"inspirehep.net"
)
MSG_INV_CONF
=
"Reject invalid conference information"
MSG_INV_CONF_KEY
=
"Reject invalid conference key"
MSG_NO_CONF
=
"Reject no conference information"
...
...
@@ -13,6 +16,7 @@ MSG_NO_CONF_ID_KEY = "Reject no conference identifier and key"
MSG_NO_COUNTRY
=
"Reject invalid country"
MSG_NO_HOST
=
"Reject no host information in record"
MSG_NO_PUBLISHER
=
"Reject invalid publisher"
MSG_NO_SHELF
=
"No shelf %s for store %s"
MSG_NO_THESIS
=
"Reject no thesis information"
MSG_WELL_FORMED_COLLABORATION
=
"Reject collaboration is not well formed"
...
...
modules/store_tools/factory.py
View file @
f19b63aa
...
...
@@ -3,17 +3,20 @@
"""
import
re
from
.base
import
(
is_conference
,
from
.base
import
(
CDS
,
INS
,
is_conference
,
is_institute
,
is_thesis
,
MSG_INV_CONF
,
MSG_INV_CONF_KEY
,
MSG_NO_CONF
,
MSG_NO_SHELF
,
REG_CONF
)
from
datetime
import
datetime
from
.exception
import
CdsException
from
.inveniostore
import
CDS
,
INS
,
InvenioStore
from
.inveniostore
import
InvenioStore
from
.recordconf
import
RecordConf
from
.recordinst
import
RecordInst
from
.recordpubli
import
RecordPubli
...
...
@@ -220,12 +223,67 @@ def build_record(recjson):
return
upcast_record
def build_store(host=None, shelf=None):
    """Build the InvenioStore interface matching a host and a shelf.

    The function selects the record and search endpoints from the
    ``host`` / ``shelf`` combination and passes them, together with the
    canonical host name and the shelf, to the ``InvenioStore``
    constructor.

    Args:
        host (str):
            possible values are ``cds``, ``cds.cern.ch``, ``inspirehep``
            or ``inspirehep.net``

        shelf (str):
            section of the store containing records. It depends on the host.
            Possible values are ``None``, ``literature``, ``conferences``
            and ``institutions``

            +----------------+--------------+-----------------------------+
            | host           | shelf        | base API                    |
            +----------------+--------------+-----------------------------+
            | cds.cern.ch    | None         | https://cds.cern.ch/        |
            +----------------+--------------+-----------------------------+
            | inspirehep.net | None         | https://old.inspirehep.net/ |
            | inspirehep.net | literature   | https://old.inspirehep.net/ |
            | inspirehep.net | conferences  | https://inspirehep.net/     |
            | inspirehep.net | institutions | https://old.inspirehep.net/ |
            +----------------+--------------+-----------------------------+

    Returns:
        InvenioStore

    Raises:
        CdsException:
            when the host is neither in ``CDS`` nor in ``INS``, or when
            the shelf is not one of those handled for an ``INS`` host.

    """
    # CERN Document Server: the shelf is forwarded as given, unchecked.
    if host in CDS:
        return InvenioStore(host="cds.cern.ch",
                            api_record="https://cds.cern.ch/record",
                            api_search="https://cds.cern.ch/search",
                            shelf=shelf)

    if host in INS:
        # these shelves are served by the old inspirehep endpoints
        if shelf in (None, "literature", "institutions"):
            return InvenioStore(
                host="old.inspirehep.net",
                api_record="https://old.inspirehep.net/record",
                api_search="https://old.inspirehep.net/search",
                shelf=shelf)

        # conferences are served by the new inspirehep API
        if shelf in ("conferences",):
            return InvenioStore(
                host="inspirehep.net",
                api_record="https://inspirehep.net/api/conferences",
                api_search="https://inspirehep.net/api/conferences/?q=",
                shelf=shelf)

    # unknown host, or shelf not available for an inspirehep host
    raise CdsException(MSG_NO_SHELF % (shelf, host))
def
get_conference_data
(
host
,
conf_id
=
None
,
key
=
None
):
"""Get the conference data identified by its id or key.
Args:
host (str):
possible values are ``
cds
``, ``
cds
.cern.ch``, ``inspirehep``
possible values are ``
cds
``, ``
cds
.cern.ch``, ``inspirehep``
or ``inspirehep.net``.
conf_id (int):
...
...
@@ -244,14 +302,14 @@ def get_conference_data(host, conf_id=None, key=None):
- conference not found
"""
cds
=
InvenioS
tore
(
host
,
shelf
=
"conferences"
)
store
=
build_s
tore
(
host
,
shelf
=
"conferences"
)
# ........................................................................
#
# search by id in cds.cern.ch
#
if
conf_id
is
not
None
and
host
in
CDS
:
recjson
=
cds
.
get_record
(
conf_id
)
recjson
=
store
.
get_record
(
conf_id
)
if
recjson
[
"recid"
]
!=
conf_id
:
raise
CdsException
(
MSG_INV_CONF
)
return
recjson
...
...
@@ -261,10 +319,10 @@ def get_conference_data(host, conf_id=None, key=None):
# search by key in cds.cern.ch
#
if
key
is
not
None
and
host
in
CDS
:
ids
=
cds
.
get_ids
(
p
=
key
)
ids
=
store
.
get_ids
(
p
=
key
)
for
conf_id
in
ids
:
recjson
=
cds
.
get_record
(
conf_id
)
recjson
=
store
.
get_record
(
conf_id
)
if
match_conference_key
(
recjson
,
key
):
return
recjson
...
...
@@ -276,7 +334,7 @@ def get_conference_data(host, conf_id=None, key=None):
# search by id in inspirehep.net
#
if
conf_id
is
not
None
and
host
in
INS
:
obj
=
cds
.
get_record
(
conf_id
)
obj
=
store
.
get_record
(
conf_id
)
if
obj
[
"id"
]
!=
str
(
conf_id
):
raise
CdsException
(
MSG_INV_CONF
)
return
obj
[
"metadata"
]
...
...
@@ -291,7 +349,7 @@ def get_conference_data(host, conf_id=None, key=None):
if
not
REG_CONF
.
match
(
key
):
raise
CdsException
(
MSG_INV_CONF_KEY
)
obj
=
cds
.
search
(
f
"cnum:
{
key
}
"
)
obj
=
store
.
search
(
f
"cnum:
{
key
}
"
)
try
:
recjson
=
obj
[
0
][
"metadata"
]
...
...
modules/store_tools/inveniostore.py
View file @
f19b63aa
...
...
@@ -14,14 +14,10 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
"d1y"
,
"d1m"
,
"d1d"
,
"d2"
,
"d2y"
,
"d2m"
,
"d2d"
,
"dt"
,
"verbose"
,
"ap"
,
"ln"
,
"ec"
)
CDS
=
(
"cds"
,
"cds.cern.ch"
)
INS
=
(
"inspirehep"
,
"inspirehep.net"
)
MSG_HTTP_DECODE
=
"Fail to decode HTTP response"
MSG_HTTP_ERROR
=
"HTTP Error"
MSG_INVALID_RESPONSE
=
"Invalid response"
MSG_NO_IDS
=
"Invalid list of record identifiers"
MSG_NO_SHELF
=
"No shelf %s for store %s"
MSG_NOT_IMPLEMENTED
=
"Method '%s' not implemented for store %s and shelf %s"
MSG_WRONG_KEYWORD
=
"Invalid keyword argument"
...
...
@@ -42,6 +38,12 @@ class InvenioStore(object):
possible values are ``cds``, ``cds.cern.ch``,``inspirehep``
or ``inspirehep.net``
api_search (str):
api_record (str):
max_retries (int):
shelf (str):
section of the store containing records. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
...
...
@@ -60,38 +62,23 @@ class InvenioStore(object):
"""
def
__init__
(
self
,
host
=
"cds"
,
shelf
=
None
):
def
__init__
(
self
,
api_record
=
None
,
api_search
=
None
,
host
=
None
,
max_retries
=
3
,
shelf
=
None
):
self
.
_api_search
=
api_search
self
.
_api_record
=
api_record
self
.
_host
=
host
self
.
_shelf
=
shelf
self
.
_url
=
None
# base url for the API
if
host
in
CDS
:
api_search
=
"https://cds.cern.ch/search"
api_record
=
"https://cds.cern.ch/record"
host
=
"cds.cern.ch"
elif
host
in
INS
and
shelf
in
(
None
,
"literature"
,
"institutions"
):
api_search
=
"https://old.inspirehep.net/search"
api_record
=
"https://old.inspirehep.net/record"
host
=
"old.inspirehep.net"
elif
host
in
INS
and
shelf
in
(
"conferences"
,):
api_search
=
"https://inspirehep.net/api/conferences/?q="
api_record
=
"https://inspirehep.net/api/conferences"
host
=
"inspirehep.net"
else
:
raise
CdsException
(
MSG_NO_SHELF
%
(
shelf
,
host
))
# start a session, a persistent connection with the server
# let the session handle the number of retry
session
=
requests
.
Session
()
session
.
mount
(
f
"https://
{
host
}
"
,
HTTPAdapter
(
max_retries
=
3
))
self
.
_api_search
=
api_search
self
.
_api_record
=
api_record
self
.
_host
=
host
session
.
mount
(
f
"https://
{
host
}
"
,
HTTPAdapter
(
max_retries
=
max_retries
))
self
.
_session
=
session
def
__del__
(
self
):
...
...
tests/basis/test_01_InvenioStore.py
View file @
f19b63aa
...
...
@@ -6,19 +6,19 @@ Test methods of the class InvenioStore
import
pytest
from
store_tools.exception
import
CdsException
from
store_tools.
invenios
tor
e
import
InvenioS
tore
from
store_tools.
fac
tor
y
import
build_s
tore
def
test_constructor_exception_01001
():
with
pytest
.
raises
(
CdsException
):
InvenioS
tore
(
"inspirehep"
,
shelf
=
"foo"
)
build_s
tore
(
"inspirehep"
,
shelf
=
"foo"
)
def
test_get_ids_cds_01002
():
"""Check the list of record ids for LHCb articles published in 2015.
"""
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
kwargs
=
{
"f1"
:
"year"
,
...
...
@@ -48,7 +48,7 @@ def test_get_ids_ins_01003():
"""Check the list of record ids for LHCb articles published in 2010.
"""
store
=
InvenioS
tore
(
"inspirehep.net"
)
store
=
build_s
tore
(
"inspirehep.net"
)
query
=
"find cn lhcb and tc p and not tc c and date 2010"
...
...
@@ -65,48 +65,48 @@ def test_get_ids_ins_01003():
def
test_get_ids_exception_01004
():
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
with
pytest
.
raises
(
CdsException
)
as
e_info
:
store
.
get_ids
(
p
=
"find cn lhcb"
)
def
test_get_record_cds_01005
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1951625
)
assert
isinstance
(
recjson
,
dict
)
def
test_get_record_ins_none_01006
():
# old inspirehep interface
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
None
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
None
)
recjson
=
store
.
get_record
(
1319638
)
assert
isinstance
(
recjson
,
dict
)
def
test_get_record_ins_literature_01007
():
# old inspirehep interface (new one not yet available)
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1319638
)
assert
isinstance
(
recjson
,
dict
)
def
test_get_record_ins_conferences_01008
():
# new inspirehep interface for conferences
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
recjson
=
store
.
get_record
(
980401
)
assert
recjson
[
"metadata"
][
"cnum"
]
==
"C10-12-06"
def
test_get_record_ins_institutions_01009
():
# old inspirehep interface (new one not yet available)
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
recjson
=
store
.
get_record
(
902989
)
assert
isinstance
(
recjson
,
dict
)
def
test_get_field_ins_01010
():
# old inspirehep interface (new one not yet available)
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
value
=
store
.
get_field
(
1319638
,
"number_of_citations"
)
assert
str
(
value
).
isdigit
()
...
...
@@ -114,6 +114,6 @@ def test_get_field_ins_01010():
def
test_search_ins_01011
():
# new inspirehep interface for conferences
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
obj
=
store
.
search
(
"cnum:C10-12-06"
)
assert
len
(
obj
)
==
1
and
obj
[
0
][
"metadata"
][
"cnum"
]
==
"C10-12-06"
tests/basis/test_02_record_factory.py
View file @
f19b63aa
...
...
@@ -12,9 +12,9 @@ from store_tools.base import (is_conference,
from
store_tools.factory
import
(
add_affiliation_keys
,
add_conference_data
,
build_record
,
build_store
,
get_conference_data
)
from
store_tools.inveniostore
import
InvenioStore
from
store_tools.recordconf
import
RecordConf
from
store_tools.recordinst
import
RecordInst
from
store_tools.recordpubli
import
RecordPubli
...
...
@@ -79,7 +79,7 @@ def test_get_conference_data_ins_02002():
def
test_add_conference_data_cds_02003
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1411352
)
add_conference_data
(
recjson
)
...
...
@@ -98,7 +98,7 @@ def test_add_conference_data_cds_02004():
#
# EXCEPTION
#
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
# no conference URL
recjson
=
store
.
get_record
(
2258914
)
...
...
@@ -114,7 +114,7 @@ def test_add_conference_data_cds_02004():
def
test_add_conference_data_ins_02005
():
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1089237
)
add_conference_data
(
recjson
)
...
...
@@ -130,7 +130,7 @@ def test_add_conference_data_ins_02005():
def
test_conference_cds_02006
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1411352
)
assert
is_conference
(
recjson
)
...
...
@@ -143,7 +143,7 @@ def test_conference_cds_02006():
def
test_conference_ins_02007
():
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1276938
)
assert
is_conference
(
recjson
)
...
...
@@ -156,7 +156,7 @@ def test_conference_ins_02007():
def
test_talk_cds_02008
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
2239092
)
assert
is_conference
(
recjson
)
...
...
@@ -173,7 +173,7 @@ def test_talk_cds_02008():
#
def
test_add_affiliation_keys_ins_02009
():
"""CPPM"""
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
recjson
=
store
.
get_record
(
902989
)
add_affiliation_keys
(
recjson
)
...
...
@@ -185,7 +185,7 @@ def test_add_affiliation_keys_ins_02009():
def
test_institute_ins_02010
():
"""CPPM"""
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
recjson
=
store
.
get_record
(
902989
)
assert
not
is_conference
(
recjson
)
...
...
@@ -203,7 +203,7 @@ def test_institute_ins_02010():
def
test_article_cds_02011
():
"""Precision luminosity measurements at LHCb"""
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1951625
)
assert
not
is_conference
(
recjson
)
...
...
@@ -217,7 +217,7 @@ def test_article_cds_02011():
def
test_article_inspirehep_02012
():
"""Precision luminosity measurements at LHCb"""
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1319638
)
assert
not
is_conference
(
recjson
)
...
...
@@ -234,7 +234,7 @@ def test_article_inspirehep_02012():
#
def
test_thesis_cds_02013
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1632177
)
assert
not
is_conference
(
recjson
)
...
...
tests/basis/test_03_Record.py
View file @
f19b63aa
...
...
@@ -13,20 +13,20 @@ Test all methods of the Record class for a given article:
"""
import
pytest
from
store_tools.
invenios
tor
e
import
InvenioS
tore
from
store_tools.
fac
tor
y
import
build_s
tore
from
store_tools.record
import
Record
@
pytest
.
fixture
(
scope
=
"module"
)
def
reccds
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1951625
)
return
Record
(
recjson
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
recins
():
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1319638
)
return
Record
(
recjson
)
...
...
tests/basis/test_09_Automaton.py
View file @
f19b63aa
...
...
@@ -9,7 +9,7 @@ from gluon import current
from
harvest_tools.automaton
import
Automaton
from
harvest_tools.msgcollection
import
MsgCollection
from
plugin_dbui
import
get_id
from
store_tools.
invenios
tor
e
import
InvenioS
tore
from
store_tools.
fac
tor
y
import
build_s
tore
@
pytest
.
fixture
(
scope
=
"module"
)
...
...
@@ -73,7 +73,7 @@ def test_process_recid_09002(svc):
svc
.
harvester
.
host
=
"cds.cern.ch"
svc
.
harvester
.
collections
=
collection
svc
.
store
=
InvenioStore
(
)
svc
.
store
=
build_store
(
host
=
"cds.cern.ch"
)
ctitle
=
"LHCb / article / %s"
%
collection
svc
.
collection_logs
.
append
(
MsgCollection
(
title
=
ctitle
))
...
...
@@ -106,7 +106,7 @@ def test_process_collection_09003(svc):
svc
.
harvester
.
host
=
"cds.cern.ch"
svc
.
harvester
.
collections
=
collection
svc
.
store
=
InvenioStore
(
)
svc
.
store
=
build_store
(
host
=
"cds.cern.ch"
)
# do it
assert
svc
.
process_collection
(
"LHCb Papers"
)
is
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment