Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
f19b63aa
Commit
f19b63aa
authored
Jan 05, 2021
by
LE GAC Renaud
Browse files
Update modules and tests to add and use store_tools.factory.build_store
parent
bb229004
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
119 additions
and
70 deletions
+119
-70
modules/store_tools/__init__.py
modules/store_tools/__init__.py
+2
-2
modules/store_tools/base.py
modules/store_tools/base.py
+4
-0
modules/store_tools/factory.py
modules/store_tools/factory.py
+67
-9
modules/store_tools/inveniostore.py
modules/store_tools/inveniostore.py
+16
-29
tests/basis/test_01_InvenioStore.py
tests/basis/test_01_InvenioStore.py
+12
-12
tests/basis/test_02_record_factory.py
tests/basis/test_02_record_factory.py
+12
-12
tests/basis/test_03_Record.py
tests/basis/test_03_Record.py
+3
-3
tests/basis/test_09_Automaton.py
tests/basis/test_09_Automaton.py
+3
-3
No files found.
modules/store_tools/__init__.py
View file @
f19b63aa
...
...
@@ -24,7 +24,7 @@ from .base import (ARXIV,
from
.exception
import
(
CdsException
,
RecordException
)
from
.factory
import
build_record
from
.factory
import
build_record
,
build_store
from
.inveniostore
import
InvenioStore
from
.record
import
Record
from
.recordconf
import
RecordConf
...
...
@@ -72,6 +72,6 @@ def load_record(host, record_id, shelf=None):
* no JSON object could be decoded.
"""
store
=
InvenioS
tore
(
host
,
shelf
=
shelf
)
store
=
build_s
tore
(
host
,
shelf
=
shelf
)
recjson
=
store
.
get_record
(
record_id
)
return
build_record
(
recjson
)
modules/store_tools/base.py
View file @
f19b63aa
...
...
@@ -6,6 +6,9 @@ import re
ARXIV
=
"arXiv"
ARXIV_PDF
=
"http://arxiv.org/pdf/"
CDS
=
(
"cds"
,
"cds.cern.ch"
)
INS
=
(
"inspirehep"
,
"inspirehep.net"
)
MSG_INV_CONF
=
"Reject invalid conference information"
MSG_INV_CONF_KEY
=
"Reject invalid conference key"
MSG_NO_CONF
=
"Reject no conference information"
...
...
@@ -13,6 +16,7 @@ MSG_NO_CONF_ID_KEY = "Reject no conference identifier and key"
MSG_NO_COUNTRY
=
"Reject invalid country"
MSG_NO_HOST
=
"Reject no host information in record"
MSG_NO_PUBLISHER
=
"Reject invalid publisher"
MSG_NO_SHELF
=
"No shelf %s for store %s"
MSG_NO_THESIS
=
"Reject no thesis information"
MSG_WELL_FORMED_COLLABORATION
=
"Reject collaboration is not well formed"
...
...
modules/store_tools/factory.py
View file @
f19b63aa
...
...
@@ -3,17 +3,20 @@
"""
import
re
from
.base
import
(
is_conference
,
from
.base
import
(
CDS
,
INS
,
is_conference
,
is_institute
,
is_thesis
,
MSG_INV_CONF
,
MSG_INV_CONF_KEY
,
MSG_NO_CONF
,
MSG_NO_SHELF
,
REG_CONF
)
from
datetime
import
datetime
from
.exception
import
CdsException
from
.inveniostore
import
CDS
,
INS
,
InvenioStore
from
.inveniostore
import
InvenioStore
from
.recordconf
import
RecordConf
from
.recordinst
import
RecordInst
from
.recordpubli
import
RecordPubli
...
...
@@ -220,12 +223,67 @@ def build_record(recjson):
return
upcast_record
def build_store(host=None, shelf=None):
    """Create the store interface matching a host and a shelf.

    The host alias and the shelf select the API endpoints handed over
    to ``InvenioStore``:

    +----------------+--------------+-----------------------------+
    | host           | shelf        | base API                    |
    +----------------+--------------+-----------------------------+
    | cds.cern.ch    | any          | https://cds.cern.ch/        |
    +----------------+--------------+-----------------------------+
    | inspirehep.net | None         | https://old.inspirehep.net/ |
    | inspirehep.net | literature   | https://old.inspirehep.net/ |
    | inspirehep.net | conferences  | https://inspirehep.net/     |
    | inspirehep.net | institutions | https://old.inspirehep.net/ |
    +----------------+--------------+-----------------------------+

    Args:
        host (str):
            one of the aliases listed in ``CDS`` or ``INS``, namely
            ``cds``, ``cds.cern.ch``, ``inspirehep`` or ``inspirehep.net``

        shelf (str):
            section of the store containing records. For the inspirehep
            host, accepted values are ``None``, ``literature``,
            ``conferences`` and ``institutions``. For the cds host the
            value is forwarded to the store unchecked.

    Returns:
        InvenioStore

    Raises:
        CdsException:
            the host is unknown or the shelf is not available for
            the inspirehep host.

    """
    # CERN Document Server: same endpoints whatever the shelf
    if host in CDS:
        return InvenioStore(host="cds.cern.ch",
                            api_record="https://cds.cern.ch/record",
                            api_search="https://cds.cern.ch/search",
                            shelf=shelf)

    if host in INS:
        # shelves served by the old inspirehep interface
        if shelf in (None, "literature", "institutions"):
            return InvenioStore(
                host="old.inspirehep.net",
                api_record="https://old.inspirehep.net/record",
                api_search="https://old.inspirehep.net/search",
                shelf=shelf)

        # shelves served by the new inspirehep REST API
        if shelf in ("conferences",):
            return InvenioStore(
                host="inspirehep.net",
                api_record="https://inspirehep.net/api/conferences",
                api_search="https://inspirehep.net/api/conferences/?q=",
                shelf=shelf)

    # unknown host, or shelf not available for that host
    raise CdsException(MSG_NO_SHELF % (shelf, host))
def
get_conference_data
(
host
,
conf_id
=
None
,
key
=
None
):
"""Get the conference data identified by its id or key.
Args:
host (str):
possible values are ``
cds
``, ``
cds
.cern.ch``, ``inspirehep``
possible values are ``
cds
``, ``
cds
.cern.ch``, ``inspirehep``
or ``inspirehep.net``.
conf_id (int):
...
...
@@ -244,14 +302,14 @@ def get_conference_data(host, conf_id=None, key=None):
- conference not found
"""
cds
=
InvenioS
tore
(
host
,
shelf
=
"conferences"
)
store
=
build_s
tore
(
host
,
shelf
=
"conferences"
)
# ........................................................................
#
# search by id in cds.cern.ch
#
if
conf_id
is
not
None
and
host
in
CDS
:
recjson
=
cds
.
get_record
(
conf_id
)
recjson
=
store
.
get_record
(
conf_id
)
if
recjson
[
"recid"
]
!=
conf_id
:
raise
CdsException
(
MSG_INV_CONF
)
return
recjson
...
...
@@ -261,10 +319,10 @@ def get_conference_data(host, conf_id=None, key=None):
# search by key in cds.cern.ch
#
if
key
is
not
None
and
host
in
CDS
:
ids
=
cds
.
get_ids
(
p
=
key
)
ids
=
store
.
get_ids
(
p
=
key
)
for
conf_id
in
ids
:
recjson
=
cds
.
get_record
(
conf_id
)
recjson
=
store
.
get_record
(
conf_id
)
if
match_conference_key
(
recjson
,
key
):
return
recjson
...
...
@@ -276,7 +334,7 @@ def get_conference_data(host, conf_id=None, key=None):
# search by id in inspirehep.net
#
if
conf_id
is
not
None
and
host
in
INS
:
obj
=
cds
.
get_record
(
conf_id
)
obj
=
store
.
get_record
(
conf_id
)
if
obj
[
"id"
]
!=
str
(
conf_id
):
raise
CdsException
(
MSG_INV_CONF
)
return
obj
[
"metadata"
]
...
...
@@ -291,7 +349,7 @@ def get_conference_data(host, conf_id=None, key=None):
if
not
REG_CONF
.
match
(
key
):
raise
CdsException
(
MSG_INV_CONF_KEY
)
obj
=
cds
.
search
(
f
"cnum:
{
key
}
"
)
obj
=
store
.
search
(
f
"cnum:
{
key
}
"
)
try
:
recjson
=
obj
[
0
][
"metadata"
]
...
...
modules/store_tools/inveniostore.py
View file @
f19b63aa
...
...
@@ -14,14 +14,10 @@ CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
"d1y"
,
"d1m"
,
"d1d"
,
"d2"
,
"d2y"
,
"d2m"
,
"d2d"
,
"dt"
,
"verbose"
,
"ap"
,
"ln"
,
"ec"
)
CDS
=
(
"cds"
,
"cds.cern.ch"
)
INS
=
(
"inspirehep"
,
"inspirehep.net"
)
MSG_HTTP_DECODE
=
"Fail to decode HTTP response"
MSG_HTTP_ERROR
=
"HTTP Error"
MSG_INVALID_RESPONSE
=
"Invalid response"
MSG_NO_IDS
=
"Invalid list of record identifiers"
MSG_NO_SHELF
=
"No shelf %s for store %s"
MSG_NOT_IMPLEMENTED
=
"Method '%s' not implemented for store %s and shelf %s"
MSG_WRONG_KEYWORD
=
"Invalid keyword argument"
...
...
@@ -42,6 +38,12 @@ class InvenioStore(object):
possible values are ``cds``, ``cds.cern.ch``,``inspirehep``
or ``inspirehep.net``
api_search (str):
api_record (str):
max_retries (int):
shelf (str):
section of the store containing records. It depends on the host.
Possible values are ``None``, ``literature``, ``conferences``
...
...
@@ -60,38 +62,23 @@ class InvenioStore(object):
"""
def
__init__
(
self
,
host
=
"cds"
,
shelf
=
None
):
def
__init__
(
self
,
api_record
=
None
,
api_search
=
None
,
host
=
None
,
max_retries
=
3
,
shelf
=
None
):
self
.
_api_search
=
api_search
self
.
_api_record
=
api_record
self
.
_host
=
host
self
.
_shelf
=
shelf
self
.
_url
=
None
# base url for the API
if
host
in
CDS
:
api_search
=
"https://cds.cern.ch/search"
api_record
=
"https://cds.cern.ch/record"
host
=
"cds.cern.ch"
elif
host
in
INS
and
shelf
in
(
None
,
"literature"
,
"institutions"
):
api_search
=
"https://old.inspirehep.net/search"
api_record
=
"https://old.inspirehep.net/record"
host
=
"old.inspirehep.net"
elif
host
in
INS
and
shelf
in
(
"conferences"
,):
api_search
=
"https://inspirehep.net/api/conferences/?q="
api_record
=
"https://inspirehep.net/api/conferences"
host
=
"inspirehep.net"
else
:
raise
CdsException
(
MSG_NO_SHELF
%
(
shelf
,
host
))
# start a session, a persistent connection with the server
# let the session handle the number of retry
session
=
requests
.
Session
()
session
.
mount
(
f
"https://
{
host
}
"
,
HTTPAdapter
(
max_retries
=
3
))
self
.
_api_search
=
api_search
self
.
_api_record
=
api_record
self
.
_host
=
host
session
.
mount
(
f
"https://
{
host
}
"
,
HTTPAdapter
(
max_retries
=
max_retries
))
self
.
_session
=
session
def
__del__
(
self
):
...
...
tests/basis/test_01_InvenioStore.py
View file @
f19b63aa
...
...
@@ -6,19 +6,19 @@ Test methods of the class InveniStore
import
pytest
from
store_tools.exception
import
CdsException
from
store_tools.
invenios
tor
e
import
InvenioS
tore
from
store_tools.
fac
tor
y
import
build_s
tore
def
test_constructor_exception_01001
():
with
pytest
.
raises
(
CdsException
):
InvenioS
tore
(
"inspirehep"
,
shelf
=
"foo"
)
build_s
tore
(
"inspirehep"
,
shelf
=
"foo"
)
def
test_get_ids_cds_01002
():
"""Check the list of record ids for LHCb articles published in 2015.
"""
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
kwargs
=
{
"f1"
:
"year"
,
...
...
@@ -48,7 +48,7 @@ def test_get_ids_ins_01003():
"""Check the list of record ids for LHCb articles published in 2010.
"""
store
=
InvenioS
tore
(
"inspirehep.net"
)
store
=
build_s
tore
(
"inspirehep.net"
)
query
=
"find cn lhcb and tc p and not tc c and date 2010"
...
...
@@ -65,48 +65,48 @@ def test_get_ids_ins_01003():
def
test_get_ids_exception_01004
():
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
with
pytest
.
raises
(
CdsException
)
as
e_info
:
store
.
get_ids
(
p
=
"find cn lhcb"
)
def
test_get_record_cds_01005
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1951625
)
assert
isinstance
(
recjson
,
dict
)
def
test_get_record_ins_none_01006
():
# old inspirehep interface
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
None
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
None
)
recjson
=
store
.
get_record
(
1319638
)
assert
isinstance
(
recjson
,
dict
)
def
test_get_record_ins_literature_01007
():
# old inspirehep interface (new one not yet available)
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1319638
)
assert
isinstance
(
recjson
,
dict
)
def
test_get_record_ins_conferences_01008
():
# new inspirehep interface for conferences
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
recjson
=
store
.
get_record
(
980401
)
assert
recjson
[
"metadata"
][
"cnum"
]
==
"C10-12-06"
def
test_get_record_ins_institutions_01009
():
# old inspirehep interface (new one not yet available)
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
recjson
=
store
.
get_record
(
902989
)
assert
isinstance
(
recjson
,
dict
)
def
test_get_field_ins_01010
():
# old inspirehep interface (new one not yet available)
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
value
=
store
.
get_field
(
1319638
,
"number_of_citations"
)
assert
str
(
value
).
isdigit
()
...
...
@@ -114,6 +114,6 @@ def test_get_field_ins_01010():
def
test_search_ins_01011
():
# new inspirehep interface for conferences
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"conferences"
)
obj
=
store
.
search
(
"cnum:C10-12-06"
)
assert
len
(
obj
)
==
1
and
obj
[
0
][
"metadata"
][
"cnum"
]
==
"C10-12-06"
tests/basis/test_02_record_factory.py
View file @
f19b63aa
...
...
@@ -12,9 +12,9 @@ from store_tools.base import (is_conference,
from
store_tools.factory
import
(
add_affiliation_keys
,
add_conference_data
,
build_record
,
build_store
,
get_conference_data
)
from
store_tools.inveniostore
import
InvenioStore
from
store_tools.recordconf
import
RecordConf
from
store_tools.recordinst
import
RecordInst
from
store_tools.recordpubli
import
RecordPubli
...
...
@@ -79,7 +79,7 @@ def test_get_conference_data_ins_02002():
def
test_add_conference_data_cds_02003
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1411352
)
add_conference_data
(
recjson
)
...
...
@@ -98,7 +98,7 @@ def test_add_conference_data_cds_02004():
#
# EXCEPTION
#
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
# no conference URL
recjson
=
store
.
get_record
(
2258914
)
...
...
@@ -114,7 +114,7 @@ def test_add_conference_data_cds_02004():
def
test_add_conference_data_ins_02005
():
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1089237
)
add_conference_data
(
recjson
)
...
...
@@ -130,7 +130,7 @@ def test_add_conference_data_ins_02005():
def
test_conference_cds_02006
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1411352
)
assert
is_conference
(
recjson
)
...
...
@@ -143,7 +143,7 @@ def test_conference_cds_02006():
def
test_conference_ins_02007
():
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1276938
)
assert
is_conference
(
recjson
)
...
...
@@ -156,7 +156,7 @@ def test_conference_ins_02007():
def
test_talk_cds_02008
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
2239092
)
assert
is_conference
(
recjson
)
...
...
@@ -173,7 +173,7 @@ def test_talk_cds_02008():
#
def
test_add_affiliation_keys_ins_02009
():
"""CPPM"""
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
recjson
=
store
.
get_record
(
902989
)
add_affiliation_keys
(
recjson
)
...
...
@@ -185,7 +185,7 @@ def test_add_affiliation_keys_ins_02009():
def
test_institute_ins_02010
():
"""CPPM"""
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"institutions"
)
recjson
=
store
.
get_record
(
902989
)
assert
not
is_conference
(
recjson
)
...
...
@@ -203,7 +203,7 @@ def test_institute_ins_02010():
def
test_article_cds_02011
():
"""Precision luminosity measurements at LHCb"""
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1951625
)
assert
not
is_conference
(
recjson
)
...
...
@@ -217,7 +217,7 @@ def test_article_cds_02011():
def
test_article_inspirehep_02012
():
"""Precision luminosity measurements at LHCb"""
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1319638
)
assert
not
is_conference
(
recjson
)
...
...
@@ -234,7 +234,7 @@ def test_article_inspirehep_02012():
#
def
test_thesis_cds_02013
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1632177
)
assert
not
is_conference
(
recjson
)
...
...
tests/basis/test_03_Record.py
View file @
f19b63aa
...
...
@@ -13,20 +13,20 @@ Test all methods of the Record class for a given article:
"""
import
pytest
from
store_tools.
invenios
tor
e
import
InvenioS
tore
from
store_tools.
fac
tor
y
import
build_s
tore
from
store_tools.record
import
Record
@
pytest
.
fixture
(
scope
=
"module"
)
def
reccds
():
store
=
InvenioS
tore
(
"cds.cern.ch"
)
store
=
build_s
tore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1951625
)
return
Record
(
recjson
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
recins
():
store
=
InvenioS
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
store
=
build_s
tore
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1319638
)
return
Record
(
recjson
)
...
...
tests/basis/test_09_Automaton.py
View file @
f19b63aa
...
...
@@ -9,7 +9,7 @@ from gluon import current
from
harvest_tools.automaton
import
Automaton
from
harvest_tools.msgcollection
import
MsgCollection
from
plugin_dbui
import
get_id
from
store_tools.
invenios
tor
e
import
InvenioS
tore
from
store_tools.
fac
tor
y
import
build_s
tore
@
pytest
.
fixture
(
scope
=
"module"
)
...
...
@@ -73,7 +73,7 @@ def test_process_recid_09002(svc):
svc
.
harvester
.
host
=
"cds.cern.ch"
svc
.
harvester
.
collections
=
collection
svc
.
store
=
InvenioStore
(
)
svc
.
store
=
build_store
(
host
=
"cds.cern.ch"
)
ctitle
=
"LHCb / article / %s"
%
collection
svc
.
collection_logs
.
append
(
MsgCollection
(
title
=
ctitle
))
...
...
@@ -106,7 +106,7 @@ def test_process_collection_09003(svc):
svc
.
harvester
.
host
=
"cds.cern.ch"
svc
.
harvester
.
collections
=
collection
svc
.
store
=
InvenioStore
(
)
svc
.
store
=
build_store
(
host
=
"cds.cern.ch"
)
# do it
assert
svc
.
process_collection
(
"LHCb Papers"
)
is
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment