Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
b030e0f1
Commit
b030e0f1
authored
Jan 08, 2021
by
LE GAC Renaud
Browse files
Add RecordHepInst and test_10_RecordHepInst remove RecordInst and friends
parent
8764280b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
108 additions
and
98 deletions
+108
-98
modules/store_tools/__init__.py
modules/store_tools/__init__.py
+5
-2
modules/store_tools/factory.py
modules/store_tools/factory.py
+16
-11
modules/store_tools/recordhepinst.py
modules/store_tools/recordhepinst.py
+44
-85
tests/basis/test_10_RecordHepInst.py
tests/basis/test_10_RecordHepInst.py
+43
-0
No files found.
modules/store_tools/__init__.py
View file @
b030e0f1
...
...
@@ -28,7 +28,9 @@ from .factory import build_record, build_store
from
.inveniostore
import
InvenioStore
from
.record
import
Record
from
.recordconf
import
RecordConf
from
.recordinst
import
RecordInst
from
.recordhepconf
import
RecordHepConf
from
.recordhepinst
import
RecordHepInst
from
.recordheppubli
import
RecordHepPubli
from
.recordpubli
import
RecordPubli
from
.recordthesis
import
RecordThesis
...
...
@@ -63,7 +65,8 @@ def load_record(host, record_id, shelf=None):
Returns:
Record:
either RecordPubli, RecordInst, RecordConf of RecordThesis.
either RecordConf, RecordHepConf, RecordHepPubli, RecordHepInst,
RecordHepThesis, RecordPubli or RecordThesis
Raises:
CdsException::
...
...
modules/store_tools/factory.py
View file @
b030e0f1
...
...
@@ -19,13 +19,17 @@ from .inspirehepstore import InspirehepStore, SHELFS
from
.recordconf
import
RecordConf
from
.recordhepconf
import
RecordHepConf
from
.recordheppubli
import
RecordHepPubli
from
.recordinst
import
RecordInst
from
.record
hep
inst
import
Record
Hep
Inst
from
.recordpubli
import
RecordPubli
from
.recordthesis
import
RecordThesis
REX_T
=
"\$\$t([\w, ]+)"
REX_U
=
"\$\$u([\w, ]+)"
MSG_ERROR_INST
=
\
"get institutions information from inspirehep.net "
\
"by using the shelf 'institutions'"
MSG_FAIL_UPCAST
=
"Failed to upcast the JSON record"
...
...
@@ -107,8 +111,8 @@ def add_conference_data(recjson):
# extract the conference url
# - information is in confjson[url]
# - in most of the case it is a dictionary
# - when it is a list take the first entry which is for the
#
home
page while the second one is for the proceeding (cds 2270940)
# - when it is a list take the first entry which is for the
home
# page while the second one is for the proceeding (cds 2270940)
# - in other case the url is not defined (cds 2258914)
confurl
=
""
if
"url"
in
confjson
:
...
...
@@ -166,24 +170,25 @@ def build_record(recjson, shelf=None):
Note:
this tool is working for JSON object coming from cds.cern.ch,
old.inspirehep.net as well as inspirehep. In the latter case
the shelf ha
s to
be defined.
old.inspirehep.net as well as inspirehep.
net.
In the latter case
the shelf
s
ha
ll
be defined.
Args:
recjson (dict):
record data in a JSON format.
shelf (str):
section of the inspirehep store containing records.
section of the inspirehep
.hep
store containing records.
Possible values are ``literature``, ``conferences``
and ``institutions``
Returns:
Record:
either RecordConf, RecodHepPubli, RecordInst,
RecodPubli
or RecordThesis
either RecordConf,
RecordHepConf,
RecordHepPubli, Record
Hep
Inst,
RecordHepThesis, RecordPubli
or RecordThesis
Raises:
RecordException
"""
# ........................................................................
...
...
@@ -196,7 +201,7 @@ def build_record(recjson, shelf=None):
upcast_record
=
RecordConf
(
recjson
)
elif
is_institute
(
recjson
):
upcast_record
=
RecordInst
(
recjson
)
raise
RecordException
(
MSG_ERROR_INST
)
elif
is_thesis
(
recjson
):
upcast_record
=
RecordThesis
(
recjson
)
...
...
@@ -208,8 +213,8 @@ def build_record(recjson, shelf=None):
if
is_conference
(
recjson
)
and
shelf
==
"literature"
:
upcast_record
=
RecordHepConf
(
recjson
)
#
elif shelf == "institutions":
#
upcast_record = RecordHepInst(recjson)
elif
shelf
==
"institutions"
:
upcast_record
=
RecordHepInst
(
recjson
)
# elif is_thesis(recjson) and shelf == "literature":
# upcast_record = RecordHepThesis(recjson)
...
...
modules/store_tools/recordhepinst.py
View file @
b030e0f1
""" store_tools.recordinst
""" store_tools.record
hep
inst
"""
from
.base
import
is_institute
from
.exception
import
RecordException
from
.record
import
Record
from
pprint
import
pprint
MSG_INVALID_HOST
=
"Invalid record host"
MSG_INVALID_RECORD
=
"Invalid record, it is not describing an institute"
class
RecordInst
(
Record
):
"""The record describing an institute.
Fields are:
+-----------------------------+----------------------------------+
| field (inspirehep) | subfield |
+=============================+==================================+
| FIXME_OAI | id, set |
+-----------------------------+----------------------------------+
| administrative_history | |
+-----------------------------+----------------------------------+
| authority_institution | institution |
+-----------------------------+----------------------------------+
| cataloguer_info | creation_date, modification_date |
+-----------------------------+----------------------------------+
| collection | primary, secondary |
+-----------------------------+----------------------------------+
| corporate_name | name |
+-----------------------------+----------------------------------+
| creation_date | |
+-----------------------------+----------------------------------+
| files | |
+-----------------------------+----------------------------------+
| filetypes | |
+-----------------------------+----------------------------------+
| number_of_citations | |
+-----------------------------+----------------------------------+
| number_of_comments | |
+-----------------------------+----------------------------------+
| number_of_reviews | |
+-----------------------------+----------------------------------+
| persistent_identifiers_keys | |
+-----------------------------+----------------------------------+
| recid | |
+-----------------------------+----------------------------------+
| source_of_description | note |
+-----------------------------+----------------------------------+
| system_control_number | institute, value |
+-----------------------------+----------------------------------+
| url | |
+-----------------------------+----------------------------------+
| version_id | |
+-----------------------------+----------------------------------+
One field is added by limbra:
+-----------------------------+----------------------------------+
| field (limbra) | subfield |
+=============================+==================================+
| corporate_note | identifier, futur_identifier, |
| | name |
+-----------------------------+----------------------------------+
class
RecordHepInst
(
dict
):
"""Institution record from inspirehep.net version v2.
Schema for institution is documented here:
https://inspire-schemas.readthedocs.io/en/latest/schemas/
Args:
recjson (dict):
institute data (MarcJSON)
"""
...
...
@@ -72,44 +26,51 @@ class RecordInst(Record):
if
not
is_institute
(
recjson
):
raise
RecordException
(
MSG_INVALID_RECORD
)
Record
.
__init__
(
self
,
recjson
)
super
().
__init__
(
recjson
)
def
debug
(
self
):
pprint
(
self
)
def
host
(
self
):
return
"inspirehep.net"
def
id
(
self
):
return
self
[
"control_number"
]
def
future
_identifier
(
self
):
"""
Future
identifier of the institute.
def
legacy
_identifier
(
self
):
"""
Legacy
identifier of the institute.
Returns:
str:
the
future inspirehep identifier
or an empty string
if t
he identifier
is not defined.
the
legacy identifier used by inspirehep
or an empty string
if
i
t is not defined.
"""
return
self
.
_
get
(
"
corporate_note"
,
"future_identifier
"
)
return
self
.
get
(
"
legacy_ICN"
,
"
"
)
def
identifier
(
self
):
"""Identifier of the institute.
Returns:
str:
the current inspirehep identifier (2015) or an empty
string if it is not defined.
* the current identifier used by inspirehep (> 2014)
* an empty string if it is not defined
* the first identifier when there is more than one
"""
return
self
.
_
get
(
"
corporate_note"
,
"identifier"
)
return
self
.
get
(
"
ICN"
,
[
""
])[
0
]
def
name
(
self
):
""" Name of the institute.
Returns:
str:
- the name of the institute.
- an empty string when it is not defined.
* the name of the institute.
* an empty string when it is not defined
* the first name when there is more than one
"""
value
=
self
.
_get
(
"corporate_name"
,
"name"
)
if
isinstance
(
value
,
list
)
and
len
(
value
)
==
1
:
return
value
[
0
]
return
""
return
self
.
get
(
"institution_hierarchy"
,
[{}])[
0
].
get
(
"name"
,
""
)
def
rex
(
self
):
""" Regular expression to search authors affiliate to the institute.
...
...
@@ -117,20 +78,18 @@ class RecordInst(Record):
Returns:
str:
the regular expression to search for authors affiliated
to the institute in the store ``cds.cern.ch``
or
``inspirehep.net``.
to the institute in the store ``cds.cern.ch``
,
``old.inspirehep.net`` as well as
``inspirehep.net``.
"""
li
=
[
self
.
identifier
(),
self
.
future_identifier
(),
self
.
name
()]
# protection against empty string
# happen when one the identifier / full name is not defined
# trigger by inspirehep.net/record/903100 where name is not defined.
if
""
in
li
:
li
.
sort
()
li
.
reverse
()
idx
=
li
.
index
(
""
)
return
r
"|"
.
join
(
li
[:
idx
])
else
:
return
r
"|"
.
join
(
li
)
lst
=
self
.
get
(
"ICN"
,
[])
legacy_ICN
=
self
.
get
(
"legacy_ICN"
,
""
)
if
len
(
legacy_ICN
)
>
0
:
lst
.
append
(
legacy_ICN
)
name
=
self
.
name
()
if
len
(
name
)
>
0
:
lst
.
append
(
name
)
return
r
"|"
.
join
(
lst
)
tests/basis/test_10_RecordInst.py
→
tests/basis/test_10_Record
Hep
Inst.py
View file @
b030e0f1
"""test_10_RecordInst
"""test_10_Record
Hep
Inst
Test specific methods of the RecordHepInst class for CPPM
"""
import
pytest
from
store_tools
import
load_record
,
RecordException
,
RecordInst
from
store_tools
import
load_record
,
RecordException
,
Record
Hep
Inst
@
pytest
.
fixture
(
scope
=
"module"
)
...
...
@@ -16,17 +16,9 @@ def record():
def
test_exception_host_ins_10001
():
record
=
load_record
(
"cds.cern.ch"
,
1951625
)
with
pytest
.
raises
(
RecordException
):
RecordInst
(
record
)
Record
Hep
Inst
(
record
)
def
test_exception_record_ins_10002
():
record
=
load_record
(
"inspirehep.net"
,
1319638
,
shelf
=
"institutions"
)
with
pytest
.
raises
(
RecordException
):
RecordInst
(
record
)
def
test_future_identifer_ins_10003
(
record
):
assert
record
.
future_identifier
()
==
u
'CPPM, Marseille'
# v1.4.0 remove obsolete test_exception_record_ins_10002
def
test_id_ins_10004
(
record
):
...
...
@@ -34,15 +26,18 @@ def test_id_ins_10004(record):
def
test_identifier_ins_10005
(
record
):
assert
record
.
identifier
()
==
u
'Marseille, CPPM'
assert
record
.
identifier
()
==
'CPPM, Marseille'
def
test_legacy_identifer_ins_10003
(
record
):
assert
record
.
legacy_identifier
()
==
'Marseille, CPPM'
def
test_name_ins_10006
(
record
):
assert
record
.
name
()
==
\
u
'Centre de Physique des Particules de Marseille (CPPM)'
assert
record
.
name
()
==
'Centre de Physique des Particules de Marseille'
def
test_rex_ins_10007
(
record
):
assert
record
.
rex
()
==
\
r
"Marseille
, CPPM|CPPM,
Marseille|"
\
"Centre de Physique des Particules de Marseille
(CPPM)
"
r
"
CPPM,
Marseille
|
Marseille
, CPPM
|"
\
"Centre de Physique des Particules de Marseille"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment