Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit b030e0f1 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Add RecordHepInst and test_10_RecordHepInst remove RecordInst and friends

parent 8764280b
......@@ -28,7 +28,9 @@ from .factory import build_record, build_store
from .inveniostore import InvenioStore
from .record import Record
from .recordconf import RecordConf
from .recordinst import RecordInst
from .recordhepconf import RecordHepConf
from .recordhepinst import RecordHepInst
from .recordheppubli import RecordHepPubli
from .recordpubli import RecordPubli
from .recordthesis import RecordThesis
......@@ -63,7 +65,8 @@ def load_record(host, record_id, shelf=None):
Returns:
Record:
either RecordPubli, RecordInst, RecordConf of RecordThesis.
either RecordConf, RecordHepConf, RecordHepPubli, RecordHepInst,
RecordHepThesis, RecordPubli or RecordThesis
Raises:
CdsException::
......
......@@ -19,13 +19,17 @@ from .inspirehepstore import InspirehepStore, SHELFS
from .recordconf import RecordConf
from .recordhepconf import RecordHepConf
from .recordheppubli import RecordHepPubli
from .recordinst import RecordInst
from .recordhepinst import RecordHepInst
from .recordpubli import RecordPubli
from .recordthesis import RecordThesis
REX_T = "\$\$t([\w, ]+)"
REX_U = "\$\$u([\w, ]+)"
MSG_ERROR_INST = \
"get institutions information from inspirehep.net " \
"by using the shelf 'institutions'"
MSG_FAIL_UPCAST = "Failed to upcast the JSON record"
......@@ -107,8 +111,8 @@ def add_conference_data(recjson):
# extract the conference url
# - information is in confjson[url]
# - in most of the case it is a dictionary
# - when it is a list take the first entry which is for the
# home page while the second one is for the proceeding (cds 2270940)
# - when it is a list take the first entry which is for the home
# page while the second one is for the proceeding (cds 2270940)
# - in other case the url is not defined (cds 2258914)
confurl = ""
if "url" in confjson:
......@@ -166,24 +170,25 @@ def build_record(recjson, shelf=None):
Note:
this tool is working for JSON object coming from cds.cern.ch,
old.inspirehep.net as well as inspirehep. In the latter case
the shelf has to be defined.
old.inspirehep.net as well as inspirehep.net. In the latter case
the shelf shall be defined.
Args:
recjson (dict):
record data in a JSON format.
shelf (str):
section of the inspirehep store containing records.
section of the inspirehep.net store containing records.
Possible values are ``literature``, ``conferences``
and ``institutions``
Return
Record:
either RecordConf, RecodHepPubli, RecordInst, RecodPubli
or RecordThesis
either RecordConf, RecordHepConf, RecordHepPubli, RecordHepInst,
RecordHepThesis, RecordPubli or RecordThesis
Raises:
RecordException
"""
# ........................................................................
......@@ -196,7 +201,7 @@ def build_record(recjson, shelf=None):
upcast_record = RecordConf(recjson)
elif is_institute(recjson):
upcast_record = RecordInst(recjson)
raise RecordException(MSG_ERROR_INST)
elif is_thesis(recjson):
upcast_record = RecordThesis(recjson)
......@@ -208,8 +213,8 @@ def build_record(recjson, shelf=None):
if is_conference(recjson) and shelf == "literature":
upcast_record = RecordHepConf(recjson)
# elif shelf == "institutions":
# upcast_record = RecordHepInst(recjson)
elif shelf == "institutions":
upcast_record = RecordHepInst(recjson)
# elif is_thesis(recjson) and shelf == "literature":
# upcast_record = RecordHepThesis(recjson)
......
""" store_tools.recordinst
""" store_tools.recordhepinst
"""
from .base import is_institute
from .exception import RecordException
from .record import Record
from pprint import pprint
MSG_INVALID_HOST = "Invalid record host"
MSG_INVALID_RECORD = "Invalid record, it is not describing an institute"
class RecordInst(Record):
"""The record describing an institute.
Fields are:
+-----------------------------+----------------------------------+
| field (inspirehep) | subfield |
+=============================+==================================+
| FIXME_OAI | id, set |
+-----------------------------+----------------------------------+
| administrative_history | |
+-----------------------------+----------------------------------+
| authority_institution | institution |
+-----------------------------+----------------------------------+
| cataloguer_info | creation_date, modification_date |
+-----------------------------+----------------------------------+
| collection | primary, secondary |
+-----------------------------+----------------------------------+
| corporate_name | name |
+-----------------------------+----------------------------------+
| creation_date | |
+-----------------------------+----------------------------------+
| files | |
+-----------------------------+----------------------------------+
| filetypes | |
+-----------------------------+----------------------------------+
| number_of_citations | |
+-----------------------------+----------------------------------+
| number_of_comments | |
+-----------------------------+----------------------------------+
| number_of_reviews | |
+-----------------------------+----------------------------------+
| persistent_identifiers_keys | |
+-----------------------------+----------------------------------+
| recid | |
+-----------------------------+----------------------------------+
| source_of_description | note |
+-----------------------------+----------------------------------+
| system_control_number | institute, value |
+-----------------------------+----------------------------------+
| url | |
+-----------------------------+----------------------------------+
| version_id | |
+-----------------------------+----------------------------------+
One field is added by limbra:
+-----------------------------+----------------------------------+
| field (limbra) | subfield |
+=============================+==================================+
| corporate_note | identifier, futur_identifier, |
| | name |
+-----------------------------+----------------------------------+
class RecordHepInst(dict):
"""Institution record from inspirehep.net version v2.
Schema for institution is documented here:
https://inspire-schemas.readthedocs.io/en/latest/schemas/
Args:
recjson (dict):
institute data (MarcJSON)
"""
......@@ -72,44 +26,51 @@ class RecordInst(Record):
if not is_institute(recjson):
raise RecordException(MSG_INVALID_RECORD)
Record.__init__(self, recjson)
super().__init__(recjson)
# Debugging helper: pretty-print the content of the record (the
# underlying dictionary) using pprint.
def debug(self):
pprint(self)
# Name of the store hosting the record; always the constant
# string "inspirehep.net" for this class.
def host(self):
return "inspirehep.net"
# Identifier of the record: the value stored under the
# "control_number" key. The key is indexed directly, without a
# default, so a record lacking it raises KeyError.
def id(self):
return self["control_number"]
def future_identifier(self):
"""Future identifier of the institute.
def legacy_identifier(self):
"""Legacy identifier of the institute.
Returns:
str:
the future inspirehep identifier or an empty string
if the identifier is not defined.
the legacy identifier used by inspirehep or an empty string
if it is not defined.
"""
return self._get("corporate_note", "future_identifier")
return self.get("legacy_ICN", "")
def identifier(self):
    """Identifier of the institute.

    Returns:
        str:
            * the current identifier used by inspirehep (> 2014)
            * an empty string if it is not defined
            * the first identifier when there is more than one

    """
    # "ICN" can be missing or present but empty. Indexing the stored
    # list directly would raise IndexError in the latter case, so fall
    # back to a one-element default whenever the value is falsy.
    icn = self.get("ICN") or [""]
    return icn[0]
def name(self):
    """Name of the institute.

    Returns:
        str:
            * the name of the institute.
            * an empty string when it is not defined
            * the first name when there is more than one

    """
    # "institution_hierarchy" is read as a list of dictionaries. It can
    # be missing or empty, and the first entry may lack a "name" key;
    # every one of these cases yields an empty string.
    hierarchy = self.get("institution_hierarchy") or [{}]
    return hierarchy[0].get("name", "")
def rex(self):
    """Regular expression to search authors affiliated with the institute.

    The expression is the alternation (``|``) of the current
    identifiers, the legacy identifier and the name of the institute,
    in that order. Undefined (empty) values are skipped.

    Returns:
        str:
            the regular expression to search authors affiliated
            with the institute in the store ``cds.cern.ch``,
            ``old.inspirehep.net`` as well as ``inspirehep.net``.
            It is an empty string when nothing is defined.

    """
    # Work on a copy: self.get returns the very list stored in the
    # record, and appending to it would add the legacy identifier and
    # the name to the record's "ICN" field on every call.
    parts = list(self.get("ICN") or [])
    parts.append(self.get("legacy_ICN", ""))
    parts.append(self.name())

    # An empty alternative would make the expression match anything,
    # so undefined values are dropped before joining.
    return r"|".join(part for part in parts if part)
"""test_10_RecordInst
"""test_10_RecordHepInst
Test specific methods of the RecordHepInst class for CPPM
"""
import pytest
from store_tools import load_record, RecordException, RecordInst
from store_tools import load_record, RecordException, RecordHepInst
@pytest.fixture(scope="module")
......@@ -16,17 +16,9 @@ def record():
def test_exception_host_ins_10001():
record = load_record("cds.cern.ch", 1951625)
with pytest.raises(RecordException):
RecordInst(record)
RecordHepInst(record)
def test_exception_record_ins_10002():
record = load_record("inspirehep.net", 1319638, shelf="institutions")
with pytest.raises(RecordException):
RecordInst(record)
def test_future_identifer_ins_10003(record):
assert record.future_identifier() == u'CPPM, Marseille'
# v1.4.0 remove obsolete test_exception_record_ins_10002
def test_id_ins_10004(record):
......@@ -34,15 +26,18 @@ def test_id_ins_10004(record):
def test_identifier_ins_10005(record):
assert record.identifier() == u'Marseille, CPPM'
assert record.identifier() == 'CPPM, Marseille'
def test_legacy_identifer_ins_10003(record):
assert record.legacy_identifier() == 'Marseille, CPPM'
def test_name_ins_10006(record):
assert record.name() == \
u'Centre de Physique des Particules de Marseille (CPPM)'
assert record.name() == 'Centre de Physique des Particules de Marseille'
def test_rex_ins_10007(record):
assert record.rex() == \
r"Marseille, CPPM|CPPM, Marseille|" \
"Centre de Physique des Particules de Marseille (CPPM)"
r"CPPM, Marseille|Marseille, CPPM|" \
"Centre de Physique des Particules de Marseille"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment