Commit 76a3ae99 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Migrate RecordInst.

parent d94fe829
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
""" invenio_tools.factory """ invenio_tools.factory
""" """
import requests
from base import (is_conference, from base import (is_conference,
is_institute, is_institute,
is_thesis, is_thesis,
...@@ -20,6 +22,44 @@ from recordpubli import RecordPubli ...@@ -20,6 +22,44 @@ from recordpubli import RecordPubli
from recordthesis import RecordThesis from recordthesis import RecordThesis
def add_affiliation_keys(recjson):
    """Add the affiliation keys to the record describing an institute.

    * The XML record contains the affiliation keys used by inspirehep.net.
      They are located in the field 110__u and 110__t (future).

    * The JSON record does not contain this information.

    * This tool adds the affiliation keys to the JSON record, in place.
      They are located:

        +----------------+-------------------------------+
        | field (limbra) | subfield                      |
        +----------------+-------------------------------+
        | corporate_note | identifier, future_identifier |
        +----------------+-------------------------------+

      A key is set to an empty string when the corresponding subfield
      (110__u or 110__t) is not present in the answer of the store.

    Args:
        recjson (dict): record data (MarcJSON). It has to contain the
            integer key ``recid``. The key ``corporate_note`` is added or
            overwritten.

    Raises:
        requests.exceptions.HTTPError: when the store answers with an
            HTTP error status.

    """
    url = "http://inspirehep.net/record/%i" % recjson[u"recid"]
    rep = requests.get(url, params={"ot": "110", "of": "txt"})

    # do not try to decode an error page
    rep.raise_for_status()

    # decode the string: '000recid 110__ $$aXXX$$bYYY$$tZZZ\n'
    # NOTE: rep.text is the decoded (unicode) body, whereas rep.content
    # is made of bytes and can not be mixed with text under Python 3.
    txt = rep.text.replace(u"\n", u"")

    # keep nothing when the answer does not contain any subfield,
    # otherwise find() returns -1 and the slice keeps the last character
    start = txt.find(u"$$")
    chunks = txt[start:].split(u"$$") if start >= 0 else []

    # the first character of a chunk is the subfield code, the remaining
    # is its value. Empty chunks come from the leading separator.
    subfields = dict((el[0:1], el[1:]) for el in chunks if el)

    # a missing subfield is reported as an empty string
    recjson[u"corporate_note"] = {
        u"identifier": subfields.get(u"u", u""),
        u"future_identifier": subfields.get(u"t", u"")}
def add_conference_data(recjson): def add_conference_data(recjson):
"""Add the conference data to the recjson. """Add the conference data to the recjson.
...@@ -83,6 +123,7 @@ def add_conference_data(recjson): ...@@ -83,6 +123,7 @@ def add_conference_data(recjson):
# get the data # get the data
if conf_id is not None: if conf_id is not None:
conf_id = (conf_id if isinstance(conf_id, int) else int(conf_id))
confjson = get_conference_data(host, conf_id=conf_id) confjson = get_conference_data(host, conf_id=conf_id)
else: else:
...@@ -117,6 +158,7 @@ def build_record(recjson): ...@@ -117,6 +158,7 @@ def build_record(recjson):
upcast_record = RecordConf(recjson) upcast_record = RecordConf(recjson)
elif is_institute(recjson): elif is_institute(recjson):
add_affiliation_keys(recjson)
upcast_record = RecordInst(recjson) upcast_record = RecordInst(recjson)
elif is_thesis(recjson): elif is_thesis(recjson):
...@@ -135,7 +177,7 @@ def get_conference_data(host, conf_id=None, key=None): ...@@ -135,7 +177,7 @@ def get_conference_data(host, conf_id=None, key=None):
host (unicode): host (unicode):
possible values are ``cds.cern.ch`` or ``inspirehep.net``. possible values are ``cds.cern.ch`` or ``inspirehep.net``.
conf_id (unicode): conf_id (int):
the conference identifier in the store. the conference identifier in the store.
This is the preferred way. This is the preferred way.
...@@ -159,7 +201,7 @@ def get_conference_data(host, conf_id=None, key=None): ...@@ -159,7 +201,7 @@ def get_conference_data(host, conf_id=None, key=None):
# #
if conf_id is not None: if conf_id is not None:
recjson = cds.get_record(conf_id) recjson = cds.get_record(conf_id)
if str(recjson["recid"]) != conf_id: if recjson["recid"] != conf_id:
raise CdsException(MSG_INV_CONF) raise CdsException(MSG_INV_CONF)
return recjson return recjson
......
""" invenio_tools.recordinst """ invenio_tools.recordinst
""" """
<<<<<<< HEAD
from .base import is_institute from .base import is_institute
from .exception import RecordException from .exception import RecordException
from .record import Record from .record import Record
=======
from base import is_institute, REG_OAI
from exception import RecordException
from record import Record
>>>>>>> Migrate RecordInst.
MSG_INVALID_ARG = "Invalid argument record" MSG_INVALID_ARG = "Invalid argument record"
...@@ -12,70 +18,99 @@ MSG_INVALID_RECORD = "Invalid record, it is not describing an institute" ...@@ -12,70 +18,99 @@ MSG_INVALID_RECORD = "Invalid record, it is not describing an institute"
class RecordInst(Record): class RecordInst(Record):
"""The MARC record describing an institute. """The record describing an institute.
The relation between methods and MARC fields are the following:: Fields are::
------------------------+-------------+ +-----------------------------+----------------------------------+
| | INSPIREHEP | | field (inspirehep) | subfield |
------------------------+-------------+ +-----------------------------+----------------------------------+
| institute identifier | 110 u | | FIXME_OAI | id, set |
| future institute id | 110 t | | administrative_history | |
| name | 110 b | | authority_institution | institution |
| type of record | 980 a | | cataloguer_info | creation_date, modification_date |
------------------------+-------------+ | collection | primary, secondary |
| corporate_name | name, subordinate_unit |
| creation_date | |
| files | |
| filetypes | |
| number_of_citations | |
| number_of_comments | |
| number_of_reviews | |
| persistent_identifiers_keys | |
| recid | |
| source_of_description | note |
| system_control_number | institute, value |
| url | |
| version_id | |
+-----------------------------+----------------------------------+
One field is added by limbra:
+-----------------------------+----------------------------------+
| field (limbra) | subfield |
+-----------------------------+----------------------------------+
| corporate_note | identifier, futur_identifier, |
| | name |
+-----------------------------+----------------------------------+
Args: Args:
record (Record): recjson (dict):
institute data (MarcJSON)
""" """
def __init__(self, record): def __init__(self, recjson):
if not isinstance(record, Record): host = REG_OAI.match(recjson[u"FIXME_OAI"][u"id"]).group(1)
raise RecordException(MSG_INVALID_ARG) if host != "inspirehep.net":
raise RecordException(MSG_INVALID_HOST)
if not is_institute(record): if not is_institute(recjson):
raise RecordException(MSG_INVALID_RECORD) raise RecordException(MSG_INVALID_RECORD)
if record.host() != "inspirehep.net": Record.__init__(self, recjson)
raise RecordException(MSG_INVALID_HOST)
Record.__init__(self, record)
def future_identifier(self): def future_identifier(self):
"""Future identifier of the institute. """Future identifier of the institute.
Returns: Returns:
str: the future inspirehep identifier or an empty string unicode:
the future inspirehep identifier or an empty string
if the identifier is not defined. if the identifier is not defined.
""" """
return self._get("110", "t") return self._get(u"corporate_note", u"future_identifier")
def identifier(self): def identifier(self):
"""Identifier of the institute. """Identifier of the institute.
Returns: Returns:
str: the current inspirehep identifier (2015) or an empty unicode:
the current inspirehep identifier (2015) or an empty
string if it is not defined. string if it is not defined.
""" """
return self._get("110", "u") return self._get(u"corporate_note", u"identifier")
def name(self): def name(self):
""" Name of the institute. """ Name of the institute.
Returns: Returns:
str: the name of the institute or an empty string if unicode:
it is not defined. - the name of the institute.
- an empty string when it is not defined.
""" """
return self._get("110", "a") value = self._get(u"corporate_name", u"subordinate_unit")
if isinstance(value, list) and len(value) == 1:
return value[0]
return u""
def rex(self): def rex(self):
""" Regular expression to search authors affiliate to the institute. """ Regular expression to search authors affiliate to the institute.
Returns: Returns:
str: the regular expression to search author affiliate unicode:
the regular expression to search author affiliate
to the institute in the store ``cds.cern.ch`` or to the institute in the store ``cds.cern.ch`` or
``inspirehep.net``. ``inspirehep.net``.
......
...@@ -10,7 +10,8 @@ from invenio_tools.base import (is_conference, ...@@ -10,7 +10,8 @@ from invenio_tools.base import (is_conference,
is_institute, is_institute,
is_thesis) is_thesis)
from invenio_tools.factory import (add_conference_data, from invenio_tools.factory import (add_affiliation_keys,
add_conference_data,
build_record, build_record,
get_conference_data) get_conference_data)
...@@ -24,104 +25,7 @@ from invenio_tools.recordthesis import RecordThesis ...@@ -24,104 +25,7 @@ from invenio_tools.recordthesis import RecordThesis
# ............................................................................ # ............................................................................
# #
# Section to test introspection and instantiation # Conference proceeding and talk
#
def test_conference_cds():
    """Record 1411352 of cds.cern.ch is a conference built as RecordConf."""
    data = InvenioStore("cds.cern.ch").get_record(1411352)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
def test_conference_inspirehep():
    """Record 1276938 of inspirehep.net is a conference built as RecordConf."""
    data = InvenioStore("inspirehep.net").get_record(1276938)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
def test_institute():
    """CPPM: record 902989 of inspirehep.net is built as RecordInst."""
    data = InvenioStore("inspirehep.net").get_record(902989)

    # introspection
    assert not is_conference(data)
    assert is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordInst)
def test_publi_cds():
    """Precision luminosity measurements at LHCb (cds.cern.ch)"""
    data = InvenioStore("cds.cern.ch").get_record(1951625)

    # introspection: none of the specialised predicates matches
    assert not is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordPubli)
def test_publi_inspirehep():
    """Precision luminosity measurements at LHCb (inspirehep.net)"""
    data = InvenioStore("inspirehep.net").get_record(1319638)

    # introspection: none of the specialised predicates matches
    assert not is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordPubli)
def test_talk_cds():
    """Record 2239092 of cds.cern.ch is a conference built as RecordConf."""
    data = InvenioStore("cds.cern.ch").get_record(2239092)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
def test_thesis_cds():
    """Record 1632177 of cds.cern.ch is a thesis built as RecordThesis."""
    data = InvenioStore("cds.cern.ch").get_record(1632177)

    # introspection
    assert not is_conference(data)
    assert not is_institute(data)
    assert is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordThesis)
# ............................................................................
#
# Section to test tool to get and add conference data
# #
def test_get_conference_data(): def test_get_conference_data():
""" check the different approach to get the conference data """ check the different approach to get the conference data
...@@ -196,10 +100,13 @@ def test_add_conference_data(): ...@@ -196,10 +100,13 @@ def test_add_conference_data():
add_conference_data(recjson) add_conference_data(recjson)
assert "meeting_name" in recjson assert "meeting_name" in recjson
assert "meeting" in recjson assert "meeting_note" in recjson
assert recjson["meeting_name"][0]["coference_code"] == "rome20101206" assert recjson["meeting_name"][0]["coference_code"] == "rome20101206"
assert recjson["meeting"]["url"] == "http://www.roma1.infn.it/discrete10"
assert recjson["meeting"]["recid"] == 1181092 assert recjson["meeting_note"]["url"] == \
"http://www.roma1.infn.it/discrete10"
assert recjson["meeting_note"]["recid"] == 1181092
# ........................................................................ # ........................................................................
# #
...@@ -210,7 +117,127 @@ def test_add_conference_data(): ...@@ -210,7 +117,127 @@ def test_add_conference_data():
add_conference_data(recjson) add_conference_data(recjson)
assert "meeting_name" in recjson assert "meeting_name" in recjson
assert "meeting" in recjson assert "meeting_note" in recjson
assert recjson["meeting_name"][0]["coference_code"] == "C10-12-06" assert recjson["meeting_name"][0]["coference_code"] == "C10-12-06"
assert recjson["meeting"]["url"] == "http://www.roma1.infn.it/discrete10"
assert recjson["meeting"]["recid"] == 980401 assert recjson["meeting_note"]["url"] == \
"http://www.roma1.infn.it/discrete10"
assert recjson["meeting_note"]["recid"] == 980401
def test_conference_cds():
    """Record 1411352 of cds.cern.ch is a conference built as RecordConf."""
    data = InvenioStore("cds.cern.ch").get_record(1411352)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
def test_conference_inspirehep():
    """Record 1276938 of inspirehep.net is a conference built as RecordConf."""
    data = InvenioStore("inspirehep.net").get_record(1276938)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
def test_talk_cds():
    """Record 2239092 of cds.cern.ch is a conference built as RecordConf."""
    data = InvenioStore("cds.cern.ch").get_record(2239092)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
# ............................................................................
#
# Institute
#
def test_add_affiliation_keys():
    """CPPM: the affiliation keys are added under ``corporate_note``."""
    store = InvenioStore("inspirehep.net")
    recjson = store.get_record(902989)
    add_affiliation_keys(recjson)

    assert u"corporate_note" in recjson

    note = recjson[u"corporate_note"]
    assert note[u"identifier"] == "Marseille, CPPM"

    # the key written by add_affiliation_keys is "future_identifier",
    # asking for "futur_identifier" raises a KeyError
    assert note[u"future_identifier"] == "CPPM, Marseille"
def test_institute():
    """CPPM: record 902989 of inspirehep.net is built as RecordInst."""
    data = InvenioStore("inspirehep.net").get_record(902989)

    # introspection
    assert not is_conference(data)
    assert is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordInst)
# ............................................................................
#
# Article, ...
#
def test_publi_cds():
    """Precision luminosity measurements at LHCb (cds.cern.ch)"""
    data = InvenioStore("cds.cern.ch").get_record(1951625)

    # introspection: none of the specialised predicates matches
    assert not is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordPubli)
def test_publi_inspirehep():
    """Precision luminosity measurements at LHCb (inspirehep.net)"""
    data = InvenioStore("inspirehep.net").get_record(1319638)

    # introspection: none of the specialised predicates matches
    assert not is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordPubli)
# ............................................................................
#
# Thesis
#
def test_thesis_cds():
    """Record 1632177 of cds.cern.ch is a thesis built as RecordThesis."""
    data = InvenioStore("cds.cern.ch").get_record(1632177)

    # introspection
    assert not is_conference(data)
    assert not is_institute(data)
    assert is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordThesis)
# -*- coding: utf-8 -*-
"""test_06_RecordThesis
Test specific methods of the RecordInst class for CPPM
"""
import pytest
from invenio_tools import load_record
@pytest.fixture(scope="module")
def record():
    """Load the record 902989 of inspirehep.net once for the module."""
    host, recid = "inspirehep.net", 902989
    return load_record(host, recid)
def test_future_identifer(record):
    """The future identifier of the record is u'CPPM, Marseille'."""
    found = record.future_identifier()
    assert found == u'CPPM, Marseille'
def test_id(record):
    """The id of the record is 902989."""
    found = record.id()
    assert found == 902989
def test_identifier(record):
    """The identifier of the record is u'Marseille, CPPM'."""
    found = record.identifier()
    assert found == u'Marseille, CPPM'
def test_name(record):
    """The name of the record is the full name of the CPPM."""
    expected = u'Centre de Physique des Particules de Marseille (CPPM)'
    found = record.name()
    assert found == expected
def test_rex(record):
    """The pattern joins the identifier, the future one and the name."""
    expected = (r"Marseille, CPPM|CPPM, Marseille|"
                "Centre de Physique des Particules de Marseille (CPPM)")
    found = record.rex()
    assert found == expected
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment