Commit 76a3ae99 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Migrate RecordInst.

parent d94fe829
......@@ -2,6 +2,8 @@
""" invenio_tools.factory
"""
import requests
from base import (is_conference,
is_institute,
is_thesis,
......@@ -20,6 +22,44 @@ from recordpubli import RecordPubli
from recordthesis import RecordThesis
def add_affiliation_keys(recjson):
    """Add the affiliation keys to the record describing an institute.

    * The XML record contains the affiliation keys used by inspirehep.net.
      They are located in the field 110__u and 110__t (future).
    * The JSON record does not contain this information.
    * This tool adds the affiliation keys to the JSON record.
      They are located:

        +----------------+-------------------------------+
        | field (limbra) | subfield                      |
        +----------------+-------------------------------+
        | corporate_note | identifier, future_identifier |
        +----------------+-------------------------------+

    A subfield which is missing from the field 110 is stored as an
    empty string instead of raising a ``KeyError``.

    Args:
        recjson (dict): record data (MarcJSON). It has to contain the
            numeric key ``recid`` and it is modified in place.

    """
    url = "http://inspirehep.net/record/%i" % recjson[u"recid"]
    rep = requests.get(url, params={"ot": "110", "of": "txt"})

    # decode the string: '000recid 110__ $$aXXX$$bYYY$$tZZZ\n'
    txt = rep.content.replace("\n", "")

    # the text located before the first "$$" is the record / tag prefix,
    # each following chunk is a one-character subfield code and its value
    di = {}
    for el in txt.split("$$")[1:]:
        if el:
            di[el[0]] = el[1:]

    # u is the current affiliation key and t the future one
    recjson[u"corporate_note"] = {u"identifier": di.get("u", u""),
                                  u"future_identifier": di.get("t", u"")}
def add_conference_data(recjson):
"""Add the conference data to the recjson.
......@@ -83,6 +123,7 @@ def add_conference_data(recjson):
# get the data
if conf_id is not None:
conf_id = (conf_id if isinstance(conf_id, int) else int(conf_id))
confjson = get_conference_data(host, conf_id=conf_id)
else:
......@@ -117,6 +158,7 @@ def build_record(recjson):
upcast_record = RecordConf(recjson)
elif is_institute(recjson):
add_affiliation_keys(recjson)
upcast_record = RecordInst(recjson)
elif is_thesis(recjson):
......@@ -135,7 +177,7 @@ def get_conference_data(host, conf_id=None, key=None):
host (unicode):
possible values are ``cds.cern.ch`` or ``inspirehep.net``.
conf_id (unicode):
conf_id (int):
the conference identifier in the store.
This is the preferred way.
......@@ -159,7 +201,7 @@ def get_conference_data(host, conf_id=None, key=None):
#
if conf_id is not None:
recjson = cds.get_record(conf_id)
if str(recjson["recid"]) != conf_id:
if recjson["recid"] != conf_id:
raise CdsException(MSG_INV_CONF)
return recjson
......
""" invenio_tools.recordinst
"""
<<<<<<< HEAD
from .base import is_institute
from .exception import RecordException
from .record import Record
=======
from base import is_institute, REG_OAI
from exception import RecordException
from record import Record
>>>>>>> Migrate RecordInst.
MSG_INVALID_ARG = "Invalid argument record"
......@@ -12,70 +18,99 @@ MSG_INVALID_RECORD = "Invalid record, it is not describing an institute"
class RecordInst(Record):
"""The MARC record describing an institute.
The relation between methods and MARC fields are the following::
------------------------+-------------+
| | INSPIREHEP |
------------------------+-------------+
| institute identifier | 110 u |
| future institute id | 110 t |
| name | 110 b |
| type of record | 980 a |
------------------------+-------------+
"""The record describing an institute.
Fields are::
+-----------------------------+----------------------------------+
| field (inspirehep) | subfield |
+-----------------------------+----------------------------------+
| FIXME_OAI | id, set |
| administrative_history | |
| authority_institution | institution |
| cataloguer_info | creation_date, modification_date |
| collection | primary, secondary |
| corporate_name | name, subordinate_unit |
| creation_date | |
| files | |
| filetypes | |
| number_of_citations | |
| number_of_comments | |
| number_of_reviews | |
| persistent_identifiers_keys | |
| recid | |
| source_of_description | note |
| system_control_number | institute, value |
| url | |
| version_id | |
+-----------------------------+----------------------------------+
One field is added by limbra:
+-----------------------------+----------------------------------+
| field (limbra) | subfield |
+-----------------------------+----------------------------------+
| corporate_note | identifier, future_identifier, |
| | name |
+-----------------------------+----------------------------------+
Args:
record (Record):
recjson (dict):
institute data (MarcJSON)
"""
def __init__(self, record):
def __init__(self, recjson):
if not isinstance(record, Record):
raise RecordException(MSG_INVALID_ARG)
host = REG_OAI.match(recjson[u"FIXME_OAI"][u"id"]).group(1)
if host != "inspirehep.net":
raise RecordException(MSG_INVALID_HOST)
if not is_institute(record):
if not is_institute(recjson):
raise RecordException(MSG_INVALID_RECORD)
if record.host() != "inspirehep.net":
raise RecordException(MSG_INVALID_HOST)
Record.__init__(self, record)
Record.__init__(self, recjson)
def future_identifier(self):
"""Future identifier of the institute.
Returns:
str: the future inspirehep identifier or an empty string
unicode:
the future inspirehep identifier or an empty string
if the identifier is not defined.
"""
return self._get("110", "t")
return self._get(u"corporate_note", u"future_identifier")
def identifier(self):
"""Identifier of the institute.
Returns:
str: the current inspirehep identifier (2015) or an empty
unicode:
the current inspirehep identifier (2015) or an empty
string if it is not defined.
"""
return self._get("110", "u")
return self._get(u"corporate_note", u"identifier")
def name(self):
""" Name of the institute.
Returns:
str: the name of the institute or an empty string if
it is not defined.
unicode:
- the name of the institute.
- an empty string when it is not defined.
"""
return self._get("110", "a")
value = self._get(u"corporate_name", u"subordinate_unit")
if isinstance(value, list) and len(value) == 1:
return value[0]
return u""
def rex(self):
""" Regular expression to search authors affiliate to the institute.
Returns:
str: the regular expression to search author affiliate
unicode:
the regular expression to search author affiliate
to the institute in the store ``cds.cern.ch`` or
``inspirehep.net``.
......
......@@ -10,7 +10,8 @@ from invenio_tools.base import (is_conference,
is_institute,
is_thesis)
from invenio_tools.factory import (add_conference_data,
from invenio_tools.factory import (add_affiliation_keys,
add_conference_data,
build_record,
get_conference_data)
......@@ -24,104 +25,7 @@ from invenio_tools.recordthesis import RecordThesis
# ............................................................................
#
# Section to test introspection and instantiation
#
def test_conference_cds():
    """Record 1411352 of cds.cern.ch is a conference, built as RecordConf."""
    data = InvenioStore("cds.cern.ch").get_record(1411352)

    # introspection: conference only
    assert is_conference(data)
    assert not (is_institute(data) or is_thesis(data))

    # instantiation
    assert isinstance(build_record(data), RecordConf)
def test_conference_inspirehep():
    """Record 1276938 of inspirehep.net is a conference, built as RecordConf."""
    data = InvenioStore("inspirehep.net").get_record(1276938)

    # introspection: conference only
    assert is_conference(data)
    assert not (is_institute(data) or is_thesis(data))

    # instantiation
    assert isinstance(build_record(data), RecordConf)
def test_institute():
    """CPPM (inspirehep.net record 902989) is an institute, built as RecordInst."""
    data = InvenioStore("inspirehep.net").get_record(902989)

    # introspection: institute only
    assert not is_conference(data)
    assert is_institute(data)
    assert not is_thesis(data)

    # instantiation
    assert isinstance(build_record(data), RecordInst)
def test_publi_cds():
    """Precision luminosity measurements at LHCb (cds.cern.ch record 1951625).

    The record is neither a conference, an institute nor a thesis and it is
    built as a RecordPubli.
    """
    data = InvenioStore("cds.cern.ch").get_record(1951625)

    # none of the specialised kinds may match
    checks = (is_conference, is_institute, is_thesis)
    assert not any(check(data) for check in checks)

    assert isinstance(build_record(data), RecordPubli)
def test_publi_inspirehep():
    """Precision luminosity measurements at LHCb (inspirehep.net record 1319638).

    The record is neither a conference, an institute nor a thesis and it is
    built as a RecordPubli.
    """
    data = InvenioStore("inspirehep.net").get_record(1319638)

    # none of the specialised kinds may match
    checks = (is_conference, is_institute, is_thesis)
    assert not any(check(data) for check in checks)

    assert isinstance(build_record(data), RecordPubli)
def test_talk_cds():
    """Record 2239092 of cds.cern.ch is a conference, built as RecordConf."""
    data = InvenioStore("cds.cern.ch").get_record(2239092)

    # introspection: conference only
    assert is_conference(data)
    assert not (is_institute(data) or is_thesis(data))

    # instantiation
    assert isinstance(build_record(data), RecordConf)
def test_thesis_cds():
    """Record 1632177 of cds.cern.ch is a thesis, built as RecordThesis."""
    data = InvenioStore("cds.cern.ch").get_record(1632177)

    # introspection: thesis only
    assert not (is_conference(data) or is_institute(data))
    assert is_thesis(data)

    # instantiation
    assert isinstance(build_record(data), RecordThesis)
# ............................................................................
#
# Section to test tool to get and add conference data
# Conference proceeding and talk
#
def test_get_conference_data():
""" check the different approach to get the conference data
......@@ -196,10 +100,13 @@ def test_add_conference_data():
add_conference_data(recjson)
assert "meeting_name" in recjson
assert "meeting" in recjson
assert "meeting_note" in recjson
assert recjson["meeting_name"][0]["coference_code"] == "rome20101206"
assert recjson["meeting"]["url"] == "http://www.roma1.infn.it/discrete10"
assert recjson["meeting"]["recid"] == 1181092
assert recjson["meeting_note"]["url"] == \
"http://www.roma1.infn.it/discrete10"
assert recjson["meeting_note"]["recid"] == 1181092
# ........................................................................
#
......@@ -210,7 +117,127 @@ def test_add_conference_data():
add_conference_data(recjson)
assert "meeting_name" in recjson
assert "meeting" in recjson
assert "meeting_note" in recjson
assert recjson["meeting_name"][0]["coference_code"] == "C10-12-06"
assert recjson["meeting"]["url"] == "http://www.roma1.infn.it/discrete10"
assert recjson["meeting"]["recid"] == 980401
assert recjson["meeting_note"]["url"] == \
"http://www.roma1.infn.it/discrete10"
assert recjson["meeting_note"]["recid"] == 980401
def test_conference_cds():
    """cds.cern.ch record 1411352: detected as conference, upcast to RecordConf."""
    cds = InvenioStore("cds.cern.ch")
    payload = cds.get_record(1411352)

    assert is_conference(payload)
    assert not (is_institute(payload) or is_thesis(payload))

    built = build_record(payload)
    assert isinstance(built, RecordConf)
def test_conference_inspirehep():
    """inspirehep.net record 1276938: detected as conference, upcast to RecordConf."""
    inspire = InvenioStore("inspirehep.net")
    payload = inspire.get_record(1276938)

    assert is_conference(payload)
    assert not (is_institute(payload) or is_thesis(payload))

    built = build_record(payload)
    assert isinstance(built, RecordConf)
def test_talk_cds():
    """cds.cern.ch record 2239092: detected as conference, upcast to RecordConf."""
    cds = InvenioStore("cds.cern.ch")
    payload = cds.get_record(2239092)

    assert is_conference(payload)
    assert not (is_institute(payload) or is_thesis(payload))

    built = build_record(payload)
    assert isinstance(built, RecordConf)
# ............................................................................
#
# Institute
#
def test_add_affiliation_keys():
    """add_affiliation_keys adds the field corporate_note for CPPM.

    The record 902989 of inspirehep.net is used. The sub-keys checked are
    those written by add_affiliation_keys: ``identifier`` and
    ``future_identifier``.
    """
    store = InvenioStore("inspirehep.net")
    recjson = store.get_record(902989)

    add_affiliation_keys(recjson)
    assert u"corporate_note" in recjson

    note = recjson[u"corporate_note"]
    assert note[u"identifier"] == "Marseille, CPPM"
    # the key is spelled "future_identifier" (not "futur_identifier")
    assert note[u"future_identifier"] == "CPPM, Marseille"
def test_institute():
    """CPPM, inspirehep.net record 902989: institute only, upcast to RecordInst."""
    inspire = InvenioStore("inspirehep.net")
    payload = inspire.get_record(902989)

    assert not is_conference(payload)
    assert is_institute(payload)
    assert not is_thesis(payload)

    built = build_record(payload)
    assert isinstance(built, RecordInst)
# ............................................................................
#
# Article, ...
#
def test_publi_cds():
    """Precision luminosity measurements at LHCb, cds.cern.ch record 1951625.

    No specialised kind matches and the record is upcast to RecordPubli.
    """
    cds = InvenioStore("cds.cern.ch")
    payload = cds.get_record(1951625)

    for predicate in (is_conference, is_institute, is_thesis):
        assert not predicate(payload)

    built = build_record(payload)
    assert isinstance(built, RecordPubli)
def test_publi_inspirehep():
    """Precision luminosity measurements at LHCb, inspirehep.net record 1319638.

    No specialised kind matches and the record is upcast to RecordPubli.
    """
    inspire = InvenioStore("inspirehep.net")
    payload = inspire.get_record(1319638)

    for predicate in (is_conference, is_institute, is_thesis):
        assert not predicate(payload)

    built = build_record(payload)
    assert isinstance(built, RecordPubli)
# ............................................................................
#
# Thesis
#
def test_thesis_cds():
    """cds.cern.ch record 1632177: detected as thesis, upcast to RecordThesis."""
    cds = InvenioStore("cds.cern.ch")
    payload = cds.get_record(1632177)

    assert not (is_conference(payload) or is_institute(payload))
    assert is_thesis(payload)

    built = build_record(payload)
    assert isinstance(built, RecordThesis)
# -*- coding: utf-8 -*-
"""test_06_RecordInst
Test specific methods of the RecordInst class for CPPM
"""
import pytest
from invenio_tools import load_record
@pytest.fixture(scope="module")
def record():
    """Load the record 902989 of inspirehep.net, once per test module."""
    host, rec_id = "inspirehep.net", 902989
    return load_record(host, rec_id)
def test_future_identifer(record):
    """future_identifier() returns the future key of the institute."""
    expected = u'CPPM, Marseille'
    assert record.future_identifier() == expected
def test_id(record):
    """id() returns the identifier used to load the record."""
    expected = 902989
    assert record.id() == expected
def test_identifier(record):
    """identifier() returns the current key of the institute."""
    expected = u'Marseille, CPPM'
    assert record.identifier() == expected
def test_name(record):
    """name() returns the full name of the institute."""
    expected = u'Centre de Physique des Particules de Marseille (CPPM)'
    assert record.name() == expected
def test_rex(record):
    """rex() returns the identifier, the future identifier and the name
    of the institute separated by ``|``."""
    alternatives = (
        r"Marseille, CPPM",
        r"CPPM, Marseille",
        "Centre de Physique des Particules de Marseille (CPPM)",
    )
    assert record.rex() == "|".join(alternatives)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment