Commit cf8a091a authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Migrate Record.

parent 66288cd2
......@@ -8,33 +8,67 @@ from .base import OAI, OAI_URL, REG_OAI
class Record(dict):
"""The base class for MARC record.
A dictionary with the following structure::
"""The base class for JSON record.
It is a dictionary with the following structure::
record[field][subfield] = value
record[field][subfield] = [val1, val2, ....]
record[field] = [dict1(subfield1=..., subfield2=...),
dict2(subfield1=..., subfield2=...), ...]
In the `MARC <http://www.loc.gov/marc/>`_ standard, the ``field``
is a string containing at least three digits while the ``subfield`` is
a letter. Both the ``field`` and the ``subfield`` are of type str.
For an article, typical fields are (cds 1951625, ins 1319638)::
+-----------------------------+-----------------------------+
| field (cds) | field (inspirehep) |
+-----------------------------+-----------------------------+
| | FIXME_OAI |
| abstract | abstract |
| accelerator_experiment | accelerator_experiment |
| agency_code | |
| authors | authors |
| base | |
| collection | collection |
| comment | comment |
| copyright_status | |
| corporate_name | corporate_name |
| creation_date | creation_date |
| doi | doi |
| email_message | |
| filenames | filenames |
| files | files |
| filetypes | filetypes |
| imprint | imprint |
| keywords | keywords |
| language | |
| license | license |
| number_of_authors | number_of_authors |
| number_of_citations | number_of_citations |
| number_of_comments | number_of_comments |
| number_of_reviews | number_of_reviews |
| oai | |
| other_report_number | |
| persistent_identifiers_keys | persistent_identifiers_keys |
| physical_description | physical_description |
| prepublication | prepublication |
| primary_report_number | primary_report_number |
| publication_info | publication_info |
| recid | recid |
| | reference |
| report_number | |
| | source_of_acquisition |
| status_week | |
| subject | subject |
| system_control_number | system_control_number |
| thesaurus_terms | thesaurus_terms |
| title | title |
| | title_additional |
| url | |
| version_id | version_id |
+-----------------------------+-----------------------------+
The class comes with a collection of methods to extract the record
information masking the ``field`` and the ``subfield`` codification.
The relations between methods and MARC fields are the following::
+---------------+--------+----------+
| | CDS | INSPIREP |
+---------------+--------+----------+
| id | 001 | |
| primary oai | 0248 a | 909CO o |
| secondary oai | 035 a | 035 a |
+---------------+--------+----------+
"""
def __init__(self, *args):
......@@ -49,19 +83,24 @@ class Record(dict):
self.my_authors = ""
def _get(self, field, subfield, force_list=False):
"""Get the value associated to the key C{field} and C{subfield}.
"""Get the value associated to the ``field`` and ``subfield``.
Args:
field (str): typical values are ``"001"``, ``"700"``,
``"909CO"``, ....
field (unicode):
name of the field, *e.g.* ``authors``
subfield (str): typical values are ``"a"``, ``"b"``, ....
subfield (unicode):
name of the subfield, *e.g.* ``full_name``
force_list (bool): always return a *list* when ``True``.
force_list (bool):
always return a *list* when ``True``.
Returns:
str or list: empty string / list when not defined.
unicode:
value or an empty string when not defined.
list:
list of values or an empty list when not defined
"""
val = ""
......@@ -78,21 +117,21 @@ class Record(dict):
val.append(el[subfield])
if force_list and not isinstance(val, list):
if val:
val = [val]
else:
val = []
val = ([val] if len(val) > 0 else [])
return val
def _oai_url(self, value):
@staticmethod
def _oai_url(value):
"""Build the Open Archive Initiative URL.
Args:
value (str): OAI identifier, *e.g.* ``oai:host:id``
value (unicode):
OAI identifier, *e.g.* ``oai:host:id``
Returns:
str: the pattern of the string is ``http://host/record/id``.
unicode:
the pattern of the string is ``http://host/record/id``.
The string is empty when it is not defined or when the value
is not well formed.
......@@ -113,7 +152,8 @@ class Record(dict):
"""The store housing the record.
Returns:
str: ``inspirehep.net`` or ``cds.cern.ch`` or an empty string
unicode:
``inspirehep.net`` or ``cds.cern.ch`` or an empty string
when not defined.
"""
......@@ -137,16 +177,18 @@ class Record(dict):
"""The id of the record in the store.
Returns:
str: the unique id of the record in the store
int:
the unique id of the record in the store
"""
return self["001"]
return self[u"recid"]
def oai(self):
"""The Open Archive Initiative identifier(s).
Returns:
str: the primary and secondary OAI identifier are separated
unicode:
the primary and secondary OAI identifier are separated
by a comma. The pattern of the identifier is ``oai:host:id`` or
an empty string when it is not defined.
......@@ -184,21 +226,20 @@ class Record(dict):
def primary_oai(self):
"""The primary Open Archive Initiative identifier.
The primary OAI identifier corresponds to the record of the store.
It contains the value returned by the method :meth:`.id`.
The primary OAI identifier matches the record identifier.
Returns:
str: the pattern of the string is ``oai:host:id``.
unicode:
the pattern of the string is ``oai:host:id``.
It is an empty string when not defined
"""
# the location of the OAI information depends on the store
# cds: (0248, a), inspirehep: (909CO, o)
if "0248" in self:
field, subfield = "0248", "a"
if u"oai" in self:
field, subfield = u"oai", u"value"
elif "909CO" in self:
field, subfield = "909CO", "o"
elif u"FIXME_OAI" in self:
field, subfield = u"FIXME_OAI", u"id"
else:
return ""
......@@ -207,7 +248,7 @@ class Record(dict):
value = self._get(field, subfield)
# in some case OAI is a list
# select the OAI corresponding to the current ID.
# select the OAI corresponding to the record identifier.
if isinstance(value, list):
myid = self.id()
for el in value:
......@@ -227,13 +268,14 @@ class Record(dict):
a new *id* and new *URL* which is returned by this method.
Returns:
str: the pattern of the string is ``http://host/record/id``.
unicode:
the pattern of the string is ``http://host/record/id``.
The string is empty when it is not defined or when the OAI
is not well formed.
"""
oai = self.primary_oai()
rec_id = self.id()
rec_id = str(self.id())
if oai.endswith(rec_id):
return self._oai_url(self.primary_oai())
......@@ -248,34 +290,25 @@ class Record(dict):
corresponds to the record in the other store, *inspirehep.net*.
Returns:
str: the pattern of the string is ``oai:host:id``.
unicode:
the pattern of the string is ``oai:host:id``.
It is an empty string when not defined
"""
host = self.host()
if host == "cds.cern.ch":
invenio_key = "inspire"
secondary_host = "inspirehep.net"
data = self[u"system_control_number"]
data = (data if isinstance(data, list) else [data])
elif host == "inspirehep.net":
invenio_key = "cds"
secondary_host = "cds.cern.ch"
# data is a list of dictionary
# keys are `institute`, `value` or `canceled`
for di in data:
institute = di[u"institute"]
if institute == "CDS":
if u"value" in di:
return OAI % (u"cds.cern.ch", di[u"value"])
else:
return ""
if "035" not in self:
return ""
values = self["035"]
if isinstance(values, list):
for di in values:
if "9" in di and "a" in di and di["9"].lower() == invenio_key:
return OAI % (secondary_host, di["a"])
elif isinstance(values, dict) and "9" in values and "a" in values:
if values["9"].lower() == invenio_key:
return OAI % (secondary_host, values["a"])
elif institute == "Inspire":
if u"value" in di:
return OAI % (u"inspirehep.net", di[u"value"])
return ""
......@@ -283,12 +316,10 @@ class Record(dict):
"""The Open Archive Initiative URL for the secondary OAI.
Returns:
str: the pattern of the string is ``http://host/record/id``.
unicode:
the pattern of the string is ``http://host/record/id``.
The string is empty when it is not defined or when the OAI
is not well formed.
"""
return self._oai_url(self.secondary_oai())
def sysno(self):
return self._get("970", "a")
# -*- coding: utf-8 -*-
"""test_03_Record
Test all methods of the Record class for a given article:
http://cds.cern.ch/record/1951625.
(same as http://inspirehep.net/record/1319638)
Precision luminosity measurements at LHCb,
J. Instrum. 9 (2014) P12005
arXiv:1410.0149
704 authors
"""
import pytest
from invenio_tools import load_record
@pytest.fixture(scope="module")
def reccds():
    """Provide the record 1951625 loaded from ``cds.cern.ch``.

    The fixture has module scope, so the record is loaded once and
    shared by all the tests of this module.
    """
    record = load_record("cds.cern.ch", 1951625)
    return record
@pytest.fixture(scope="module")
def recins():
    """Provide the record 1319638 loaded from ``inspirehep.net``.

    The fixture has module scope, so the record is loaded once and
    shared by all the tests of this module.
    """
    record = load_record("inspirehep.net", 1319638)
    return record
def test_get(reccds):
    """Check ``_get`` for a plain value, a forced list and a list of values."""
    # plain value
    source = reccds._get("subject", "source")
    assert source == "arXiv"

    # same value, wrapped in a list on request
    forced = reccds._get("subject", "source", force_list=True)
    assert forced == ["arXiv"]

    # one full name per author of the record
    names = reccds._get("authors", "full_name")
    assert isinstance(names, list)
    assert len(names) == reccds["number_of_authors"]
def test__oai_url(reccds):
    """Check the URL built by ``_oai_url`` from the OAI value of the record."""
    identifier = reccds["oai"]["value"]
    url = reccds._oai_url(identifier)
    assert url == "http://cds.cern.ch/record/1951625"
def test_id(reccds):
    """Check that ``id`` returns the integer record identifier."""
    rec_id = reccds.id()
    assert rec_id == 1951625
def test_primary_oai(reccds, recins):
    """Check the primary OAI identifier of the CDS and INSPIRE records."""
    cds_oai = reccds.primary_oai()
    assert cds_oai == "oai:cds.cern.ch:1951625"

    ins_oai = recins.primary_oai()
    assert ins_oai == "oai:inspirehep.net:1319638"
def test_secondary_oai(reccds, recins):
    """Check that each record points to its counterpart in the other store."""
    cds_secondary = reccds.secondary_oai()
    assert cds_secondary == "oai:inspirehep.net:1319638"

    ins_secondary = recins.secondary_oai()
    assert ins_secondary == "oai:cds.cern.ch:1951625"
def test_oai(reccds, recins):
    """Check the comma-separated primary and secondary OAI identifiers."""
    cds_expected = "oai:cds.cern.ch:1951625, oai:inspirehep.net:1319638"
    assert reccds.oai() == cds_expected

    ins_expected = "oai:inspirehep.net:1319638, oai:cds.cern.ch:1951625"
    assert recins.oai() == ins_expected
def test_primary_oai_url(reccds):
    """Check the URL associated to the primary OAI of the CDS record."""
    url = reccds.primary_oai_url()
    assert url == "http://cds.cern.ch/record/1951625"
def test_secondary_oai_url(reccds):
    """Check the URL associated to the secondary OAI of the CDS record."""
    url = reccds.secondary_oai_url()
    assert url == "http://inspirehep.net/record/1319638"
def test_oai_url(reccds):
    """Check the comma-separated primary and secondary OAI URLs."""
    expected = (
        "http://cds.cern.ch/record/1951625, "
        "http://inspirehep.net/record/1319638"
    )
    assert reccds.oai_url() == expected
def test_host(reccds, recins):
    """Check the name of the store housing each record."""
    cds_host = reccds.host()
    assert cds_host == "cds.cern.ch"

    ins_host = recins.host()
    assert ins_host == "inspirehep.net"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment