Commit d8770bcb authored by LE GAC Renaud
Browse files

The OAI identifier contains the link to cds and inspirehep.

parent fa26d23a
......@@ -14,6 +14,10 @@ invenio_tools.record.Record
~Record.id
~Record.oai
~Record.oai_url
~Record.primary_oai
~Record.primary_oai_url
~Record.secondary_oai
~Record.secondary_oai_url
~Record.sysno
......
invenio_tools.record.Record.primary_oai
=======================================
.. currentmodule:: invenio_tools.record
.. automethod:: Record.primary_oai
\ No newline at end of file
invenio_tools.record.Record.primary_oai_url
===========================================
.. currentmodule:: invenio_tools.record
.. automethod:: Record.primary_oai_url
\ No newline at end of file
invenio_tools.record.Record.secondary_oai
=========================================
.. currentmodule:: invenio_tools.record
.. automethod:: Record.secondary_oai
\ No newline at end of file
invenio_tools.record.Record.secondary_oai_url
=============================================
.. currentmodule:: invenio_tools.record
.. automethod:: Record.secondary_oai_url
\ No newline at end of file
......@@ -4,8 +4,8 @@
"""
import re
ARXIV = "arXiv"
ARXIV_PDF = "http://arxiv.org/pdf/"
ARXIV = u"arXiv"
ARXIV_PDF = u"http://arxiv.org/pdf/"
MSG_NO_CONF = "Reject no conference information"
MSG_NO_COUNTRY = "Reject invalid country"
......@@ -13,7 +13,8 @@ MSG_NO_PUBLISHER = "Reject invalid publisher"
MSG_NO_THESIS = "Reject no thesis information"
MSG_WELL_FORMED_COLLABORATION = "Reject collaboration is not well formed"
OAI_URL = "http://%s/record/%s"
OAI = u"oai:%s:%s"
OAI_URL = u"http://%s/record/%s"
# Digits, a literal dot, digits (e.g. the "1410.0149" part of "arXiv:1410.0149").
# Raw string so that the backslashes reach the regex engine untouched.
REG_ARXIV_NUMBER = re.compile(r"\d+\.\d+")
......
......@@ -5,7 +5,8 @@
import pprint
from base import OAI_URL, REG_OAI
from base import OAI, OAI_URL, REG_OAI
from __builtin__ import isinstance
class Record(dict):
......@@ -28,12 +29,13 @@ class Record(dict):
The relation between methods and MARC fields are the following::
+-----+--------+----------+
| | CDS | INSPIREP |
+-----+--------+----------+
| id | 001 | |
| oai | 0248 a | 909CO o |
+-----+--------+----------+
+---------------+--------+------------+
|               | CDS    | INSPIREHEP |
+---------------+--------+------------+
| id            | 001    |            |
| primary oai   | 0248 a | 909CO o    |
| secondary oai | 035 a  | 035 a      |
+---------------+--------+------------+
"""
def __init__(self, *args):
......@@ -82,6 +84,24 @@ class Record(dict):
return val
def _oai_url(self, value):
    """Convert an Open Archive Initiative identifier into its URL.

    Args:
        value (unicode): OAI identifier, *e.g.* ``oai:host:id``.

    Returns:
        unicode: a string following the pattern ``http://host/record/id``,
        built from the first two groups captured by ``REG_OAI``.
        An empty string is returned when ``value`` does not match
        ``REG_OAI`` (identifier not defined or not well formed).

    """
    found = REG_OAI.match(value)
    if found is None:
        return u""

    host_part, id_part = found.group(1), found.group(2)
    return OAI_URL % (host_part, id_part)
def debug(self):
"""Print the record structure on the standard output.
......@@ -100,7 +120,7 @@ class Record(dict):
if self.__host is not None:
return self.__host
val = self.oai()
val = self.primary_oai()
if not val:
self.__host = u''
return self.__host
......@@ -122,14 +142,56 @@ class Record(dict):
return self[u"001"]
def oai(self):
    """The Open Archive Initiative identifier(s).

    Returns:
        unicode: the primary OAI identifier, followed by the secondary
        one when it is defined. The two identifiers are separated by
        ``", "`` and each follows the pattern ``oai:host:id``.
        An empty string is returned when the primary identifier is not
        defined, whatever the secondary identifier is.

    """
    primary = self.primary_oai()
    if not primary:
        # without a primary identifier the secondary one is not looked up
        return u""

    secondary = self.secondary_oai()
    if not secondary:
        return primary

    return u"%s, %s" % (primary, secondary)
def oai_url(self):
    """The URL(s) associated to the Open Archive Initiative identifier(s).

    Returns:
        unicode: the URL of the primary identifier, followed by the URL
        of the secondary one when it is available. The two URLs are
        separated by ``", "`` and each follows the pattern
        ``http://host/record/id``.
        An empty string is returned when the primary URL is empty.

    """
    first = self.primary_oai_url()
    if not first:
        return u""

    second = self.secondary_oai_url()
    return u"%s, %s" % (first, second) if second else first
def primary_oai(self):
"""The primary Open Archive Initiative identifier.
The primary OAI identifier corresponds to the record of the store.
It contains the value returned by the method :meth:`.id`.
Returns:
unicode: the pattern of the string is ``oai:host:record_id``.
unicode: the pattern of the string is ``oai:host:id``.
It is an empty string when not defined
"""
# the location of the oai information depends on the store
# the location of the OAI information depends on the store
# cds: (0248, a), inspirehep: (909CO, o)
if u"0248" in self:
field, subfield = u"0248", "a"
......@@ -138,13 +200,31 @@ class Record(dict):
field, subfield = u"909CO", "o"
else:
# recover record which have a new address after a delete operation
# {
# u'0248_': {'a': u'oai:cds.cern.ch:1366561'},
# u'001': u'1366561',
# u'980': {'c': u'DELETED'},
# u'970': {'d': u'1366710'}
# }
is_deleted = \
u"0248_" in self and 'a' in self[u"0248_"] \
and u"980" in self and 'c' in self[u"980"] \
and self[u"980"]['c'] == u"DELETED" \
and u"970" in self and 'd' in self[u"970"]
if is_deleted:
old_oai = self[u"0248_"]['a']
new_oai = old_oai[:old_oai.rfind(":")+1] + self[u"970"]['d']
return new_oai
return u""
# standard case
value = self._get(field, subfield)
# in some case OAI is a list, e.g when two records were entered
# for the same entry but one deleted.
# Select the OAI corresponding to the current ID.
# in some case OAI is a list
# select the OAI corresponding to the current ID.
if isinstance(value, list):
myid = self.id()
for el in value:
......@@ -155,23 +235,65 @@ class Record(dict):
return value
def oai_url(self):
"""The Open Archive Initiative URL.
def primary_oai_url(self):
    """The Open Archive Initiative URL for the primary OAI.

    Returns:
        unicode: the result of ``_oai_url`` applied to the primary OAI
        identifier. The pattern of the string is
        ``http://host/record/id``. The string is empty when the OAI is
        not defined or not well formed.

    """
    identifier = self.primary_oai()
    return self._oai_url(identifier)
def secondary_oai(self):
    """The secondary OAI identifier.

    The secondary identifier points to the same publication in the other
    store: *inspirehep.net* when the current store is *cds.cern.ch*, and
    *cds.cern.ch* when the current store is *inspirehep.net*.

    It is built from the field ``035``. The first entry of that field
    having a subfield ``a`` and a subfield ``9`` equal, ignoring the case,
    to the key of the other store (``inspire`` or ``cds``) is used.

    Returns:
        unicode: the pattern of the string is ``oai:host:id``.
        It is an empty string when the host of the record is not one of
        the two stores, when the field ``035`` is missing or when none of
        its entries refer to the other store.

    """
    # current host -> (value of subfield 9 naming the other store,
    #                  host of the other store)
    other_stores = {
        u"cds.cern.ch": (u"inspire", u"inspirehep.net"),
        u"inspirehep.net": (u"cds", u"cds.cern.ch"),
    }

    host = self.host()
    if host not in other_stores:
        return u""
    invenio_key, secondary_host = other_stores[host]

    if u"035" not in self:
        return u""

    # the field is either a single dictionary or a list of dictionaries
    values = self[u"035"]
    if isinstance(values, dict):
        values = [values]
    elif not isinstance(values, list):
        return u""

    for di in values:
        if "9" in di and "a" in di and di["9"].lower() == invenio_key:
            return OAI % (secondary_host, di["a"])

    return u""
def secondary_oai_url(self):
    """The Open Archive Initiative URL for the secondary OAI.

    Returns:
        unicode: the result of ``_oai_url`` applied to the secondary OAI
        identifier. The pattern of the string is
        ``http://host/record/id``. The string is empty when the OAI is
        not defined or not well formed.

    """
    identifier = self.secondary_oai()
    return self._oai_url(identifier)
def sysno(self):
    """Return the content of the subfield ``a`` of the field ``970``.

    The value is whatever ``_get`` yields for that field and subfield
    (presumably the system number of the record -- to be confirmed).

    """
    field, subfield = u"970", "a"
    return self._get(field, subfield)
......@@ -86,8 +86,8 @@ def test_is_article(record):
def test_oai(record):
assert record.oai() == "oai:cds.cern.ch:1951625"
assert record.oai_url() == "http://cds.cern.ch/record/1951625"
assert record.oai() == "oai:cds.cern.ch:1951625, oai:inspirehep.net:1319638"
assert record.oai_url() == "http://cds.cern.ch/record/1951625, http://inspirehep.net/record/1319638"
def test_paper_reference(record):
......
......@@ -76,8 +76,8 @@ def test_is_article(record):
def test_oai(record):
assert record.oai() == "oai:inspirehep.net:1319638"
assert record.oai_url() == "http://inspirehep.net/record/1319638"
assert record.oai() == "oai:inspirehep.net:1319638, oai:cds.cern.ch:1951625"
assert record.oai_url() == "http://inspirehep.net/record/1319638, http://cds.cern.ch/record/1951625"
def test_paper_reference(record):
......
......@@ -94,8 +94,8 @@ def test_is_proceeding(record):
def test_oai(record):
assert record.oai() == "oai:cds.cern.ch:1411352"
assert record.oai_url() == "http://cds.cern.ch/record/1411352"
assert record.oai() == "oai:cds.cern.ch:1411352, oai:inspirehep.net:1089237"
assert record.oai_url() == "http://cds.cern.ch/record/1411352, http://inspirehep.net/record/1089237"
def test_paper_reference(record):
......
# -*- coding: utf-8 -*-
""" Test OAI methods.
http://inspirehep.net/record/1319638
(same as http://cds.cern.ch/record/1951625)
Precision luminosity measurements at LHCb,
J. Instrum. 9 (2014) P12005
arXiv:1410.0149
704 authors
"""
import pytest
from invenio_tools import load_record
@pytest.fixture(scope="module")
def cds_record():
    """Record 1951625 loaded from cds.cern.ch, shared by the module tests."""
    record = load_record('cds.cern.ch', 1951625)
    return record
@pytest.fixture(scope="module")
def ins_record():
    """Record 1319638 loaded from inspirehep.net, shared by the module tests."""
    record = load_record('inspirehep.net', 1319638)
    return record
def test_primary_oai(cds_record, ins_record):
    """The primary OAI names the store and the id of the loaded record."""
    cds_oai = cds_record.primary_oai()
    assert cds_oai == u"oai:cds.cern.ch:1951625"

    ins_oai = ins_record.primary_oai()
    assert ins_oai == u"oai:inspirehep.net:1319638"
def test_host(cds_record, ins_record):
    """The host is the store from which the record was loaded."""
    cds_host = cds_record.host()
    assert cds_host == u"cds.cern.ch"

    ins_host = ins_record.host()
    assert ins_host == u"inspirehep.net"
def test_secondary_oai(cds_record, ins_record):
    """The secondary OAI of each record points to the other store."""
    cds_oai = cds_record.secondary_oai()
    assert cds_oai == u"oai:inspirehep.net:1319638"

    ins_oai = ins_record.secondary_oai()
    assert ins_oai == u"oai:cds.cern.ch:1951625"
def test_oai(cds_record, ins_record):
    """The OAI is the primary identifier followed by the secondary one."""
    cds_oai = cds_record.oai()
    assert cds_oai == u"oai:cds.cern.ch:1951625, oai:inspirehep.net:1319638"

    ins_oai = ins_record.oai()
    assert ins_oai == u"oai:inspirehep.net:1319638, oai:cds.cern.ch:1951625"
def test_primary_oai_url(cds_record, ins_record):
    """The primary URL points to the record in its own store."""
    cds_url = cds_record.primary_oai_url()
    assert cds_url == u"http://cds.cern.ch/record/1951625"

    ins_url = ins_record.primary_oai_url()
    assert ins_url == u"http://inspirehep.net/record/1319638"
def test_secondary_oai_url(cds_record, ins_record):
    """The secondary URL points to the record in the other store."""
    cds_url = cds_record.secondary_oai_url()
    assert cds_url == u"http://inspirehep.net/record/1319638"

    ins_url = ins_record.secondary_oai_url()
    assert ins_url == u"http://cds.cern.ch/record/1951625"
def test_oai_url(cds_record, ins_record):
    """The OAI URL is the primary URL followed by the secondary one."""
    cds_url = cds_record.oai_url()
    assert cds_url == u"http://cds.cern.ch/record/1951625, http://inspirehep.net/record/1319638"

    ins_url = ins_record.oai_url()
    assert ins_url == u"http://inspirehep.net/record/1319638, http://cds.cern.ch/record/1951625"
def test_new_oai_after_delete():
    """The primary OAI of the CDS record 1366561 carries the id 1366710."""
    deleted = load_record("cds.cern.ch", 1366561)
    new_oai = deleted.primary_oai()
    assert new_oai == "oai:cds.cern.ch:1366710"
\ No newline at end of file
......@@ -68,8 +68,8 @@ def test_is_phd(record):
def test_oai(record):
assert record.oai() == "oai:cds.cern.ch:1632177"
assert record.oai_url() == "http://cds.cern.ch/record/1632177"
assert record.oai() == "oai:cds.cern.ch:1632177, oai:inspirehep.net:1296381"
assert record.oai_url() == "http://cds.cern.ch/record/1632177, http://inspirehep.net/record/1296381"
def test_paper_reference(record):
......
......@@ -29,7 +29,7 @@ def test_protection_find_authors_by_affiliation():
def test_protection_oai():
"""['oai:cds.cern.ch:1513204', 'oai:cds.cern.ch:1512766']"""
record = load_record('cds.cern.ch', 1513204)
assert record.oai() == 'oai:cds.cern.ch:1513204'
assert record.oai() == 'oai:cds.cern.ch:1513204, oai:inspirehep.net:1216886'
assert record.host() == "cds.cern.ch"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment