Commit f21f3064 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Improved the strategy for handling a record that is deleted in the store and replaced by a new one.

parent d8770bcb
......@@ -5,7 +5,13 @@
import re
from base import is_conference, is_institute, is_thesis, MSG_NO_CONF
from base import (is_conference,
is_institute,
is_thesis,
MSG_NO_CONF,
OAI_URL,
REG_OAI)
from exception import Marc12Exception
from inveniostore import InvenioStore
from iterrecord import IterRecord
......@@ -14,6 +20,8 @@ from recordinst import RecordInst
from recordpubli import RecordPubli
from recordthesis import RecordThesis
# Message carried by Marc12Exception when a deleted record cannot be recovered.
MSG_DECODING_FAILED = "Record decoding failed."

# Matches "C<digits>-<digits>-<digits>" with an optional ".<digits>" suffix,
# e.g. "C15-03-21" or "C15-03-21.1".
# NOTE(review): presumably the conference key format -- usage not visible here.
# Raw string: "\d" and "\." are invalid escape sequences in a plain literal.
REG_CONF = re.compile(r"^C\d+-\d+-\d+(\.\d+)?$")
......@@ -68,6 +76,32 @@ class Marc12(object):
if "8564" in conference:
record[u"8564"] = conference[u"8564"]
def __call__(self, xml, **kwargs):
    """Delegate to :meth:`.records` with the same arguments.

    Note:
        * Allow the syntax ``Marc12()(xml)``.
        * Keep for backward compatibility.
        * Prefer the method :meth:`.records`.

    Args:
        xml (unicode): the XML string with the publication contents.

    Keyword Args:
        filter_func (reference): a function to eliminate records
            which don't satisfy the function's criteria. The argument of
            the function is a Record while the return value is a boolean.

        func (reference): a function applied to each surviving record.
            The argument of the function is a Record.
            It can be used to polish the record content.

    Returns:
        list: list of :class:`.Record`.

    """
    # xml and every keyword argument are forwarded unchanged
    return self.records(xml, **kwargs)
def _get_conference(self, host, conf_id, key):
"""Get the conference data associated to the record.
The conference is identified by its id or key.
......@@ -82,7 +116,7 @@ class Marc12(object):
Record: The conference record
Raises:
CheckException: when the conference is not found.
Marc12Exception: when the conference is not found.
"""
cds = InvenioStore(host)
......@@ -114,31 +148,58 @@ class Marc12(object):
raise Marc12Exception(MSG_NO_CONF)
def _recover_deleted_record(self, record):
    """Return *record*, or its replacement when *record* is flagged deleted.

    From time to time a record is deleted and replaced by a new one.
    In that case the record looks like::

        {
            u'0248_': {'a': u'oai:cds.cern.ch:1366561'},
            u'001': u'1366561',
            u'980': {'c': u'DELETED'},
            u'970': {'d': u'1366710'}
        }

    The method replaces the old record by the new one. The new record is
    fetched from the store: the store host is extracted from the OAI
    (field 0248_, subfield a) with ``REG_OAI`` and the new record
    identifier is read from the field 970, subfield d.

    Note:
        It might happen that a record is deleted and not replaced by
        a new one. In that case the Marc12Exception is raised.

    Args:
        record (Record): the record to be checked.

    Returns:
        Record: the input record when it is not flagged as deleted,
        otherwise the first record decoded from the store answer.

    Raises:
        Marc12Exception: when the record is deleted and not replaced by
            a new one, when its OAI does not match ``REG_OAI`` or when
            the store answer contains no record.

    """
    is_deleted = (u"980" in record
                  and 'c' in record[u"980"]
                  and record[u"980"]['c'] == "DELETED")

    if not is_deleted:
        return record

    # both the new identifier (970 d) and the OAI (0248_ a) are required
    is_replaced = (u"970" in record and 'd' in record[u"970"]
                   and u"0248_" in record and 'a' in record[u"0248_"])

    if not is_replaced:
        raise Marc12Exception(MSG_DECODING_FAILED)

    match = REG_OAI.match(record[u"0248_"]['a'])
    if not match:
        raise Marc12Exception(MSG_DECODING_FAILED)

    # the first group of REG_OAI is used as the store host
    cds = InvenioStore(match.group(1))
    xml = cds.get_record(record[u"970"]['d'])

    # An empty answer must not leak a bare StopIteration to the caller:
    # report it like any other unrecoverable record.
    new_record = next(IterRecord(xml), None)
    if new_record is None:
        raise Marc12Exception(MSG_DECODING_FAILED)

    return new_record
def iterrecords(self, xml):
"""Return an iterator on the embedded records.
......@@ -198,6 +259,8 @@ class Marc12(object):
for record in IterRecord(xml):
record = self._recover_deleted_record(record)
if is_conference(record):
upcast_record = RecordConf(record)
self._add_conference_data(upcast_record)
......
......@@ -200,24 +200,6 @@ class Record(dict):
field, subfield = u"909CO", "o"
else:
# recover record which have a new address after a delete operation
# {
# u'0248_': {'a': u'oai:cds.cern.ch:1366561'},
# u'001': u'1366561',
# u'980': {'c': u'DELETED'},
# u'970': {'d': u'1366710'}
# }
is_deleted = \
u"0248_" in self and 'a' in self[u"0248_"] \
and u"980" in self and 'c' in self[u"980"] \
and self[u"980"]['c'] == u"DELETED" \
and u"970" in self and 'd' in self[u"970"]
if is_deleted:
old_oai = self[u"0248_"]['a']
new_oai = old_oai[:old_oai.rfind(":")+1] + self[u"970"]['d']
return new_oai
return u""
# standard case
......@@ -238,13 +220,25 @@ class Record(dict):
def primary_oai_url(self):
    """The Open Archive Initiative URL for the primary OAI.

    Note:
        A record can be deleted and replaced by a new one.
        In that case the OAI is not changed but the record has
        a new *id* and a new *URL* which is returned by this method.

    Returns:
        unicode: the pattern of the string is ``http://host/record/id``.

        NOTE(review): the previous documentation states that the string
        is empty when the OAI is not defined or not well formed. That is
        decided by ``_oai_url`` and only applies when the OAI ends with
        the record id -- to be confirmed.

    """
    oai = self.primary_oai()
    rec_id = self.id()

    # standard case: the OAI still carries the record id
    # NOTE(review): plain suffix comparison -- an id which is the tail of
    # a longer id would also match; confirm this cannot happen.
    if oai.endswith(rec_id):
        return self._oai_url(oai)

    # the record replaces a deleted one: build the URL from its own id
    return OAI_URL % (self.host(), rec_id)
def secondary_oai(self):
"""The secondary OAI identifier.
......
......@@ -15,6 +15,7 @@ import pytest
from invenio_tools import load_record
from invenio_tools.exception import Marc12Exception
@pytest.fixture(scope="module")
......@@ -54,5 +55,15 @@ def test_oai_url(cds_record, ins_record):
assert ins_record.oai_url() == u"http://inspirehep.net/record/1319638, http://cds.cern.ch/record/1951625"
def test_new_oai_after_delete():
    """Check a record which has been deleted and replaced by a new one.

    The record cds 1366561 has been deleted and replaced by 1366710.
    The OAI is not changed, but the record has a new id and a new URL.

    """
    record = load_record("cds.cern.ch", 1366561)

    # the OAI keeps the old identifier while the URL points to the new one
    assert record.primary_oai() == "oai:cds.cern.ch:1366561"
    assert record.oai() == "oai:cds.cern.ch:1366561, oai:inspirehep.net:917526"
    assert record.oai_url() == "http://cds.cern.ch/record/1366710, http://inspirehep.net/record/917526"

    # the next record has been deleted and not replaced
    with pytest.raises(Marc12Exception):
        load_record("cds.cern.ch", 1361194)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment