Commit ce871bd1 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Reject existing db record in PublicationsTool.process_url.

parent fcb3974e
......@@ -119,6 +119,40 @@ class PublicationsTool(object):
self.__par = None
self.__reference = None
def _is_in_db(self, rec_id, title):
    """Tell whether the record has already been loaded in the database.

    The lookup is made on the C{origin} field of the publications
    table, using the URL built from the harvester host and the
    record identifier.

    When a matching publication is found, a new log entry is appended
    to C{self.logs} and its C{idle} method is called with
    C{MSG_IN_DB} and the year of the publication.

    @type rec_id: int
    @param rec_id: record identifier

    @type title: str
    @param title: title of the collection

    @rtype: bool
    @return: C{True} when a publication with the same origin is found,
    C{False} otherwise.

    """
    database = self.db

    # the origin URL identifies the record in the publications table
    origin = OAI_URL % (self.harvester.host, rec_id)
    found_id = get_id(database.publications, origin=origin)

    if found_id is not None:
        record = database.publications[found_id]

        # keep track of the rejected record in the log
        entry = Msg(harvester=self.harvester,
                    collection=title,
                    record_id=rec_id,
                    title=record.title)
        self.logs.append(entry)
        entry.idle(MSG_IN_DB, record.year)

        return True

    return False
def _search_parameters(self, collection):
"""Build the keywords to steer the URL search in invenio store.
The main parameter is the collection and the date range defined
......@@ -493,10 +527,13 @@ class PublicationsTool(object):
print "\nprocessing record", rec_id
try:
if self._is_in_db(rec_id, title):
continue
xml = store.get_record(rec_id)
self.decode_xml(xml)
except BaseException as e:
except Exception as e:
print traceback.format_exc()
url = OAI_URL % (host, rec_id)
self.logs.append(Msg(harvester=self.harvester,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment