Commit fa256098 authored by LE GAC Renaud
Browse files

The method Automaton._is_record_in_db returns the record id instead of a bool.

parent f13b2ffe
......@@ -185,14 +185,15 @@ class Automaton(object):
Keyword Args:
host (unicode): the store. possible values are ``cds.cern.ch`` or
``inspirehep.net``. To be used with *rec_id*.
rec_id (int): the record identifier.
rec_id (int): the record identifier in the store
oai_url (unicode): the URL of the record in the store.
Note:
Either use *host* and *rec_id* or *oai_url*
Returns:
bool: ``True`` when a record if found, ``False`` otherwise.
int: the id of the record in the database when a record is found,
0 otherwise.
Raises:
ValueError: when keyword arguments are not defined properly.
......@@ -214,10 +215,11 @@ class Automaton(object):
setrows = db(query)
if setrows.count() == 0:
return False
return 0
# one record found
columns = [db.publications.id_categories,
columns = [db.publications.id,
db.publications.id_categories,
db.publications.title,
db.publications.year]
publication = setrows.select(*columns).first()
......@@ -227,7 +229,7 @@ class Automaton(object):
# However, keep the record if it is not the case.
# This is required to transform a preprint into article
if publication.id_categories != harvester.id_categories:
return False
return 0
# log
self.logs.append(Msg(harvester=harvester,
......@@ -237,7 +239,7 @@ class Automaton(object):
self.logs[-1].idle(MSG_IN_DB, publication.year)
return True
return publication.id
def _search_parameters(self, collection):
"""Build the keywords to steer the URL search in invenio store.
......@@ -554,9 +556,10 @@ class Automaton(object):
print "process xml record"
# alias
is_record_in_db = self._is_record_in_db
db = self.db
check_record = self.check_record
insert_record = self.insert_record
is_record_in_db = self._is_record_in_db
logs = self.logs
# NOTE
......@@ -572,11 +575,16 @@ class Automaton(object):
# reject the record using the secondary OAI
# require to cover the case:
# - san store A. Only OAI_A is defined.
# - scan store A. Only OAI_A is defined.
# - later scan store B. OAI_B and OAI_A are found.
ok = is_record_in_db(record.title(),
oai_url=record.secondary_oai_url())
if ok:
rec_id = is_record_in_db(record.title(),
oai_url=record.secondary_oai_url())
if rec_id:
# update the oai_url for later uses.
if not self.dry_run:
tp = (record.secondary_oai_url(), record.primary_oai_url())
oai_url = "%s, %s" % tp
db.publications[rec_id] = dict(origin=oai_url)
continue
# start the log for the record
......
......@@ -20,13 +20,14 @@ def test_is_record_in_db():
atm = Automaton(db, 7, 8, u"articles", 2)
flag = atm._is_record_in_db("Luminosity measurement",
host="cds.cern.ch",
rec_id=1389907, )
rec_id_1 = atm._is_record_in_db("Luminosity measurement",
host="cds.cern.ch",
rec_id=1389907, )
assert flag == True
assert rec_id_1 != 0
flag = atm._is_record_in_db("Luminosity measurement",
oai_url = "http://inspirehep.net/record/939619" )
rec_id_2 = atm._is_record_in_db("Luminosity measurement",
oai_url = "http://inspirehep.net/record/939619" )
assert flag == True
\ No newline at end of file
assert rec_id_2 != 0
assert rec_id_1 == rec_id_2
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment