Commit 4a70f4dd authored by LE GAC Renaud
Browse files

Migrate harvester run and run_all.

parent 945dcead
......@@ -12,11 +12,11 @@ from harvest_tools import (build_harvester_tool,
DRY_RUN,
search_synonym,
ToolException)
from invenio_tools import (load_record,
from invenio_tools import (CdsException,
load_record,
OAI_URL,
RecordConf,
RecordThesis,
REG_INT)
RecordThesis)
from plugin_dbui import (inline_alert,
Selector,
to_formPanel,
......@@ -116,7 +116,7 @@ def edit_insert():
# With plugin_dbui 0.7.1 it is possible to enter decimal value
# for the record id (e.g by typing 1503,03 in the field)
#
if REG_INT.match(request.vars.Edit_insert_selectorRecord_id) is None:
if not request.vars.Edit_insert_selectorRecord_id.isalnum():
msg = T("The <i>record id</i> is not well formed.")
msg += "<br>"
msg += T("Use only digit character, no comma, no dot...")
......@@ -341,7 +341,7 @@ def insert_marcxml():
def run():
"""Run an harvester.
Scan the cds/invenio stores to find articles published during
Scan the cds/invenio stores to find publication during
a given range of years and for a given team/project.
Insert them in the database if they don't exist.
......@@ -388,10 +388,15 @@ def run():
logs.extend(tool.logs)
except ToolException as e:
return T(str(e))
log = tool.logs[-1]
msg = "<h4>Error on record %s (%s)</h4>" % (log.url, log.collection)
msg += T(str(e))
return msg
except BaseException as e:
msg = "<br><br><hr/>"
except Exception as e:
log = tool.logs[-1]
msg = "<h4>Error on record %s (%s)</h4>" % (log.url, log.collection)
msg += "<hr/>"
msg += CODE(traceback.format_exc()).xml()
msg += "<hr/>"
return msg
......@@ -451,10 +456,15 @@ def run_all():
logs.extend(tool.logs)
except ToolException as e:
return T(str(e))
log = tool.logs[-1]
msg = "<h4>Error on record %s (%s)</h4>" % (log.url, log.collection)
msg += T(str(e))
return msg
except BaseException as e:
msg = "<br><br><hr/>"
except Exception as e:
log = tool.logs[-1]
msg = "<h4>Error on record %s (%s)</h4>" % (log.url, log.collection)
msg += "<hr/>"
msg += CODE(traceback.format_exc()).xml()
msg += "<hr/>"
return msg
......
......@@ -7,7 +7,6 @@ import traceback
from .base import (MSG_FIX_ORIGIN,
MSG_IN_DB,
search_synonym,
ToolException)
from .checkandfix import CheckAndFix
from gluon.storage import Storage
......@@ -629,6 +628,7 @@ class Automaton(object):
collections (unicode):
list of collection to be interrogated.
Collections are separated by a comma.
Raises:
StoreException:
......
......@@ -19,7 +19,7 @@ def harvester_messages():
return messages()
def test_lhcb_acl(harvester_messages):
def test_lhcb(harvester_messages):
"""Harvest LHCb article for a given year.
This test is useful to:
......@@ -60,3 +60,46 @@ def test_lhcb_acl(harvester_messages):
msgs = set([el.txt for el in tool.logs])
assert msgs.issubset(harvester_messages)
def test_atlas(harvester_messages):
    """Harvest ATLAS articles for the current year in dry-run mode.

    This test is useful to:

    * debug a harvester
    * profile its performance to see where the time is spent
    * compare different implementations to measure improvements
    * ...

    """
    # The identifiers below only matter when records are inserted in the
    # database. The current year is selected in order to test different cases.
    database = current.db
    team_id = 3       # ATLAS
    project_id = 2    # ATLAS
    category_id = 2   # ACL
    start_year = str(current.request.now.year)

    # build the harvester
    harvester = build_harvester_tool(
        database,
        team_id,
        project_id,
        "articles",
        category_id,
        year_start=start_year,
        year_end="",
        dry_run=True,
        debug=True)

    assert isinstance(harvester, Articles)

    # run the harvester
    harvester.process_url("cds.cern.ch", "ATLAS Papers")

    # The number of articles cannot be checked since it evolves within a
    # year; only verify that no unexpected message was logged.
    logged = {entry.txt for entry in harvester.logs}
    assert logged.issubset(harvester_messages)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment