Commit 4a70f4dd authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Migrate harvester run and run_all.

parent 945dcead
...@@ -12,11 +12,11 @@ from harvest_tools import (build_harvester_tool, ...@@ -12,11 +12,11 @@ from harvest_tools import (build_harvester_tool,
DRY_RUN, DRY_RUN,
search_synonym, search_synonym,
ToolException) ToolException)
from invenio_tools import (load_record, from invenio_tools import (CdsException,
load_record,
OAI_URL, OAI_URL,
RecordConf, RecordConf,
RecordThesis, RecordThesis)
REG_INT)
from plugin_dbui import (inline_alert, from plugin_dbui import (inline_alert,
Selector, Selector,
to_formPanel, to_formPanel,
...@@ -116,7 +116,7 @@ def edit_insert(): ...@@ -116,7 +116,7 @@ def edit_insert():
# With plugin_dbui 0.7.1 it is possible to enter decimal value # With plugin_dbui 0.7.1 it is possible to enter decimal value
# for the record id (e.g by typing 1503,03 in the field) # for the record id (e.g by typing 1503,03 in the field)
# #
if REG_INT.match(request.vars.Edit_insert_selectorRecord_id) is None: if not request.vars.Edit_insert_selectorRecord_id.isalnum():
msg = T("The <i>record id</i> is not well formed.") msg = T("The <i>record id</i> is not well formed.")
msg += "<br>" msg += "<br>"
msg += T("Use only digit character, no comma, no dot...") msg += T("Use only digit character, no comma, no dot...")
...@@ -341,7 +341,7 @@ def insert_marcxml(): ...@@ -341,7 +341,7 @@ def insert_marcxml():
def run(): def run():
"""Run an harvester. """Run an harvester.
Scan the cds/invenio stores to find articles published during Scan the cds/invenio stores to find publication during
a given range of years and for a given team/project. a given range of years and for a given team/project.
Insert them in the database if they don't exist. Insert them in the database if they don't exist.
...@@ -388,10 +388,15 @@ def run(): ...@@ -388,10 +388,15 @@ def run():
logs.extend(tool.logs) logs.extend(tool.logs)
except ToolException as e: except ToolException as e:
return T(str(e)) log = tool.logs[-1]
msg = "<h4>Error on record %s (%s)</h4>" % (log.url, log.collection)
msg += T(str(e))
return msg
except BaseException as e: except Exception as e:
msg = "<br><br><hr/>" log = tool.logs[-1]
msg = "<h4>Error on record %s (%s)</h4>" % (log.url, log.collection)
msg += "<hr/>"
msg += CODE(traceback.format_exc()).xml() msg += CODE(traceback.format_exc()).xml()
msg += "<hr/>" msg += "<hr/>"
return msg return msg
...@@ -451,10 +456,15 @@ def run_all(): ...@@ -451,10 +456,15 @@ def run_all():
logs.extend(tool.logs) logs.extend(tool.logs)
except ToolException as e: except ToolException as e:
return T(str(e)) log = tool.logs[-1]
msg = "<h4>Error on record %s (%s)</h4>" % (log.url, log.collection)
msg += T(str(e))
return msg
except BaseException as e: except Exception as e:
msg = "<br><br><hr/>" log = tool.logs[-1]
msg = "<h4>Error on record %s (%s)</h4>" % (log.url, log.collection)
msg += "<hr/>"
msg += CODE(traceback.format_exc()).xml() msg += CODE(traceback.format_exc()).xml()
msg += "<hr/>" msg += "<hr/>"
return msg return msg
......
...@@ -7,7 +7,6 @@ import traceback ...@@ -7,7 +7,6 @@ import traceback
from .base import (MSG_FIX_ORIGIN, from .base import (MSG_FIX_ORIGIN,
MSG_IN_DB, MSG_IN_DB,
search_synonym,
ToolException) ToolException)
from .checkandfix import CheckAndFix from .checkandfix import CheckAndFix
from gluon.storage import Storage from gluon.storage import Storage
...@@ -629,6 +628,7 @@ class Automaton(object): ...@@ -629,6 +628,7 @@ class Automaton(object):
collections (unicode): collections (unicode):
list of collection to be interrogated. list of collection to be interrogated.
Collections are separated by a comma.
Raises: Raises:
StoreException: StoreException:
......
...@@ -19,7 +19,7 @@ def harvester_messages(): ...@@ -19,7 +19,7 @@ def harvester_messages():
return messages() return messages()
def test_lhcb_acl(harvester_messages): def test_lhcb(harvester_messages):
"""Harvest LHCb article for a given year. """Harvest LHCb article for a given year.
This test is useful to: This test is useful to:
...@@ -60,3 +60,46 @@ def test_lhcb_acl(harvester_messages): ...@@ -60,3 +60,46 @@ def test_lhcb_acl(harvester_messages):
msgs = set([el.txt for el in tool.logs]) msgs = set([el.txt for el in tool.logs])
assert msgs.issubset(harvester_messages) assert msgs.issubset(harvester_messages)
def test_atlas(harvester_messages):
    """Harvest the ATLAS articles of the current year in dry-run mode.

    Handy when one wants to:

        * debug a harvester,
        * profile it and find out where the time is spent,
        * compare implementations and measure improvements,
        * ...

    Args:
        harvester_messages: fixture providing the collection of log
            messages a harvester is allowed to emit.

    """
    # The identifiers below only matter when records are really inserted
    # in the database. The current year is used so that different cases
    # are exercised as the year goes by.
    database = current.db
    team_id = 3       # ATLAS
    project_id = 2    # ATLAS
    category_id = 2   # ACL
    this_year = current.request.now.year

    # build the harvester (nothing is written since dry_run is set)
    tool = build_harvester_tool(database,
                                team_id,
                                project_id,
                                "articles",
                                category_id,
                                year_start=str(this_year),
                                year_end="",
                                dry_run=True,
                                debug=True)

    assert isinstance(tool, Articles)

    # run the harvester
    # NOTE(review): arguments look like (store host, collection) -- confirm
    tool.process_url("cds.cern.ch", "ATLAS Papers")

    # analyse the log:
    # the number of articles cannot be checked since it evolves within a
    # year, therefore only verify that no unexpected message shows up.
    emitted = {entry.txt for entry in tool.logs}
    assert emitted.issubset(harvester_messages)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment