Commit bfbe97d7 authored by MEESSEN Christophe
Browse files

PublicationTools.process_xml doesn't return xmls

- Renamed PublicationTools.process_xml() to PublicationTools.decode_xml()
- PublicationTools.process_url() now calls decode_xml() internally and
  no longer returns a list of XML strings
- Renamed PublicationTools.__call__() to PublicationTools.process_xml() for consistency
- Adjusted whitespace for vertical alignment of parameters
- Removed double blank lines between method definitions
- Renamed some two-letter variables to reduce pylint warnings
parent f61c7f28
""" Harvest Controllers
"""
import traceback
from gluon.storage import Storage
from harvest_tools import (format_author_fr,
family_name_fr,
import traceback
from harvest_tools import (format_author_fr,
family_name_fr,
build_harvester_tool,
PublicationsTool,
PublicationsTool,
ToolException)
from invenio_tools import (CdsException,
CheckAndFix,
CheckException,
Marc12Exception,
InvenioStore,
InvenioStore,
Marc12,
OAI_URL)
from plugin_dbui import (get_id,
Selector,
from plugin_dbui import (get_id,
Selector,
to_formPanel,
UNDEF_ID)
# Dummy import to validate code in Ninja IDE.
# The branch is never executed: it only lets the IDE resolve names that
# are otherwise not imported in this module.
if 0:
    from ninja_hack import (virtdb, db, T, CODE, response, request)

# Message used when the selection matches no harvester.
MSG_NO_HARVESTER = T("No harvesters for your selection !!!")

# Template for an ExtJS alert box; filled with (title, message).
INLINE_ALERT = "<script>Ext.Msg.alert('%s', '%s');</script>"

# Translated label compared with ``selector.mode`` to detect a dry run.
# The former ``current.T("dry run")`` assignment is dropped: ``current`` is
# not imported here and the value was immediately overwritten.
DRY_RUN = T("dry run")
def free_run():
"""Run a free harvester.
All harvester parameters are defined via the selector.
"""
table = virtdb.free_harvester_selector
fields = ('collections',
'controller',
'host',
'id_projects',
'id_teams',
'id_categories',
fields = ('collections',
'controller',
'host',
'id_projects',
'id_teams',
'id_categories',
'ratio')
try:
......@@ -51,8 +55,8 @@ def free_run():
msg += "<br>"
msg += T('The field "%s" is missing ...') % T(table[el].label)
return INLINE_ALERT % (T('Error'), msg)
tool = build_harvester_tool(db,
tool = build_harvester_tool(db,
selector.id_teams,
selector.id_projects,
selector.controller,
......@@ -63,43 +67,40 @@ def free_run():
debug=False)
if not tool:
return INLINE_ALERT % (T('Error'), T('Select an harvester.'))
marc12xmls = tool.process_url(selector.host, selector.collections)
for xml in marc12xmls:
tool(xml)
except ToolException, e:
tool.process_url(selector.host, selector.collections)
except ToolException as e:
return T(str(e))
except BaseException, e:
msg = '<br><br><hr/>'
except BaseException as e:
msg = '<br><br><hr/>'
msg += CODE(traceback.format_exc()).xml()
msg += '<hr/>'
return msg
response.view = 'harvest/layout.html'
r = tool.report()
r['selector'] = selector
return r
report = tool.report()
report['selector'] = selector
return report
def edit_insert():
"""Edit an invenio record and insert it in the database.
@note: Recovery procedures are applied to fix basic non-conformity, but
no checks are run. The user is editing the record to fix problems.
"""
fields = ('controller',
'host',
'id_projects',
'id_teams',
fields = ('controller',
'host',
'id_projects',
'id_teams',
'id_categories',
'record_id')
table = virtdb.edit_insert_selector
try:
selector = Selector(table)
......@@ -109,83 +110,87 @@ def edit_insert():
msg += "<br>"
msg += T('The field "%s" is missing ...') % T(table[el].label)
return INLINE_ALERT % (T('Error'), msg)
# record
store = InvenioStore(selector.host)
xml = store.get_record(selector.record_id)
decode = Marc12()
record = decode(xml)[0]
# form configuration
cfg = to_formPanel(db.publications)
# tools to extract values to be loaded in the form
values = {}
check = CheckAndFix()
tool = PublicationsTool(db, selector)
# title, preprint, URL, report number
values['PublicationsTitle'] = record.title()
values['PublicationsPreprint'] = record.preprint_number()
values['PublicationsPublication_url'] = record.paper_url()
values['PublicationsReport_numbers'] = record.report_number()
# authors
try:
check.authors(record)
check.format_authors(record, format_author_fr)
check.my_authors(record,
reference=tool._my_author_list(record),
check.my_authors(record,
reference=tool._my_author_list(record),
cmpFct=family_name_fr)
except CheckException:
pass
values['PublicationsFirst_author'] = record.first_author()
values['PublicationsAuthors'] = record.authors()
values['PublicationsAuthors_institute'] = record.my_authors
# collaboration
id = get_id(db.collaborations, collaboration=record.collaboration())
values['PublicationsId_collaborations'] = (int(id) if id else UNDEF_ID)
recId = get_id(db.collaborations, collaboration=record.collaboration())
values['PublicationsId_collaborations'] = \
int(recId) if recId else UNDEF_ID
# teams, project, categories, origin
values['PublicationsId_categories'] = int(selector.id_categories)
values['PublicationsId_projects'] = int(selector.id_projects)
values['PublicationsId_teams'] = int(selector.id_teams)
values['PublicationsOrigin'] = OAI_URL %(selector.host, selector.record_id)
values['PublicationsOrigin'] = \
OAI_URL % (selector.host, selector.record_id)
# publishers
if selector.controller in ('articles', 'proceedings'):
check.format_editor(record)
id = get_id(db.publishers, abbreviation=record.paper_editor())
values['PublicationsId_publishers'] = (int(id) if id else UNDEF_ID)
recId = get_id(db.publishers, abbreviation=record.paper_editor())
values['PublicationsId_publishers'] = \
int(recId) if recId else UNDEF_ID
values['PublicationsVolume'] = record.paper_volume()
values['PublicationsPages'] = record.paper_pages()
# conference
if selector.controller in ('proceedings', 'talks'):
try:
check.conference(record)
except CheckException:
pass
values['PublicationsConference_title'] = record.conference_title()
values['PublicationsConference_url'] = record.conference_url()
values['PublicationsConference_dates'] = record.conference_dates()
values['PublicationsConference_town'] = record.conference_town()
id = get_id(db.countries, country=record.conference_country())
values['PublicationsId_countries'] = (id if id != None else UNDEF_ID)
recId = get_id(db.countries, country=record.conference_country())
values['PublicationsId_countries'] = \
recId if recId is not None else UNDEF_ID
values['PublicationsConference_speaker'] = record.first_author()
# thesis
if selector.controller == 'theses':
values['PublicationsUniversities'] = record.these_universities()
values['PublicationsDirectors'] = record.these_directors()
values['PublicationsDefense'] = record.these_defense()
......@@ -196,16 +201,16 @@ def edit_insert():
check.year(record)
except CheckException:
pass
values['PublicationsSubmitted'] = ', '.join(record.submitted())
values['PublicationsYear'] = record.year()
except (CdsException, Marc12Exception, ToolException), e:
except (CdsException, Marc12Exception, ToolException) as e:
return INLINE_ALERT % (T('Error'), T(str(e)))
except BaseException, e:
except BaseException as e:
# for debug when web2py is in debug mode
print traceback.format_exc()
print((traceback.format_exc()))
return INLINE_ALERT % (T('Error'), T(str(e)))
return dict(cfg=cfg, values=values)
......@@ -213,55 +218,51 @@ def edit_insert():
def insert_marcxml():
    """Insert a MarcXML record in the database.

    A harvester tool is built from the values of the ``marc12_selector``
    and fed with ``selector.xml`` through its ``process_xml`` method.

    Returns:
        - an inline alert (``INLINE_ALERT``) when no tool could be built,
        - the translated message of a ``ToolException``,
        - an HTML-formatted traceback for any other exception,
        - otherwise the tool report, completed with the ``selector`` and
          rendered by the ``harvest/layout.html`` view.

    """
    try:
        # ``('mode')`` is a plain string, not a tuple; the trailing comma
        # makes it a one-element tuple, like the tuple given in run_all().
        selector = Selector(virtdb.marc12_selector, exclude_fields=('mode',))

        tool = build_harvester_tool(db,
                                    selector.id_teams,
                                    selector.id_projects,
                                    selector.controller,
                                    selector.id_categories,
                                    year_start=selector.year_start,
                                    year_end=selector.year_end,
                                    dry_run=(selector.mode == DRY_RUN),
                                    debug=False)

        if not tool:
            return INLINE_ALERT % (T('Error'), T('Select an harvester.'))

        tool.process_xml(selector.xml)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # any other failure is shown to the user as a formatted traceback
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    # delegate rendering to the report view
    response.view = 'harvest/layout.html'
    report = tool.report()
    report['selector'] = selector
    return report
def run():
"""Run an harvester.
Scan the cds/invenio stores to find articles published during
a given range of years and for a given team/project.
Scan the cds/invenio stores to find articles published during
a given range of years and for a given team/project.
Insert them in the database if they don't exist.
The scanning is steered using the current request arguments as well as
the harvest parameters associated to this action.
Search arguments are defined via the harvester selector.
Search arguments are defined via the harvester selector.
"""
try:
......@@ -273,7 +274,7 @@ def run():
if not row:
raise ToolException(MSG_NO_HARVESTER)
tool = build_harvester_tool(db,
tool = build_harvester_tool(db,
selector.id_teams,
selector.id_projects,
selector.controller,
......@@ -284,34 +285,31 @@ def run():
debug=False)
if not tool:
return INLINE_ALERT % (T('Error'), T('Select an harvester.'))
marc12xmls = tool.process_url(row.harvesters.host, row.harvesters.collections)
for xml in marc12xmls:
tool(xml)
except ToolException, e:
tool.process_url(row.harvesters.host, row.harvesters.collections)
except ToolException as e:
return T(str(e))
except BaseException, e:
msg = '<br><br><hr/>'
except BaseException as e:
msg = '<br><br><hr/>'
msg += CODE(traceback.format_exc()).xml()
msg += '<hr/>'
return msg
response.view = 'harvest/layout.%s' % request.extension
r = tool.report()
r['selector'] = selector
return r
response.view = 'harvest/layout.%s' % request.extension
report = tool.report()
report['selector'] = selector
return report
def run_all():
"""Run all harvesters in one go.
"""
collection_logs = []
logs = []
try:
selector = Selector(virtdb.run_all_harvesters_selector,
exclude_fields=('mode', 'year_start', 'year_end'))
......@@ -328,10 +326,10 @@ def run_all():
harvesters = db(query).select(db.harvesters.ALL)
if not len(harvesters):
return INLINE_ALERT % (T('Error'), MSG_NO_HARVESTER)
for harvester in harvesters:
tool = build_harvester_tool(db,
tool = build_harvester_tool(db,
harvester.id_teams,
harvester.id_projects,
harvester.controller,
......@@ -342,31 +340,28 @@ def run_all():
debug=False)
if not tool:
return INLINE_ALERT % (T('Error'), T('Select an harvester.'))
marc12xmls = tool.process_url(harvester.host, harvester.collections)
for xml in marc12xmls:
tool(xml)
tool.process_url(harvester.host, harvester.collections)
collection_logs.extend(tool.collection_logs)
logs.extend(tool.logs)
except ToolException, e:
except ToolException as e:
return T(str(e))
except BaseException, e:
msg = '<br><br><hr/>'
except BaseException as e:
msg = '<br><br><hr/>'
msg += CODE(traceback.format_exc()).xml()
msg += '<hr/>'
return msg
# tune selector parameters used in the report title
if query == None:
if query is None:
selector.id_projects = None
# delegate rendering to the report view
response.view = 'harvest/layout.%s' % request.extension
response.view = 'harvest/layout.%s' % request.extension
return dict(collection_logs=collection_logs,
controller='all harvesters',
logs=logs,
selector=selector)
selector=selector)
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment