"""Harvest controllers.

Actions which run harvesters, insert MarcXML records or prepare a form
to edit and insert a single record.

NOTE(review): ``T``, ``db``, ``virtdb``, ``request``, ``response``,
``HTTP`` and ``CODE`` are used but not imported here; presumably they are
injected by the web2py execution environment -- confirm.

"""
import traceback

from gluon import current
from gluon.restricted import RestrictedError
from harvest_tools import (build_harvester_tool,
                           CheckAndFix,
                           CheckException,
                           DRY_RUN,
                           format_author_fr,
                           family_name_fr,
                           search_synonym,
                           ToolException)
from invenio_tools import (load_record,
                           OAI_URL,
                           RecordConf,
                           RecordThesis,
                           REG_INT)
from plugin_dbui import (inline_alert,
                         Selector,
                         to_formPanel,
                         UNDEF_ID)

MODE_DRY_RUN = T(DRY_RUN)

MSG_NO_AFFILIATION = "Affiliation keys are not defined !!!"
MSG_NO_HARVESTER = "No harvesters for your selection !!!"
MSG_NO_RECORD = "Sorry, the record does not exist."


def _crash_report(trace):
    """Wrap a formatted traceback in the message returned to the user.

    The helper takes an argument on purpose: web2py only exposes
    argument-less controller functions as actions.

    Args:
        trace: the text returned by ``traceback.format_exc()``.

    Returns:
        the traceback rendered by ``CODE(...).xml()`` surrounded by the
        leading and trailing separators.

    """
    # NOTE(review): both separators read as bare line breaks in this copy
    # of the file; HTML markup may have been lost -- confirm against the
    # repository before merging.
    msg = "\n\n\n"
    msg += CODE(trace).xml()
    msg += "\n"
    return msg


def _missing_field_alert(table, selector, fields):
    """Build an alert for the first selector field which is not set.

    Args:
        table: the selector table, used to get the label of the field.
        selector: the selector holding the values sent by the form.
        fields: names of the fields which have to be defined.

    Returns:
        the value of ``inline_alert`` for the first missing field, or
        ``None`` when all the fields are defined.

    """
    for el in fields:
        if not selector[el]:
            msg = T("All fields of the form have to be defined !!!")
            # NOTE(review): the separator reads as a bare line break in
            # this copy of the file; markup may have been lost -- confirm.
            msg += "\n"
            msg += T("The field '%s' is missing ...") % T(table[el].label)
            return inline_alert(T("Error"), msg)

    return None


def free_run():
    """Run a free harvester.

    All harvester parameters are defined via the selector.

    Returns:
        the report of the harvester tool, completed with the selector and
        rendered by the view ``harvest/layout.html``. An alert or a
        message is returned instead when the affiliation keys are not
        defined, a field is missing, no tool can be built or the
        processing fails.

    """
    if db(db.affiliation_keys.id > 0).count() == 0:
        return inline_alert(T("Error"), T(MSG_NO_AFFILIATION))

    table = virtdb.free_harvester_selector

    fields = ("collections",
              "controller",
              "host",
              "id_projects",
              "id_teams",
              "id_categories",
              "ratio")

    try:
        selector = Selector(
            table,
            exclude_fields=("mode", "year_start", "year_end"))

        alert = _missing_field_alert(table, selector, fields)
        if alert is not None:
            return alert

        tool = build_harvester_tool(
            db,
            selector.id_teams,
            selector.id_projects,
            selector.controller,
            selector.id_categories,
            year_start=selector.year_start,
            year_end=selector.year_end,
            dry_run=(selector.mode == MODE_DRY_RUN),
            debug=False)

        if not tool:
            return inline_alert(T("Error"), T("Select an harvester."))

        tool.process_url(selector.host, selector.collections)

    except ToolException as e:
        return T(str(e))

    # Exception rather than BaseException: interpreter exit requests
    # (SystemExit, KeyboardInterrupt) must not be turned into a report
    except Exception:
        return _crash_report(traceback.format_exc())

    response.view = "harvest/layout.html"
    report = tool.report()
    report["selector"] = selector

    return report


def edit_insert():
    """Edit an invenio record and insert it in the database.

    Note:
        Recovery procedures are applied to fix basic non-conformity,
        but no checks are run. The user is editing the record to
        fix problems.

    Returns:
        a dictionary with the form configuration (``cfg``) and the values
        to be loaded in the form (``values``), or an alert when the
        request can not be processed.

    Raises:
        HTTP: with the status 500 when an unexpected exception occurs.
            The exception is logged beforehand as a ticket.

    """
    if db(db.affiliation_keys.id > 0).count() == 0:
        return inline_alert(T("Error"), T(MSG_NO_AFFILIATION))

    fields = ("controller",
              "host",
              "id_projects",
              "id_teams",
              "id_categories",
              "record_id")

    table = virtdb.edit_insert_selector

    try:
        # Protection
        #
        # NOTE
        # With plugin_dbui 0.7.1 it is possible to enter decimal value
        # for the record id (e.g by typing 1503,03 in the field)
        #
        if REG_INT.match(request.vars.Edit_insert_selectorRecord_id) is None:
            msg = T("The record id is not well formed.")
            # NOTE(review): the separator reads as a bare line break in
            # this copy of the file; markup may have been lost -- confirm.
            msg += "\n"
            msg += T("Use only digit character, no comma, no dot...")
            return inline_alert(T("Error"), msg)

        selector = Selector(table)

        alert = _missing_field_alert(table, selector, fields)
        if alert is not None:
            return alert

        # record
        record = load_record(selector.host, selector.record_id)
        if record is None:
            return inline_alert(T("Error"), T(MSG_NO_RECORD))

        # form configuration
        cfg = to_formPanel(db.publications)

        # tools to extract values to be loaded in the form
        values = {}
        check = CheckAndFix()

        # fix invalid oai
        check.recover_oai(record, selector.host)

        # title, preprint, URL, report number
        values["PublicationsTitle"] = record.title()
        values["PublicationsPreprint"] = record.preprint_number()
        values["PublicationsPublication_url"] = record.paper_url()
        values["PublicationsReport_numbers"] = record.report_number()

        # authors
        # recovery is best effort: the user fixes what remains in the form
        try:
            check.authors(record)
            check.format_authors(record, format_author_fr)
            check.my_affiliation(
                record,
                selector.id_projects,
                selector.id_teams)
            check.get_my_authors(record, cmpFct=family_name_fr)

        except CheckException:
            pass

        fauthor = record.first_author()
        if isinstance(fauthor, list):
            fauthor = u", ".join(fauthor)

        values["PublicationsFirst_author"] = fauthor
        values["PublicationsAuthors"] = record.authors()
        values["PublicationsAuthors_institute"] = record.my_authors

        # collaboration
        recId = UNDEF_ID
        try:
            recId = search_synonym(db.collaborations,
                                   "collaboration",
                                   record.collaboration())

        except ToolException:
            pass

        values["PublicationsId_collaborations"] = int(recId)

        # teams, project, categories
        values["PublicationsId_categories"] = int(selector.id_categories)
        values["PublicationsId_projects"] = int(selector.id_projects)
        values["PublicationsId_teams"] = int(selector.id_teams)

        # origin
        # Note:
        # - It is always defined
        # - Use a trivial algorithm to recover it
        oai_url = record.oai_url()
        if not oai_url:
            oai_url = OAI_URL % (selector.host, selector.record_id)

        values["PublicationsOrigin"] = oai_url

        # publishers
        if selector.controller in ("articles", "proceedings"):

            check.clean_erratum(record)
            check.paper_reference(record)
            check.format_editor(record)

            recId = UNDEF_ID
            try:
                recId = search_synonym(db.publishers,
                                       "abbreviation",
                                       record.paper_editor())

            except ToolException:
                pass

            values["PublicationsId_publishers"] = int(recId)
            values["PublicationsVolume"] = record.paper_volume()
            values["PublicationsPages"] = record.paper_pages()

        # conference
        if selector.controller in ("proceedings", "talks"):

            try:
                check.country(record)
                check.conference_date(record, selector.host)

            except CheckException:
                pass

            # NOTE(review): the nesting of this section is reconstructed
            # from a copy of the file without indentation; all conference
            # values are assumed to depend on the record type -- confirm.
            if isinstance(record, RecordConf):
                values["PublicationsConference_title"] = \
                    record.conference_title()

                values["PublicationsConference_url"] = \
                    record.conference_url()

                values["PublicationsConference_dates"] = \
                    record.conference_dates()

                values["PublicationsConference_town"] = \
                    record.conference_town()

                recId = UNDEF_ID
                try:
                    recId = search_synonym(db.countries,
                                           "country",
                                           record.conference_country())

                except ToolException:
                    pass

                values["PublicationsId_countries"] = int(recId)

                values["PublicationsConference_speaker"] = \
                    record.first_author()

        # thesis
        if selector.controller == "theses":

            if isinstance(record, RecordThesis):
                values["PublicationsUniversities"] = \
                    record.these_universities()

                values["PublicationsDirectors"] = record.these_directors()
                values["PublicationsDefense"] = record.these_defense()

        # submitted date and year
        try:
            check.submitted(record)
            check.year(record)

        except CheckException:
            pass

        values["PublicationsSubmitted"] = ", ".join(record.submitted())

        if record.is_published():
            year = record.paper_year()

        else:
            year = record.year()

        values["PublicationsYear"] = year

    except Exception:
        # log the exception in the web2py ticker system
        ticket = RestrictedError(layer="harvester.py",
                                 code="edit_insert",
                                 output="",
                                 environment=current.globalenv)
        ticket.log(request)

        # inform the user that something went wrong in the server
        raise HTTP(500)

    return dict(cfg=cfg, values=values)


def insert_marcxml():
    """Insert a MarcXML record in the database.

    Returns:
        the report of the harvester tool, completed with the selector and
        rendered by the view ``harvest/layout.html``. An alert or a
        message is returned instead when the affiliation keys are not
        defined, no tool can be built or the processing fails.

    """
    if db(db.affiliation_keys.id > 0).count() == 0:
        return inline_alert(T("Error"), T(MSG_NO_AFFILIATION))

    try:
        # a one-element tuple needs the trailing comma,
        # ("mode") is the plain string "mode"
        selector = Selector(virtdb.marc12_selector,
                            exclude_fields=("mode",))

        tool = build_harvester_tool(
            db,
            selector.id_teams,
            selector.id_projects,
            selector.controller,
            selector.id_categories,
            year_start=selector.year_start,
            year_end=selector.year_end,
            dry_run=(selector.mode == MODE_DRY_RUN),
            debug=False)

        if not tool:
            return inline_alert(T("Error"), T("Select an harvester."))

        tool.harvester.host = selector.host
        tool.process_xml(selector.xml)

    except ToolException as e:
        return T(str(e))

    # Exception rather than BaseException: interpreter exit requests
    # (SystemExit, KeyboardInterrupt) must not be turned into a report
    except Exception:
        return _crash_report(traceback.format_exc())

    response.view = "harvest/layout.html"
    report = tool.report()
    report["selector"] = selector

    return report


def run():
    """Run an harvester.

    Scan the cds/invenio stores to find articles published during
    a given range of years and for a given team/project.
    Insert them in the database if they don't exist.

    The scanning is steered using the current request arguments as well as
    the harvest parameters associated to this action.

    Search arguments are defined via the harvester selector.

    Returns:
        a dictionary with the keys ``collection_logs``, ``controller``,
        ``logs`` and ``selector`` rendered by the view
        ``harvest/layout.<extension>``. An alert or a message is returned
        instead when the affiliation keys are not defined, no harvester
        matches the selection, no tool can be built or the processing
        fails.

    """
    if db(db.affiliation_keys.id > 0).count() == 0:
        return inline_alert(T("Error"), T(MSG_NO_AFFILIATION))

    try:
        selector = Selector(
            virtdb.harvester_selector,
            exclude_fields=("mode", "year_start", "year_end"))

        # Get hosts and collections
        rows = selector.select(db.harvesters)
        if not rows:
            raise ToolException(T(MSG_NO_HARVESTER))

        collection_logs = []
        logs = []

        for row in rows:
            tool = build_harvester_tool(
                db,
                selector.id_teams,
                selector.id_projects,
                selector.controller,
                row.harvesters.id_categories,
                year_start=selector.year_start,
                year_end=selector.year_end,
                dry_run=(selector.mode == MODE_DRY_RUN),
                debug=False)

            if not tool:
                return inline_alert(T("Error"), T("Select an harvester."))

            tool.process_url(row.harvesters.host, row.harvesters.collections)

            collection_logs.extend(tool.collection_logs)
            logs.extend(tool.logs)

    except ToolException as e:
        return T(str(e))

    # Exception rather than BaseException: interpreter exit requests
    # (SystemExit, KeyboardInterrupt) must not be turned into a report
    except Exception:
        return _crash_report(traceback.format_exc())

    # delegate rendering to the report view
    response.view = "harvest/layout.%s" % request.extension

    return dict(collection_logs=collection_logs,
                controller=selector.controller,
                logs=logs,
                selector=selector)


def run_all():
    """Run all harvesters in one go.

    The harvesters are optionally restricted to the team and / or to the
    project defined in the selector.

    Returns:
        a dictionary with the keys ``collection_logs``, ``controller``,
        ``logs`` and ``selector`` rendered by the view
        ``harvest/layout.<extension>``. An alert or a message is returned
        instead when the affiliation keys are not defined, no harvester
        matches the selection, no tool can be built or the processing
        fails.

    """
    if db(db.affiliation_keys.id > 0).count() == 0:
        return inline_alert(T("Error"), T(MSG_NO_AFFILIATION))

    collection_logs = []
    logs = []

    try:
        selector = Selector(
            virtdb.run_all_harvesters_selector,
            exclude_fields=("mode", "year_start", "year_end"))

        # restrict the harvesters to the selected team and / or project
        query = None
        for fieldname in ("id_teams", "id_projects"):
            if selector[fieldname]:
                q = db.harvesters[fieldname] == selector[fieldname]
                query = (q if query is None else (query) & (q))

        harvesters = db(query).select(db.harvesters.ALL)
        if not len(harvesters):
            return inline_alert(T("Error"), T(MSG_NO_HARVESTER))

        for harvester in harvesters:
            tool = build_harvester_tool(
                db,
                harvester.id_teams,
                harvester.id_projects,
                harvester.controller,
                harvester.id_categories,
                year_start=selector.year_start,
                year_end=selector.year_end,
                dry_run=(selector.mode == MODE_DRY_RUN),
                debug=False)

            if not tool:
                return inline_alert(T("Error"), T("Select an harvester."))

            tool.process_url(harvester.host, harvester.collections)

            collection_logs.extend(tool.collection_logs)
            logs.extend(tool.logs)

    except ToolException as e:
        return T(str(e))

    # Exception rather than BaseException: interpreter exit requests
    # (SystemExit, KeyboardInterrupt) must not be turned into a report
    except Exception:
        return _crash_report(traceback.format_exc())

    # tune selector parameters used in the report title
    if query is None:
        selector.id_projects = None

    # delegate rendering to the report view
    response.view = "harvest/layout.%s" % request.extension

    return dict(collection_logs=collection_logs,
                controller="all harvesters",
                logs=logs,
                selector=selector)