# -*- coding: utf-8 -*-
"""harvest_tools.proceedings

Automaton dedicated to conference proceedings.

"""
import traceback

from automaton import Automaton
from base import family_name_fr, format_author_fr, MSG_CRASH, MSG_LOAD
from invenio_tools import CheckException
from plugin_dbui import UNDEF_ID


class Proceedings(Automaton):
    """Automaton for conference proceedings.

    """
    def check_record(self, record):
        """Check the content of the proceeding and fix non conformities.

        The generic checks of ``Automaton.check_record`` are run first.
        The checks specific to proceedings are then applied one after the
        other. The first one raising an exception stops the sequence and
        the record is rejected through the last entry of ``self.logs``.

        Args:
            record (RecordConf): record describing a proceeding.

        Returns:
            bool: ``False`` when a non conformity is found and can not be
                corrected, or when a check crashes. ``True`` otherwise.

        """
        if not Automaton.check_record(self, record):
            return False

        if self.dbg:
            print("check proceeding record")

        try:
            checker = self.check

            # conference information
            checker.is_conference(record)
            checker.country(record)
            checker.conference_date(record)

            # publication information
            checker.clean_erratum(record)
            checker.submitted(record)
            checker.year(record)
            checker.format_editor(record)
            checker.publisher(record)
            checker.paper_reference(record)

            # authors, using the helpers imported from base
            checker.format_authors(record, format_author_fr)
            checker.get_my_authors(record, family_name_fr)

        except CheckException as err:
            # non conformity reported by one of the checks
            self.logs[-1].reject(err, record=record)
            return False

        except Exception as err:
            # unexpected failure: reject the record and show the traceback
            self.logs[-1].reject(MSG_CRASH % err,
                                 record=record,
                                 translate=False)
            print(traceback.format_exc())
            return False

        return True

    def insert_record(self, record):
        """Insert a conference proceeding in the database.

        The database is first searched, via ``get_record_by_fields``, for
        an entry matching the record. When that search returns a record
        identifier, its status is handed back to the caller and nothing is
        inserted.

        When ``self.dry_run`` is set, ``_insert_in_db`` is not called but
        the record is still logged with ``MSG_LOAD`` and counted as loaded.

        Args:
            record (RecordConf): record describing a proceeding.

        Returns:
            int: the status returned by ``get_record_by_fields`` when it
                finds a record identifier. Otherwise one when
                ``_insert_in_db`` returns one (or in dry run mode) and
                zero in all other cases.

        """
        origin = record.oai_url()

        # protection against proceeding not published in a journal:
        # use the year of the record when the paper year is empty
        year = record.paper_year() or record.year()

        # publication information
        authors = record.authors()
        editor = record.paper_editor()
        pages = record.paper_pages()
        preprint = record.preprint_number()
        report_numbers = record.report_number()
        submitted = record.submitted()[0]
        title = record.title()
        paper_url = record.paper_url()
        volume = record.paper_volume()

        # conference information
        dates = record.conference_dates()
        conf_title = record.conference_title()
        speaker = record.first_author()
        id_country = self.search_country(record.conference_country())

        # collaboration / publisher identifiers
        id_collaboration = self.search_collaboration(record.collaboration())
        id_publisher = self.search_publisher(editor)

        # fields used to search for an existing proceeding;
        # all of them are also part of the inserted row
        fields = {
            "authors": authors,
            "conference_title": conf_title,
            "first_author": speaker,
            "id_publishers": id_publisher,
            "preprint": preprint,
            "pages": pages,
            "publication_url": paper_url,
            "report_numbers": report_numbers,
            "submitted": submitted,
            "volume": volume,
            "title": title,
        }

        rec_id, status = self.get_record_by_fields(origin, year, **fields)
        if rec_id:
            return status

        # eventually insert a new proceeding
        if self.dry_run:
            inserted = True

        else:
            # complete the searched fields with the ones only needed
            # by the insertion
            fields.update(
                authors_institute=record.my_authors,
                conference_dates=dates,
                conference_speaker=speaker,
                conference_town=record.conference_town(),
                conference_url=record.conference_url(),
                id_categories=self.id_category,
                id_collaborations=id_collaboration,
                id_countries=id_country,
                id_projects=self.id_project,
                id_status=UNDEF_ID,
                id_teams=self.id_team,
                origin=origin,
                year=year,
            )
            inserted = self._insert_in_db(log_year=year, **fields) == 1

        if not inserted:
            return 0

        self.logs[-1].load(MSG_LOAD, year)
        return 1