# -*- coding: utf-8 -*-
""" harvest_tools.preprints

"""
import traceback


from automaton import Automaton
from base import family_name_fr, format_author_fr, MSG_CRASH, MSG_LOAD
from invenio_tools import CheckException, RecordConf, RecordThesis
from plugin_dbui import UNDEF_ID


MSG_PREPRINT_IS_PAPER = "Reject preprint is a published paper"
MSG_PREPRINT_IS_CONFERENCE = "Reject preprint is a conference"
MSG_PREPRINT_IS_THESIS = "Reject preprint is a thesis"
MSG_PREPRINT_NO_NUMBER = "Reject no preprint number"


class Preprints(Automaton):
    """Harvesting automaton specialised for preprints.

    """
    def check_record(self, record):
        """Validate a preprint record, fixing non conformities when possible.

        The generic ``Automaton`` checks run first. The record is then
        rejected (and the reason logged in the last entry of ``self.logs``)
        when it is published, is a conference record, is a thesis record
        or has no preprint number. Finally the submission date, the year
        and the author fields are checked through ``self.check``.

        Args:
            record (RecordPubli): record describing a preprint.

        Returns:
            bool: ``False`` when the record is rejected, i.e. a non
            conformity is found and can not be corrected, ``True`` otherwise.

        """
        if not Automaton.check_record(self, record):
            return False

        if self.dbg:
            print("check preprint record")

        # The tests are evaluated in order, the first matching one gives
        # the reason of the rejection.
        if record.is_published():
            reason = MSG_PREPRINT_IS_PAPER

        elif isinstance(record, RecordConf):
            reason = MSG_PREPRINT_IS_CONFERENCE

        elif isinstance(record, RecordThesis):
            reason = MSG_PREPRINT_IS_THESIS

        elif not record.preprint_number():
            reason = MSG_PREPRINT_NO_NUMBER

        else:
            reason = None

        if reason is not None:
            self.logs[-1].reject(reason, record=record)
            return False

        try:
            self.check.submitted(record)
            self.check.year(record)
            self.check.format_authors(record, format_author_fr)
            self.check.get_my_authors(record, family_name_fr)

        except CheckException as err:
            # a check failed: the exception itself is the rejection message
            self.logs[-1].reject(err, record=record)
            accepted = False

        except Exception as err:
            # unexpected failure: log it as a crash and dump the traceback
            self.logs[-1].reject(MSG_CRASH % err,
                                 record=record,
                                 translate=False)
            print(traceback.format_exc())
            accepted = False

        else:
            accepted = True

        return accepted

    def insert_record(self, record):
        """Store a preprint in the database unless it is already there.

        Args:
            record (RecordPubli): record describing a preprint.

        Returns:
            int: the status returned by ``get_record_by_fields`` when
            it finds a record identifier. Otherwise one when the record
            is inserted (or when running in dry-run mode) and zero when
            the insertion does not report one.

        """
        # values extracted from the record, used for both lookup and insert
        author = record.first_author()
        origin = record.oai_url()
        number = record.preprint_number()
        label = record.title()
        date = record.submitted()[0]
        year = record.year()

        # collaboration identifier
        id_collaboration = self.search_collaboration(record.collaboration())

        # look for an already registered preprint or article
        criteria = {"first_author": author,
                    "id_projects": self.id_project,
                    "id_teams": self.id_team,
                    "preprint": number,
                    "submitted": date,
                    "title": label}

        rec_id, status = self.get_record_by_fields(origin, year, **criteria)
        if rec_id:
            return status

        # nothing is written in dry-run mode but the record still counts
        if self.dry_run:
            outcome = 1

        else:
            values = {"authors": record.authors(),
                      "authors_institute": record.my_authors,
                      "first_author": author,
                      "id_categories": self.id_category,
                      "id_collaborations": id_collaboration,
                      "id_projects": self.id_project,
                      "id_status": UNDEF_ID,
                      "id_teams": self.id_team,
                      "origin": origin,
                      "preprint": number,
                      "publication_url": record.paper_url(),
                      "submitted": date,
                      "title": label,
                      "year": year}

            outcome = self._insert_in_db(log_year=year, **values)

        if outcome == 1:
            self.logs[-1].load(MSG_LOAD, year)
            return 1

        return 0