# -*- coding: utf-8 -*-
""" harvest_tools.thesis

"""
import re
import traceback

from automaton import Automaton
from base import family_name_fr, MSG_CRASH, MSG_LOAD
from invenio_tools import CheckException, RecordThesis
from plugin_dbui import get_id, UNDEF_ID

MSG_NO_THESIS = "Reject not a thesis record"


class Thesis(Automaton):
    """Automaton harvesting thesis records into the publications table.

    """
    def load_db(self, record):
        """Insert a thesis record in the database unless it is already there.

        The record is first compared to the existing publications via
        C{check_by_fields}. When a match is found its status is returned
        and nothing is inserted. The insert is skipped when C{dry_run}
        is set.

        @type record: L{Record}
        @param record: the thesis record to be loaded.

        @rtype: int
        @return: the status returned by C{check_by_fields} when the record
        matches an existing publication, one otherwise.

        """
        db = self.db

        # values extracted from the record, used both for the
        # duplicate check and for the insert
        defense_date = record.these_defense()
        first_author = record.first_author()
        id_category = get_id(db.categories, code='PHD')
        oai_url = record.oai_url()
        title = record.title()
        universities = ', '.join(record.these_universities())

        # the year is the first group of four digits found in the
        # defense date -- no match raises an AttributeError
        year_match = re.search(r"(\d\d\d\d)", defense_date)
        year = year_match.group(1)

        # look for the same thesis among the existing publications
        found_id, outcome = self.check_by_fields(
            first_author=first_author,
            defense=defense_date,
            id_projects=self.id_project,
            id_teams=self.id_team,
            oai_url=oai_url,
            title=title,
            year=year)

        if found_id:
            return outcome

        # new thesis: write it except in dry run mode
        if not self.dry_run:
            directors = record.these_directors()
            paper_url = record.paper_url()
            submitted = record.submitted()[0]

            db.publications.insert(
                authors=first_author,
                authors_institute=first_author,
                defense=defense_date,
                directors=directors,
                first_author=first_author,
                id_categories=id_category,
                id_teams=self.id_team,
                id_projects=self.id_project,
                id_status=UNDEF_ID,
                origin=oai_url,
                publication_url=paper_url,
                submitted=submitted,
                title=title,
                universities=universities,
                year=year)

        # the load is reported in the current log entry
        self.logs[-1].load(MSG_LOAD, year)

        return 1

    def select_record(self, record):
        """Tell whether the record has to be loaded as a thesis.

        The selection of the base class is applied first, then the
        series of checks on authors, OAI, thesis type, submission date,
        year and universities. A failing check, or any other exception
        raised by them, is reported in the current log entry and the
        record is rejected. Finally the record has to be an instance
        of L{RecordThesis}.

        @type record: L{Record}
        @param record: the record to be tested.

        @rtype: bool
        @return: C{True} when the record passes every test.

        """
        if not Automaton.select_record(self, record):
            return False

        try:
            my_authors = self._my_author_list(record)
            self.check.my_authors(record,
                                  reference=my_authors,
                                  cmpFct=family_name_fr)

            self.check.oai(record)
            self.check.is_thesis(record)
            self.check.submitted(record)
            self.check.year(record)
            self.check.format_universities(record)

        except CheckException as err:
            # a check failed: the exception itself is the reject message
            self.logs[-1].reject(err, record.year())
            return False

        except Exception as err:
            # unexpected failure: log it and show the stack trace
            crash_msg = MSG_CRASH % err
            self.logs[-1].reject(crash_msg, record.year(), translate=False)
            print(traceback.format_exc())
            return False

        if self.dbg:
            print("select thesis record")

        if not isinstance(record, RecordThesis):
            self.logs[-1].reject(MSG_NO_THESIS, record.year())
            return False

        return True