""" harvest_tools.thesis """ import re from .automaton import Automaton from .base import MSG_CRASH, MSG_LOAD, T4 from .checkandfix import CheckException from plugin_dbui import get_id, UNDEF_ID from store_tools import RecordCdsThesis, RecordHepThesis MSG_NOT_THESIS = "Reject publication is not a thesis" class Thesis(Automaton): """Automaton for thesis. """ def check_record(self, record): """Check the content of the thesis in order to fix non conformities. Args: record (RecordThesis): record describing a thesis. Returns: bool: ``False`` when a non conformity is found and can not be corrected. """ self.logger.debug(f"{T4}check and fix record (thesis)") if record.subtype() == "thesis": self.logs[-1].reject(MSG_NOT_THESIS, record) return False try: # is with authors form my institute # standardise name of collaboration # format authors according to my format # extract authors form my institute signing the publication # is submitted date well formed record.check_and_fix(db=self.db, fmt_author="F. Last", rex_institute=self.rex_institute, sep_author=", ", sort_author=True) record.format_universities() except CheckException as e: self.logs[-1].reject(e, record=record) return False except Exception as e: self.logs[-1].reject(MSG_CRASH % e, record=record, translate=False) return False return True def insert_record(self, record): """Insert a thesis in the database. Args: record (RecordThesis): record describing a thesis. Returns: int: one when the record is inserted / updated in the database zero otherwise. """ db = self.db # alias defense_date = record.these_defense() first_author = record.first_author() id_category = get_id(db.categories, code="PHD") oai_url = record.oai_url() title = record.title() universities = ", ".join(record.these_universities()) # extract the year from the defence date # this approach seems the most reliable year = re.search(r"(\d\d\d\d)", defense_date).group(1) # get an already published thesis fields = dict(first_author=first_author, defense=defense_date, id_projects=self.id_project, id_teams=self.id_team, title=title) rec_id, status = self.get_record_by_fields(oai_url, year, **fields) if rec_id: return status # eventually insert a new thesis ret = 1 if not self.dry_run: fields = dict(authors=first_author, authors_institute=first_author, defense=defense_date, directors=record.these_directors(), first_author=first_author, id_categories=id_category, id_teams=self.id_team, id_projects=self.id_project, id_status=UNDEF_ID, origin=oai_url, publication_url=record.paper_url(), submitted=record.submitted(), title=title, universities=universities, year=year) ret = self._insert_in_db(log_year=year, **fields) if ret == 1: self.logs[-1].load(MSG_LOAD, year) return 1 return 0