# thesis.py
# -*- coding: utf-8 -*-
""" harvest_tools.thesis

"""
import re
import traceback


9
from automaton import Automaton
10
from base import family_name_fr, MSG_CRASH, MSG_LOAD
11
from invenio_tools import CheckException, RecordThesis
12 13 14
from plugin_dbui import get_id, UNDEF_ID


15
# Rejection message logged by Thesis.check_record when the record passed all
# the checks but is not a RecordThesis instance.
MSG_NO_THESIS = "Reject not a thesis record"
16 17


18
class Thesis(Automaton):
    """Automaton for thesis.

    Extends L{Automaton} with the checks applied to a thesis record
    (L{check_record}) and with its insertion in the C{publications}
    table (L{insert_record}).

    """
    # Rejection message used by insert_record when no four-digit year can be
    # extracted from the defense date.
    _MSG_NO_DEFENSE_YEAR = "Reject no year found in the defense date"

    def check_record(self, record):
        """Check the content of the thesis in order to fix non conformities.

        The checks of L{Automaton.check_record} are run first, followed by
        the thesis specific ones. Every failure is logged as a rejection on
        the current log entry, C{self.logs[-1]}.

        @type record: L{Record}
        @param record: the record to be checked. It is accepted only when
        it is a L{RecordThesis} instance.

        @rtype: bool
        @return: C{False} when a non conformity is found and can not be
        corrected.

        """
        if not Automaton.check_record(self, record):
            return False

        try:
            self.check.my_authors(record,
                                  reference=self._my_author_list(record),
                                  cmpFct=family_name_fr)

            self.check.oai(record)
            self.check.is_thesis(record)
            self.check.submitted(record)
            self.check.year(record)
            self.check.format_universities(record)

        except CheckException as e:
            # a check failed: log the reason given by the checker
            self.logs[-1].reject(e, record.year())
            return False

        except Exception as e:
            # unexpected failure: log it as a crash and dump the traceback
            self.logs[-1].reject(MSG_CRASH % e, record.year(), translate=False)
            # single-argument parenthesised form: same output with the
            # Python 2 print statement and the Python 3 print function
            print(traceback.format_exc())
            return False

        if self.dbg:
            print("select thesis record")

        if isinstance(record, RecordThesis):
            return True

        self.logs[-1].reject(MSG_NO_THESIS, record.year())
        return False

    def insert_record(self, record):
        """Insert a thesis in the database.

        The year of the publication is extracted from the defense date.
        When C{get_record_by_fields} finds a matching record, its status is
        returned and nothing is inserted. Otherwise a new row is inserted
        in C{db.publications}, unless C{self.dry_run} is set, and the load
        is logged on the current log entry.

        @type record: L{Record}
        @param record: the thesis record to be inserted.

        @rtype: int
        @return: one when the record is inserted / updated in the database
        zero otherwise. When a matching record already exists, the status
        returned by C{get_record_by_fields} is returned. Zero is returned,
        after logging a rejection, when no year can be extracted from the
        defense date.

        """
        db = self.db

        # alias
        defense_date = record.these_defense()
        first_author = record.first_author()
        id_category = get_id(db.categories, code='PHD')
        oai_url = record.oai_url()
        title = record.title()
        universities = ', '.join(record.these_universities())

        # extract the year from the defence date
        # this approach seems the most reliable
        # re.search returns None when there is no match: reject the record
        # instead of failing on None.group(1)
        found = re.search(r"(\d\d\d\d)", defense_date or "")
        if found is None:
            self.logs[-1].reject(self._MSG_NO_DEFENSE_YEAR, record.year())
            return 0

        year = found.group(1)

        # get an already published thesis
        rec_id, status = self.get_record_by_fields(first_author=first_author,
                                                   defense=defense_date,
                                                   id_projects=self.id_project,
                                                   id_teams=self.id_team,
                                                   oai_url=oai_url,
                                                   title=title,
                                                   year=year)
        if rec_id:
            return status

        # eventually insert a new thesis
        # the first author is also stored as authors and authors_institute
        if not self.dry_run:
            db.publications.insert(authors=first_author,
                                   authors_institute=first_author,
                                   defense=defense_date,
                                   directors=record.these_directors(),
                                   first_author=first_author,
                                   id_categories=id_category,
                                   id_teams=self.id_team,
                                   id_projects=self.id_project,
                                   id_status=UNDEF_ID,
                                   origin=oai_url,
                                   publication_url=record.paper_url(),
                                   submitted=record.submitted()[0],
                                   title=title,
                                   universities=universities,
                                   year=year)

        # the load is logged, and one returned, in dry run mode too
        self.logs[-1].load(MSG_LOAD, year)
        return 1