""" harvest_tools.thesis

Automaton checking thesis records and inserting them in the database.

"""
import re
import traceback


8 9 10
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .checkandfix import CheckException
11 12 13
from plugin_dbui import get_id, UNDEF_ID


14
class Thesis(Automaton):
    """Automaton for thesis.

    It checks a thesis record for non conformities and inserts it in the
    database when no matching thesis is found there.

    """
    def check_record(self, record):
        """Check the content of the thesis in order to fix non conformities.

        The generic checks of the base class run first, then the checks
        specific to a thesis. Any failure is reported in the current log
        entry (``self.logs[-1]``).

        Args:
            record (RecordThesis):
                record describing a thesis.

        Returns:
            bool:
                ``False`` when a non conformity is found and can not be
                corrected.

        """
        if not Automaton.check_record(self, record):
            return False

        if self.dbg:
            print("check thesis record")

        try:
            self.check.is_thesis(record)
            self.check.submitted(record)

            self.check.format_universities(record)

            self.check.format_authors(record, fmt="F. Last")
            self.check.get_my_authors(record, sort=True)

        except CheckException as e:
            # expected non conformity: reject the record with its own message
            self.logs[-1].reject(e, record=record)
            return False

        except Exception as e:
            # unexpected failure: reject the record and dump the traceback
            self.logs[-1].reject(MSG_CRASH % e, record=record, translate=False)
            print(traceback.format_exc())
            return False

        return True

    def insert_record(self, record):
        """Insert a thesis in the database.

        The record is rejected, and zero is returned, when no four-digit
        year can be extracted from its defence date. When a matching thesis
        is found by ``get_record_by_fields``, the status returned by that
        method is returned and nothing is inserted. In dry-run mode nothing
        is written in the database but the record is still logged as loaded.

        Args:
            record (RecordThesis):
                record describing a thesis.

        Returns:
            int:
                one when the record is inserted / updated in the database
                zero otherwise.

        """
        db = self.db

        # alias
        defense_date = record.these_defense()
        first_author = record.first_author()
        id_category = get_id(db.categories, code="PHD")
        oai_url = record.oai_url()
        title = record.title()
        universities = ", ".join(record.these_universities())

        # extract the year from the defence date
        # this approach seems the most reliable
        match = re.search(r"(\d\d\d\d)", defense_date or "")
        if match is None:
            # re.search returns None when the date is missing or contains no
            # four-digit year: reject the record instead of crashing on
            # ``None.group``
            msg = "no year found in the defense date %r" % (defense_date,)
            self.logs[-1].reject(MSG_CRASH % msg,
                                 record=record,
                                 translate=False)
            return 0

        year = match.group(1)

        # get an already published thesis
        fields = dict(first_author=first_author,
                      defense=defense_date,
                      id_projects=self.id_project,
                      id_teams=self.id_team,
                      title=title)

        rec_id, status = self.get_record_by_fields(oai_url, year, **fields)
        if rec_id:
            return status

        # eventually insert a new thesis
        # ret stays at one in dry-run mode since the insertion is skipped
        ret = 1
        if not self.dry_run:
            fields = dict(authors=first_author,
                          authors_institute=first_author,
                          defense=defense_date,
                          directors=record.these_directors(),
                          first_author=first_author,
                          id_categories=id_category,
                          id_teams=self.id_team,
                          id_projects=self.id_project,
                          id_status=UNDEF_ID,
                          origin=oai_url,
                          publication_url=record.paper_url(),
                          submitted=record.submitted()[0],
                          title=title,
                          universities=universities,
                          year=year)

            ret = self._insert_in_db(log_year=year, **fields)

        if ret == 1:
            self.logs[-1].load(MSG_LOAD, year)
            return 1

        return 0