""" harvest_tools.thesis

Automaton checking thesis records and inserting them in the database.

"""
import re

6
from .automaton import Automaton
7
from .base import MSG_CRASH, MSG_LOAD, T4
8
from .checkandfix import CheckException
9
from plugin_dbui import get_id, UNDEF_ID
10
from store_tools import RecordCdsThesis, RecordHepThesis
11

12
# rejection message logged for a record which is not a thesis
MSG_NOT_THESIS = "Reject publication is not a thesis"
13 14


15
class Thesis(Automaton):
    """Automaton for thesis.

    It checks a record describing a thesis, fixes its non conformities
    and inserts it in the database under the category ``PHD``.

    """

    def check_record(self, record):
        """Check the content of the thesis in order to fix non conformities.

        The record is rejected, and the reason is logged in the last entry
        of ``self.logs``, when:

        * its subtype is not ``"thesis"``;
        * ``check_and_fix`` or ``format_universities`` raises a
          ``CheckException``;
        * any other exception is raised while checking the record.

        Args:
            record (RecordThesis):
                record describing a thesis.

        Returns:
            bool:
                ``False`` when a non conformity is found and can not be
                corrected.

        """
        self.logger.debug(f"{T4}check and fix record (thesis)")

        # this automaton only handles thesis: reject anything else
        if record.subtype() != "thesis":
            self.logs[-1].reject(MSG_NOT_THESIS, record=record)
            return False

        try:
            # is with authors from my institute
            # standardise name of collaboration
            # format authors according to my format
            # extract authors from my institute signing the publication
            # is submitted date well formed
            record.check_and_fix(db=self.db,
                                 fmt_author="F. Last",
                                 rex_institute=self.rex_institute,
                                 sep_author=", ",
                                 sort_author=True)

            record.format_universities()

        except CheckException as e:
            # non conformity which can not be fixed
            self.logs[-1].reject(e, record=record)
            return False

        except Exception as e:
            # unexpected failure: log it as a crash instead of stopping
            # the whole harvesting
            self.logs[-1].reject(MSG_CRASH % e, record=record, translate=False)
            return False

        return True

    def insert_record(self, record):
        """Insert a thesis in the database.

        Nothing is inserted when ``get_record_by_fields`` finds a matching
        record; its status is returned instead. When ``self.dry_run`` is
        set, the database is not modified but the record is still logged
        and counted as loaded.

        Args:
            record (RecordThesis):
                record describing a thesis.

        Returns:
            int:
                one when the record is inserted / updated in the database
                zero otherwise.

        """
        db = self.db

        # alias
        defense_date = record.these_defense()
        first_author = record.first_author()
        id_category = get_id(db.categories, code="PHD")
        oai_url = record.oai_url()
        title = record.title()
        universities = ", ".join(record.these_universities())

        # extract the year from the defence date
        # this approach seems the most reliable
        # NOTE(review): raises AttributeError when the defence date does not
        # contain four consecutive digits -- presumably guaranteed by
        # check_record, to be confirmed.
        year = re.search(r"(\d\d\d\d)", defense_date).group(1)

        # get an already published thesis
        fields = dict(first_author=first_author,
                      defense=defense_date,
                      id_projects=self.id_project,
                      id_teams=self.id_team,
                      title=title)

        rec_id, status = self.get_record_by_fields(oai_url, year, **fields)
        if rec_id:
            return status

        # eventually insert a new thesis
        # in dry run mode the insertion is skipped but counted as done
        ret = 1
        if not self.dry_run:
            fields = dict(authors=first_author,
                          authors_institute=first_author,
                          defense=defense_date,
                          directors=record.these_directors(),
                          first_author=first_author,
                          id_categories=id_category,
                          id_teams=self.id_team,
                          id_projects=self.id_project,
                          id_status=UNDEF_ID,
                          origin=oai_url,
                          publication_url=record.paper_url(),
                          submitted=record.submitted(),
                          title=title,
                          universities=universities,
                          year=year)

            ret = self._insert_in_db(log_year=year, **fields)

        if ret == 1:
            self.logs[-1].load(MSG_LOAD, year)
            return 1

        return 0