Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

thesis.py 3.75 KB
Newer Older
1 2 3 4 5 6 7 8
# -*- coding: utf-8 -*-
""" harvest_tools.thesis

"""
import re
import traceback


9
from automaton import Automaton
LE GAC Renaud's avatar
LE GAC Renaud committed
10
from base import family_name_fr, format_author_fr, MSG_CRASH, MSG_LOAD
11
from invenio_tools import CheckException, RecordThesis
12 13 14
from plugin_dbui import get_id, UNDEF_ID


15
# Rejection message logged by Thesis.check_record when the record is not
# a RecordThesis instance.
MSG_NO_THESIS = "Reject not a thesis record"
16 17


18
class Thesis(Automaton):
    """Automaton for thesis.

    Specialises :class:`Automaton` with the validation
    (:meth:`check_record`) and the database insertion
    (:meth:`insert_record`) of thesis records.

    """

    def check_record(self, record):
        """Check the content of the thesis in order to fix non conformities.

        The checks of ``Automaton.check_record`` run first, followed by the
        thesis specific ones. Every failure is reported through the current
        log entry (``self.logs[-1]``).

        Args:
            record (RecordThesis): record describing a thesis.

        Returns:
            bool: ``False`` when a non conformity is found and can not be
            corrected.

        """
        if not Automaton.check_record(self, record):
            return False

        try:
            self.check.is_thesis(record)
            self.check.submitted(record)
            self.check.year(record)
            self.check.format_universities(record)

            self.check.format_authors(record, format_author_fr)
            self.check.get_my_authors(record, family_name_fr)

        except CheckException as e:
            # expected non conformity: reject the record with its own message
            self.logs[-1].reject(e, record=record)
            return False

        except Exception as e:
            # unexpected failure: reject the record instead of aborting and
            # dump the traceback on the standard output for diagnosis
            self.logs[-1].reject(MSG_CRASH % e, record=record, translate=False)
            # parenthesised single-argument form: valid on Python 2 and 3
            print(traceback.format_exc())
            return False

        if self.dbg:
            print("check thesis record")

        if isinstance(record, RecordThesis):
            return True

        self.logs[-1].reject(MSG_NO_THESIS, record=record)
        return False

    def insert_record(self, record):
        """Insert a thesis in the database.

        Nothing is inserted when ``get_record_by_fields`` finds a matching
        record; its status is returned instead. In dry-run mode
        (``self.dry_run``) the insertion is skipped but the record is still
        logged and counted as loaded.

        Args:
            record (RecordThesis): record describing a thesis.

        Returns:
            int: one when the record is inserted / updated in the database
                zero otherwise.

        """
        db = self.db

        # alias
        defense_date = record.these_defense()
        first_author = record.first_author()
        id_category = get_id(db.categories, code='PHD')
        oai_url = record.oai_url()
        title = record.title()
        universities = ', '.join(record.these_universities())

        # extract the year from the defence date
        # this approach seems the most reliable
        # NOTE(review): re.search returns None when the date holds no run of
        # four digits, in which case .group raises AttributeError --
        # presumably excluded by the checks of check_record; confirm.
        year = re.search(r"(\d\d\d\d)", defense_date).group(1)

        # get an already published thesis
        fields = dict(first_author=first_author,
                      defense=defense_date,
                      id_projects=self.id_project,
                      id_teams=self.id_team,
                      oai_url=oai_url,
                      title=title,
                      year=year)

        rec_id, status = self.get_record_by_fields(**fields)
        if rec_id:
            return status

        # eventually insert a new thesis
        # ret stays at one in dry-run mode since the database is not touched
        ret = 1
        if not self.dry_run:
            fields = dict(authors=first_author,
                          authors_institute=first_author,
                          defense=defense_date,
                          directors=record.these_directors(),
                          first_author=first_author,
                          id_categories=id_category,
                          id_teams=self.id_team,
                          id_projects=self.id_project,
                          id_status=UNDEF_ID,
                          origin=oai_url,
                          publication_url=record.paper_url(),
                          submitted=record.submitted()[0],
                          title=title,
                          universities=universities,
                          year=year)

            ret = self._insert_in_db(log_year=year, **fields)

        if ret == 1:
            self.logs[-1].load(MSG_LOAD, year)
            return 1

        return 0