thesis.py 3.84 KB
Newer Older
1 2 3 4 5 6 7 8
# -*- coding: utf-8 -*-
""" harvest_tools.thesis

"""
import re
import traceback


9
from base import family_name_fr, MSG_CRASH, MSG_LOAD
10 11 12 13 14
from invenio_tools import CheckException
from publicationstool import PublicationsTool
from plugin_dbui import get_id, UNDEF_ID


15
MSG_NO_THESIS = "Reject not a thesis record"
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41


class Thesis(PublicationsTool):
    """Publications tool for thesis.

    Specialises L{PublicationsTool} for thesis records:
    C{select_record} decides whether a record is kept and C{load_db}
    writes the selected record in the C{publications} table.

    """

    def load_db(self, record):
        """Load a thesis in the database.

        The record is compared with the stored publications, first with
        C{check_by_origin} and then with C{check_by_fields}. As soon as
        one of them reports a record identifier, its status is returned
        and nothing is inserted. Otherwise a new row is inserted in
        C{db.publications}, unless C{self.dry_run} is set, and the load
        is logged on the last entry of C{self.logs}.

        @type record: L{Record}
        @param record: the thesis record to be loaded.

        @rtype: int
        @return: the status returned by C{check_by_origin} or
        C{check_by_fields} when one of them reports a record identifier,
        one otherwise (also in dry run mode, where nothing is written).

        """
        db = self.db

        # alias
        defense_date = record.these_defense()
        first_author = record.first_author()
        id_category = get_id(db.categories, code='PHD')
        oai_url = record.oai_url()
        title = record.title()
        universities = ', '.join(record.these_universities())

        # extract the year from the defense date
        # this approach seems the most reliable
        # NOTE(review): re.search returns None when the defense date holds
        # no run of four digits, which makes the line below raise
        # AttributeError -- confirm that upstream checks guarantee a year.
        year = re.search(r"(\d\d\d\d)", defense_date).group(1)

        # check against already published thesis
        rec_id, status = self.check_by_origin(oai_url=oai_url, year=year)
        if rec_id:
            return status

        rec_id, status = self.check_by_fields(first_author=first_author,
                                              defense=defense_date,
                                              id_projects=self.id_project,
                                              id_teams=self.id_team,
                                              oai_url=oai_url,
                                              title=title,
                                              year=year)
        if rec_id:
            return status

        # eventually insert a new thesis
        # NOTE(review): record.submitted()[0] assumes a non-empty sequence
        # -- presumably enforced by check.submitted in select_record; verify.
        if not self.dry_run:
            db.publications.insert(authors=first_author,
                                   authors_institute=first_author,
                                   defense=defense_date,
                                   directors=record.these_directors(),
                                   first_author=first_author,
                                   id_categories=id_category,
                                   id_teams=self.id_team,
                                   id_projects=self.id_project,
                                   id_status=UNDEF_ID,
                                   origin=oai_url,
                                   publication_url=record.paper_url(),
                                   submitted=record.submitted()[0],
                                   title=title,
                                   universities=universities,
                                   year=year)

        # the load is logged in dry run mode too
        self.logs[-1].load(MSG_LOAD, year)
        return 1

    def select_record(self, record):
        """C{True} when thesis is signed by a CPPM author.

        The record is rejected, and C{False} is returned, when:
            - the selection of the base class L{PublicationsTool} fails;
            - one of the checks (authors, OAI, submitted, year,
              universities) raises an exception;
            - C{record.is_thesis()} is false.

        The last two cases are reported on the last entry of
        C{self.logs}.

        @type record: L{Record}
        @param record: the record to be tested.

        @rtype: bool
        @return: C{True} when the record has to be loaded.

        """
        if not PublicationsTool.select_record(self, record):
            return False

        try:
            self.check.my_authors(record,
                                  reference=self._my_author_list(record),
                                  cmpFct=family_name_fr)

            self.check.oai(record)
            self.check.submitted(record)
            self.check.year(record)
            self.check.format_universities(record)

        except CheckException as e:
            # a check failed: log the exception itself as the reason
            self.logs[-1].reject(e, record.year())
            return False

        except Exception as e:
            # unexpected failure: log it as a crash and show the traceback
            self.logs[-1].reject(MSG_CRASH % e, record.year(), translate=False)
            # the parenthesised single-argument form prints the same text
            # with the Python 2 statement and the Python 3 function
            print(traceback.format_exc())
            return False

        if self.dbg:
            print("select thesis record")

        if record.is_thesis():
            return True

        self.logs[-1].reject(MSG_NO_THESIS, record.year())
        return False