# thesis.py
# -*- coding: utf-8 -*-
""" harvest_tools.thesis

"""
import re
import traceback


from base import family_name_fr, fix_amu, MSG_CRASH, MSG_LOAD
from invenio_tools import CheckException
from publicationstool import PublicationsTool
from plugin_dbui import get_id, UNDEF_ID


# rejection message logged by Thesis.select_record when the record
# is not a thesis
MSG_NO_THESIS = "Reject not a thesis record"


class Thesis(PublicationsTool):
    """Publications tool for thesis.

    The method L{select_record} filters the incoming records and the
    method L{load_db} stores the accepted ones in the C{publications}
    table, under the category whose code is C{PHD}.

    """

    # pattern locating a four-digit year inside the defense date
    _REG_YEAR = re.compile(r"(\d\d\d\d)")

    # rejection message used when the defense date contains no year
    _MSG_NO_DEFENSE_YEAR = "Reject defense date without year"

    def load_db(self, record):
        """Load a thesis in the database.

        The publication year is extracted from the defense date.
        The record is skipped when it is already in the database,
        either found by its origin or by its fields. The insert is
        not performed when C{self.dry_run} is true.

        @type record: L{Record}
        @param record: the thesis record to be loaded.

        @rtype: int
        @return: one when the record is inserted / updated in the database
        zero otherwise. Zero is also returned, after logging a reject,
        when no four-digit year is found in the defense date.

        """
        db = self.db

        # alias
        defense_date = record.these_defense()
        first_author = record.first_author()
        id_category = get_id(db.categories, code='PHD')
        oai_url = record.oai_url()
        title = record.title()
        universities = fix_amu(record)

        # extract the year from the defense date
        # this approach seems the most reliable
        # A missing date or a date without year is rejected instead of
        # crashing on the unmatched regular expression.
        match = self._REG_YEAR.search(defense_date or "")
        if match is None:
            self.logs[-1].reject(self._MSG_NO_DEFENSE_YEAR,
                                 record.year(),
                                 translate=False)
            return 0

        year = match.group(1)

        # check against already published thesis
        rec_id, status = self.check_by_origin(oai_url=oai_url, year=year)
        if rec_id:
            return status

        rec_id, status = self.check_by_fields(first_author=first_author,
                                              defense=defense_date,
                                              id_projects=self.id_project,
                                              id_teams=self.id_team,
                                              oai_url=oai_url,
                                              title=title,
                                              year=year)
        if rec_id:
            return status

        # eventually insert a new thesis
        # NOTE(review): submitted()[0] presumably relies on the submitted
        # check run in select_record -- confirm it guarantees a non-empty
        # sequence.
        if not self.dry_run:
            db.publications.insert(authors=first_author,
                                   authors_institute=first_author,
                                   defense=defense_date,
                                   directors=record.these_directors(),
                                   first_author=first_author,
                                   id_categories=id_category,
                                   id_teams=self.id_team,
                                   id_projects=self.id_project,
                                   id_status=UNDEF_ID,
                                   origin=oai_url,
                                   publication_url=record.paper_url(),
                                   submitted=record.submitted()[0],
                                   title=title,
                                   universities=universities,
                                   year=year)

        # the load is logged in dry run mode too
        self.logs[-1].load(MSG_LOAD, year)
        return 1

    def select_record(self, record):
        """C{True} when thesis is signed by a CPPM author.

        The record is accepted when the selection of the base class
        succeeds, when the checks on the authors, the OAI, the submitted
        date and the year pass and when C{record.is_thesis()} is true.
        Any failure is logged as a reject in the last entry of
        C{self.logs}.

        @type record: L{Record}
        @param record: the record to be evaluated.

        @rtype: bool
        @return: C{True} when the record is selected, C{False} otherwise.

        """
        if not PublicationsTool.select_record(self, record):
            return False

        try:
            self.check.my_authors(record,
                                  reference=self._my_author_list(record),
                                  cmpFct=family_name_fr)

            self.check.oai(record)
            self.check.submitted(record)
            self.check.year(record)

        except CheckException as e:
            self.logs[-1].reject(e, record.year())
            return False

        except Exception as e:
            # unexpected failure: log it as a crash and show the traceback
            self.logs[-1].reject(MSG_CRASH % e, record.year(), translate=False)
            print(traceback.format_exc())
            return False

        if self.dbg:
            print("select thesis record")

        if record.is_thesis():
            return True

        self.logs[-1].reject(MSG_NO_THESIS, record.year())
        return False