proceedings.py 5.05 KB
Newer Older
1 2 3 4 5 6 7
# -*- coding: utf-8 -*-
"""harvest_tools.proceedings

Defines the ``Proceedings`` automaton for conference proceedings.

"""
import traceback


8
from automaton import Automaton
LE GAC Renaud's avatar
LE GAC Renaud committed
9
from base import family_name_fr, format_author_fr, MSG_CRASH, MSG_LOAD
10
from checkandfix import CheckException
11
from plugin_dbui import UNDEF_ID
12 13


14
class Proceedings(Automaton):
    """Automaton for conference proceedings.

    """
    def check_record(self, record):
        """Check the content of the proceeding in order to fix non conformities.

        The checks of ``Automaton.check_record`` are run first, followed by
        the proceeding specific ones. The first failing check stops the
        sequence and the reason is registered in the last entry of
        ``self.logs``.

        Args:
            record (RecordConf): record describing a proceeding.

        Returns:
            bool: ``False`` when a non conformity is found and can not be
                corrected.

        """
        if not Automaton.check_record(self, record):
            return False

        if self.dbg:
            # single-argument parenthesized form: same output with the
            # print statement (python 2) and the print function (python 3)
            print("check proceeding record")

        try:
            # conference information
            self.check.is_conference(record)
            self.check.country(record)
            self.check.conference_date(record, self.harvester.host)

            # publication information
            self.check.clean_erratum(record)
            self.check.submitted(record)
            self.check.year(record)

            self.check.format_editor(record)
            self.check.publisher(record)
            self.check.paper_reference(record)

            # authors
            self.check.format_authors(record, format_author_fr)
            self.check.get_my_authors(record, family_name_fr)

        except CheckException as e:
            # non conformity reported by one of the checks
            self.logs[-1].reject(e, record=record)
            return False

        except Exception as e:
            # any other failure: reject the record with the crash message
            # and dump the traceback on the standard output
            self.logs[-1].reject(MSG_CRASH % e, record=record, translate=False)
            print(traceback.format_exc())
            return False

        return True

    def insert_record(self, record):
        """Insert a conference proceeding in the database.

        The database is first searched for an already registered proceeding
        via ``get_record_by_fields``. When one is found, the status returned
        by that search is the result. Otherwise a new proceeding is inserted,
        except in dry run mode where nothing is written but the record is
        still logged as loaded.

        Args:
            record (RecordConf): record describing a proceeding.

        Returns:
            int: one when the record is inserted / updated in the database
                zero otherwise.

        """
        oai_url = record.oai_url()

        # protection against proceeding not published in a journal
        year = record.paper_year() or record.year()

        # alias
        authors = record.authors()
        editor = record.paper_editor()
        pages = record.paper_pages()
        preprint = record.preprint_number()
        report_numbers = record.report_number()
        submitted = record.submitted()[0]
        title = record.title()
        url = record.paper_url()
        volume = record.paper_volume()

        # alias for the conference information
        conference_dates = record.conference_dates()
        conference_title = record.conference_title()
        first_author = record.first_author()
        id_country = self.search_country(record.conference_country())

        # get the collaboration/publisher identifiers
        id_collaboration = self.search_collaboration(record.collaboration())
        id_publisher = self.search_publisher(editor)

        # fields shared by the search and by the insertion
        lookup = dict(authors=authors,
                      conference_title=conference_title,
                      first_author=first_author,
                      id_publishers=id_publisher,
                      preprint=preprint,
                      pages=pages,
                      publication_url=url,
                      report_numbers=report_numbers,
                      submitted=submitted,
                      volume=volume,
                      title=title)

        # get an already published proceeding
        rec_id, status = self.get_record_by_fields(oai_url, year, **lookup)
        if rec_id:
            return status

        # eventually insert a new proceeding
        if not self.dry_run:

            # the search fields completed by those only needed for insertion
            fields = dict(lookup,
                          authors_institute=record.my_authors,
                          conference_dates=conference_dates,
                          conference_speaker=first_author,
                          conference_town=record.conference_town(),
                          conference_url=record.conference_url(),
                          id_categories=self.id_category,
                          id_collaborations=id_collaboration,
                          id_countries=id_country,
                          id_projects=self.id_project,
                          id_status=UNDEF_ID,
                          id_teams=self.id_team,
                          origin=oai_url,
                          year=year)

            if self._insert_in_db(log_year=year, **fields) != 1:
                return 0

        # inserted, or dry run: log the record as loaded
        self.logs[-1].load(MSG_LOAD, year)
        return 1