""" harvest_tools.proceedings

Automaton checking conference proceedings and inserting them in the database.

"""
import traceback


from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .checkandfix import CheckException
from plugin_dbui import UNDEF_ID


class Proceedings(Automaton):
    """Automaton for conference proceedings.

    Specialise the base automaton with the checks (``check_record``) and
    the database insertion (``insert_record``) used for proceedings.

    """
    def check_record(self, record):
        """Check the content of the proceeding in order to fix non conformities.

        The generic checks of the base class are run first. The proceeding
        specific checks follow. The first failing check stops the sequence
        and the rejection is recorded in the current log entry
        (``self.logs[-1]``).

        Args:
            record (RecordConf): record describing a proceeding.

        Returns:
            bool: ``False`` when a non conformity is found and can not be
                corrected.

        """
        # generic checks implemented by the base class
        if not Automaton.check_record(self, record):
            return False

        if self.dbg:
            print("check proceeding record")

        try:
            # conference information
            self.check.is_conference(record)
            self.check.country(record)
            self.check.conference_date(record, self.harvester.host)

            # publication status
            self.check.clean_erratum(record)
            self.check.submitted(record)
            self.check.year(record)

            # publication reference
            self.check.format_editor(record)
            self.check.publisher(record)
            self.check.paper_reference(record)

            # authors
            self.check.format_authors(record, fmt="F. Last")
            self.check.get_my_authors(record, sort=True)

        except CheckException as e:
            # a check reported a non conformity which can not be fixed
            self.logs[-1].reject(e, record=record)
            return False

        except Exception as e:
            # unexpected failure: reject the record instead of aborting and
            # print the traceback to help the diagnostic
            self.logs[-1].reject(MSG_CRASH % e, record=record, translate=False)
            print(traceback.format_exc())
            return False

        return True

    def insert_record(self, record):
        """Insert a conference proceeding in the database.

        A matching record is first searched via ``get_record_by_fields``.
        When one is found its status is returned and nothing is inserted.
        In dry-run mode (``self.dry_run``) the database is not written, but
        the record is still logged as loaded and counted as one.

        Args:
            record (RecordConf): record describing a proceeding.

        Returns:
            int: one when the record is inserted / updated in the database
                zero otherwise.

        """
        # alias
        oai_url = record.oai_url()
        year = record.paper_year()

        # protection against proceeding not published in a journal
        if not year:
            year = record.year()

        # alias
        authors = record.authors()
        editor = record.paper_editor()
        pages = record.paper_pages()
        preprint = record.preprint_number()
        report_numbers = record.report_number()
        # NOTE(review): only the first submission entry is kept; presumably
        # check_record guarantees that the sequence is not empty -- confirm
        submitted = record.submitted()[0]
        title = record.title()
        url = record.paper_url()
        volume = record.paper_volume()

        # alias for the conference information
        conference_dates = record.conference_dates()
        conference_title = record.conference_title()
        first_author = record.first_author()
        id_country = self.search_country(record.conference_country())

        # get the collaboration/publisher identifiers
        id_collaboration = self.search_collaboration(record.collaboration())
        id_publisher = self.search_publisher(editor)

        # get an already published proceeding
        fields = dict(authors=authors,
                      conference_title=conference_title,
                      first_author=first_author,
                      id_publishers=id_publisher,
                      preprint=preprint,
                      pages=pages,
                      publication_url=url,
                      report_numbers=report_numbers,
                      submitted=submitted,
                      volume=volume,
                      title=title)

        rec_id, status = self.get_record_by_fields(oai_url, year, **fields)
        if rec_id:
            # the proceeding is already known: report the lookup status
            return status

        # eventually insert a new proceeding
        # (ret stays at one in dry-run mode, so the load is still logged)
        ret = 1
        if not self.dry_run:

            fields = dict(authors=authors,
                          # attribute, not a method call
                          authors_institute=record.my_authors,
                          conference_dates=conference_dates,
                          conference_speaker=first_author,
                          conference_title=conference_title,
                          conference_town=record.conference_town(),
                          conference_url=record.conference_url(),
                          first_author=first_author,
                          id_categories=self.id_category,
                          id_collaborations=id_collaboration,
                          id_countries=id_country,
                          id_projects=self.id_project,
                          id_publishers=id_publisher,
                          id_status=UNDEF_ID,
                          id_teams=self.id_team,
                          origin=oai_url,
                          pages=pages,
                          preprint=preprint,
                          publication_url=url,
                          report_numbers=report_numbers,
                          submitted=submitted,
                          title=title,
                          volume=volume,
                          year=year)

            ret = self._insert_in_db(log_year=year, **fields)

        if ret == 1:
            self.logs[-1].load(MSG_LOAD, year)
            return 1

        return 0