""" harvest_tools.preprints

Automaton which checks preprint records and inserts them in the database.

"""
import traceback


7 8 9 10
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .checkandfix import CheckException
from .invenio_tools import RecordConf, RecordThesis
11 12 13
from plugin_dbui import UNDEF_ID


14 15 16 17
# Rejection messages logged by Preprints.check_record, one per reason
# for which a record is not accepted as a preprint.
MSG_PREPRINT_IS_PAPER = "Reject preprint is a published paper"
MSG_PREPRINT_IS_CONFERENCE = "Reject preprint is a conference"
MSG_PREPRINT_IS_THESIS = "Reject preprint is a thesis"
MSG_PREPRINT_NO_NUMBER = "Reject no preprint number"
18 19


20
class Preprints(Automaton):
    """Automaton dedicated to preprints.

    A record is validated by :meth:`check_record` and stored in the
    database by :meth:`insert_record`.

    """

    def check_record(self, record):
        """Validate a preprint record and fix its non conformities.

        The checks of ``Automaton.check_record`` run first. The record is
        then rejected when it is published, when it is a ``RecordConf`` or
        a ``RecordThesis`` instance, or when it has no preprint number.
        Finally the submission date, the year and the authors are checked
        through ``self.check``.

        Args:
            record (RecordPubli): record describing a preprint.

        Returns:
            bool: ``True`` when the record is accepted. ``False`` when it
                is rejected, in which case the reason is reported to the
                last entry of ``self.logs``.

        """
        def reject(reason, **options):
            # the log entry is looked up at rejection time, not earlier
            self.logs[-1].reject(reason, record=record, **options)
            return False

        if not Automaton.check_record(self, record):
            return False

        if self.dbg:
            print("check preprint record")

        # Ordered rejection rules. Each test is wrapped in a callable so
        # that it is only evaluated when all previous rules have passed.
        rules = (
            (lambda: record.is_published(), MSG_PREPRINT_IS_PAPER),
            (lambda: isinstance(record, RecordConf),
             MSG_PREPRINT_IS_CONFERENCE),
            (lambda: isinstance(record, RecordThesis),
             MSG_PREPRINT_IS_THESIS),
            (lambda: not record.preprint_number(), MSG_PREPRINT_NO_NUMBER),
        )

        for is_rejected, reason in rules:
            if is_rejected():
                return reject(reason)

        checker = self.check
        try:
            checker.submitted(record)
            checker.year(record)

            checker.format_authors(record, fmt="F. Last")
            checker.get_my_authors(record, sort=True)

        except CheckException as err:
            # non conformity which can not be corrected
            return reject(err)

        except Exception as err:
            # unexpected failure: log it and dump the traceback on stdout
            reject(MSG_CRASH % err, translate=False)
            print(traceback.format_exc())
            return False

        else:
            return True

    def insert_record(self, record):
        """Insert a preprint in the database.

        Nothing is inserted when ``self.get_record_by_fields`` reports an
        existing record; the status it returns is then handed back to the
        caller. In dry run mode the database is left untouched but the
        record is logged as loaded.

        Args:
            record (RecordPubli): record describing a preprint.

        Returns:
            int: one when the record is inserted / updated in the database
                zero otherwise.

        """
        # values extracted from the record and used more than once
        first_author = record.first_author()
        oai_url = record.oai_url()
        preprint = record.preprint_number()
        title = record.title()
        submitted = record.submitted()[0]
        year = record.year()

        # identifier of the collaboration
        id_collaboration = self.search_collaboration(record.collaboration())

        # look for an existing preprint or article
        criteria = {
            "first_author": first_author,
            "id_projects": self.id_project,
            "id_teams": self.id_team,
            "preprint": preprint,
            "submitted": submitted,
            "title": title,
        }

        found_id, found_status = \
            self.get_record_by_fields(oai_url, year, **criteria)

        if found_id:
            return found_status

        # eventually insert a new preprint
        if self.dry_run:
            # the database is not modified but the record counts as loaded
            inserted = 1

        else:
            row = {
                "authors": record.authors(),
                "authors_institute": record.my_authors,
                "first_author": first_author,
                "id_categories": self.id_category,
                "id_collaborations": id_collaboration,
                "id_projects": self.id_project,
                "id_status": UNDEF_ID,
                "id_teams": self.id_team,
                "origin": oai_url,
                "preprint": preprint,
                "publication_url": record.paper_url(),
                "submitted": submitted,
                "title": title,
                "year": year,
            }

            inserted = self._insert_in_db(log_year=year, **row)

        if inserted == 1:
            self.logs[-1].load(MSG_LOAD, year)
            return 1

        return 0