# -*- coding: utf-8 -*-
"""A collection of tools to check publication rows.

"""
import re
import regex

from gluon import current
from plugin_dbui import (UNDEF,
                         UNDEF_ID,
                         get_where_query)


# Compiled pattern for the syntax of the submission date, YYYY-MM or
# YYYY-MM-DD. The pattern text itself comes from regex.REG_SUBMITTED.
REG_SUBMITTED = re.compile(regex.REG_SUBMITTED)
# HTML character entity written with lowercase letters, such as &gt; or &amp;
REG_HTML = re.compile(r'&[a-z]+;')


def check_publication(row):
    """Check the publication fields.

    Args:
        row (gluon.dal.Row): record defining a publication.
            It contains the publication table as well as its foreign
            tables; ``categories``, ``publications``, ``status`` and
            ``teams`` are read here.

    Returns:
        tuple:
            * the first element contains the list of messages
            * the second one contains the list of duplicate *ids*.

    """
    T, li, idset = current.T, [], set()

    categories = row.categories
    category_code = categories.code
    category_usual = categories.usual
    publication = row.publications

    # status code
    if row.status.code == '???':
        li.append(T("The status is ???"))

    # category
    if category_code == UNDEF:
        li.append(T("The category is undefined"))

    # team
    if publication.id_teams == UNDEF_ID:
        li.append(T("The team is undefined"))

    # project
    if publication.id_projects == UNDEF_ID:
        li.append(T("The project is undefined"))

    # authors list
    # A text field which is not filled is read as an empty string so that
    # the substring tests below cannot raise a TypeError on None.
    authors = publication.authors or ''
    if 'et al' in authors:
        li.append(T("'et al.' in authors"))

    # CPPM authors (team name, ...)
    # An empty team name is a substring of any text, it is therefore skipped.
    team = row.teams.team
    authors_institute = publication.authors_institute or ''
    if team and team in authors_institute:
        li.append(T("The institute authors contains the team name?"))

    # submitted date
    submitted = publication.submitted
    if not submitted:
        li.append(T("Submitted date is not defined"))

    elif not REG_SUBMITTED.match(submitted):
        li.append(T("Submitted date is not valid"))

    # publication URL
    publication_url = publication.publication_url
    if publication_url and 'pdf' not in publication_url:
        li.append(
            T("Check that the publication URL corresponds to a pdf file."))

    # latex syntax
    # "^" and the arrow are accepted only when the title contains a "$"
    title = publication.title or ''
    has_dollar = "$" in title
    rules = "√" in title or \
            ("^" in title and not has_dollar) or \
            ("→" in title and not has_dollar) or \
            ("->" in title) or \
            ("s**(1/2)" in title) or \
            REG_HTML.search(title)

    if rules:
        li.append(T("Check latex syntax in the title"))

    # "Note :" in report number
    report_numbers = publication.report_numbers or ''
    rules = "Note :" in report_numbers or \
            "Note:" in report_numbers or \
            ";" in report_numbers

    if rules:
        li.append(T('Report numbers contains "Note :" or ";"'))

    # duplicate by origin
    ids = duplicate_origin(publication)
    if ids:
        idset.update(ids)
        li.append(T("Entries with duplicate origin"))

    # specific fields for article
    if category_code in ('ACL', 'ACLN'):

        if publication.id_publishers == UNDEF_ID:
            li.append(T("Publishers is not defined"))

        if not publication.volume:
            li.append(T("Volume number is not defined"))

        if not publication.pages:
            li.append(T("Pages range is not defined"))

        if not publication.preprint:
            li.append(T("Preprint number is not defined"))

        ids = duplicate_article(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    # specific fields for preprint
    if category_code == 'PRE':

        if not publication.preprint:
            li.append(T("Preprint number is not defined"))

    # specific fields for proceeding and talk
    if category_usual in ('proceeding', 'talk'):

        if not publication.conference_title:
            li.append(T("Conference title is not defined"))

        if not publication.conference_dates:
            li.append(T("Conference dates is not defined"))

        if not publication.conference_town:
            li.append(T("Conference town is not defined"))

        if not publication.id_countries:
            li.append(T("Conference country is not defined"))

        if not publication.conference_speaker:
            li.append(T("Conference speaker is missing"))

        ids = duplicate_conference(publication,
                                   category_usual == 'proceeding')
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    # specific fields for report
    if category_usual == 'report':

        if not publication.report_numbers:
            li.append(T("Report number is missing"))

        ids = duplicate_report(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    return (li, list(idset))


def extend_ids(db, query, ids):
    """Extend list of publication *ids* with those found by the *query*.

    Note:
        The *id* are unique in the list. The list *ids* is modified in
        place and nothing is returned.

    Args:
        db (gluon.dal.DAL): database connection.
        query (gluon.dal.query): database query
        ids (list of string): the current list of *ids*

    """
    # Membership is tested on a set so that each row does not scan the
    # whole list; the list itself keeps the order of insertion.
    known = set(ids)

    # An empty selection yields no row, so that counting the records
    # beforehand (one more database request) is not required.
    for row in db(query).select():
        id_rec = str(row.publications.id)
        if id_rec not in known:
            known.add(id_rec)
            ids.append(id_rec)


def duplicate_article(publication):
    """Look for duplicate article.

    The search is restricted to the articles (category code *ACL* or
    *ACLN*) of the same team and of the same publisher. Three selections
    are then applied one after the other:

        * same title, volume and pages
        * same volume, pages and year
        * same title

    Args:
        publication (dict or gluon.storage.Storage): contains the
            publication fields and their values. The publication itself
            is excluded from the search when its *id* is filled.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    db = current.globalenv['db']
    categories, publications = db.categories, db.publications

    is_article = (categories.code == 'ACL') | (categories.code == 'ACLN')
    same_publisher = \
        publications.id_publishers == publication['id_publishers']

    # conditions shared by the three selections
    base = get_where_query(publications)
    base &= is_article
    base &= publications.id_teams == publication['id_teams']
    base &= same_publisher

    if 'id' in publication and publication['id']:
        base &= publications.id != publication['id']

    found = []

    # title, publishers, volume and pages
    criteria = base & (publications.title == publication['title'])
    criteria &= publications.volume == publication['volume']
    criteria &= publications.pages == publication['pages']
    extend_ids(db, criteria, found)

    # publisher, volume, pages and year
    criteria = base & (publications.volume == publication['volume'])
    criteria &= publications.pages == publication['pages']
    criteria &= publications.year == publication['year']
    extend_ids(db, criteria, found)

    # publisher and title
    criteria = base & (publications.title == publication['title'])
    extend_ids(db, criteria, found)

    return found


def duplicate_conference(publication, proceeding=False):
    """Look for duplicate talk / proceeding.

    The search is restricted to the entries of the same team having the
    same title, in the category codes *ACTI* / *ACTN* for a proceeding
    and *COM* for a talk. Three selections are then applied one after
    the other:

        * same conference title, conference dates and conference town
        * same conference dates and conference town
        * same conference title and conference town

    Args:
        publication (dict or gluon.storage.Storage): contains the publication
            fields and their values. The publication itself is excluded
            from the search when its *id* is filled.
        proceeding (bool): tag the publication either as talk or a proceeding.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    db = current.globalenv['db']
    categories = db.categories
    publications = db.publications

    in_category = (
        (categories.code == 'ACTI') | (categories.code == 'ACTN')
        if proceeding else categories.code == 'COM'
    )

    # conditions shared by the three selections
    base = get_where_query(publications) \
        & in_category \
        & (publications.id_teams == publication['id_teams']) \
        & (publications.title == publication['title'])

    if 'id' in publication and publication['id']:
        base = base & (publications.id != publication['id'])

    same_title = \
        publications.conference_title == publication['conference_title']
    same_dates = \
        publications.conference_dates == publication['conference_dates']
    same_town = \
        publications.conference_town == publication['conference_town']

    selections = (
        # title, conference title, conference date and conference town
        (same_title, same_dates, same_town),
        # title, conference date and conference town
        (same_dates, same_town),
        # title, conference title and conference town
        (same_title, same_town),
    )

    found = []
    for conditions in selections:
        criteria = base
        for condition in conditions:
            criteria = criteria & condition
        extend_ids(db, criteria, found)

    return found


def duplicate_origin(publication):
    """Look for publications with the same value in the origin field.

    Args:
        publication (dict or gluon.storage.Storage): contains the publication
            fields and their values. The publication itself is excluded
            from the search when its *id* is filled.

    Returns:
        list: list of *ids*, as strings, corresponding to duplicate
            entries. It is empty when the origin field is not filled.

    """
    db = current.globalenv['db']
    publications = db.publications

    # protection against empty origin field
    origin = publication['origin']
    if not origin:
        return []

    # look for publication with the same origin field
    query = publications.origin == origin

    # The id is missing or empty for a publication which is not yet
    # stored; same guard as in the other duplicate_* functions.
    if 'id' in publication and publication['id']:
        query = ((query) & (publications.id != publication['id']))

    # An empty selection yields an empty list, no need to count first.
    return [str(row.id) for row in db(query).select()]


def duplicate_report(publication):
    """Look for duplicate report.

    The search selects the entries with the category code *AP*, belonging
    to the same team and having the same *title*.

    Args:
        publication (dict or gluon.storage.Storage): contains the publication
            fields and their values. The publication itself is excluded
            from the search when its *id* is filled.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    db = current.globalenv['db']
    publications = db.publications

    criteria = get_where_query(publications)
    criteria &= db.categories.code == 'AP'
    criteria &= publications.id_teams == publication['id_teams']
    criteria &= publications.title == publication['title']

    if 'id' in publication and publication['id']:
        criteria &= publications.id != publication['id']

    found = []
    extend_ids(db, criteria, found)

    return found