# -*- coding: utf-8 -*-
"""a collection of tools to check rows.

@author: R. Le Gac

"""
import re
import regex

from gluon import current
from plugin_dbui import (UNDEF,
                         UNDEF_ID,
                         get_where_query)


# syntax for the submission date YYYY-MM or YYYY-MM-DD
REG_SUBMITTED = re.compile(regex.REG_SUBMITTED)

# HTML character entity such as &gt; (an ampersand, lower case letters
# and a semicolon)
REG_HTML = re.compile('&[a-z]+;')


def check_publication(row):
    """Check the publication fields.

    @type row: gluon.dal.Row
    @param row: record defining a publication. It contains the publications
    table as well as its reference tables (the C{status}, C{categories}
    and C{teams} tables are read here).

    @rtype: tuple
    @return:
        - the first element contains the list of messages
        - the second one contains the list of duplicate ids.

    @note: a text field which is C{None} is handled as an empty string,
    so that an unset authors, title or report number does not raise
    a C{TypeError} in the substring tests.

    """
    T, li, idset = current.T, [], set()
    publication = row.publications
    usual = row.categories.usual

    # status code
    if row.status.code == '???':
        li.append(T("The status is ???"))

    # category
    if row.categories.code == UNDEF:
        li.append(T("The category is undefined"))

    # team
    if publication.id_teams == UNDEF_ID:
        li.append(T("The team is undefined"))

    # project
    if publication.id_projects == UNDEF_ID:
        li.append(T("The project is undefined"))

    # authors list
    if 'et al' in (publication.authors or ''):
        li.append(T("'et al.' in authors"))

    # CPPM authors (team name, ...)
    if row.teams.team in (publication.authors_institute or ''):
        li.append(T("The institute authors contains the team name?"))

    # submitted date: either missing or not matching REG_SUBMITTED
    submitted = publication.submitted
    if not submitted:
        li.append(T("Submitted date is not defined"))
    elif not REG_SUBMITTED.match(submitted):
        li.append(T("Submitted date is not valid"))

    # publication URL
    url = publication.publication_url
    if url and 'pdf' not in url:
        li.append(
            T("Check that the publication URL corresponds to a pdf file."))

    # latex syntax: symbols which should be written as latex math
    title = publication.title or ''
    bad_latex = (
        "√" in title
        or ("^" in title and "$" not in title)
        or ("→" in title and "$" not in title)
        or "->" in title
        or "s**(1/2)" in title
        or REG_HTML.search(title)
    )
    if bad_latex:
        li.append(T("Check latex syntax in the title"))

    # "Note :" in report number
    value = publication.report_numbers or ''
    if "Note :" in value or "Note:" in value or ";" in value:
        li.append(T('Report numbers contains "Note :" or ";"'))

    # duplicate by origin
    ids = duplicate_origin(publication)
    if ids:
        idset.update(ids)
        li.append(T("Entries with duplicate origin"))

    # specific fields for article
    if usual == 'article':

        if publication.id_publishers == UNDEF_ID:
            li.append(T("Publishers is not defined"))

        if not publication.volume:
            li.append(T("Volume number is not defined"))

        if not publication.pages:
            li.append(T("Pages range is not defined"))

        if not publication.preprint:
            li.append(T("Preprint number is not defined"))

        ids = duplicate_article(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    # specific fields for proceeding and talk
    if usual in ('proceeding', 'talk'):

        if not publication.conference_title:
            li.append(T("Conference title is not defined"))

        if not publication.conference_dates:
            li.append(T("Conference dates is not defined"))

        if not publication.conference_town:
            li.append(T("Conference town is not defined"))

        if not publication.id_countries:
            li.append(T("Conference country is not defined"))

        if not publication.conference_speaker:
            li.append(T("Conference speaker is missing"))

        ids = duplicate_conference(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    # specific fields for report
    if usual == 'report':

        if not publication.report_numbers:
            li.append(T("Report number is missing"))

        ids = duplicate_report(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    return (li, list(idset))


def extend_ids(db, query, ids):
    """Extend a list of publication ids with those selected by a query.

    @type db: gluon.dal.DAL
    @param db: the database connection. It is called as C{db(query)}
    to build the set of records.

    @type query: gluon.dal.query
    @param query: the selection. Each selected row has to expose
    C{row.publications.id}.

    @type ids: list of string
    @param ids: the current list of ids. It is modified in place.

    @note: the current list of publication ids will be extended by those
    corresponding to the C{query}. The ids are converted to strings and
    are unique in the list. Nothing is returned.

    """
    # an empty selection yields no row, therefore counting the records
    # before selecting them only costs an extra database query.
    for row in db(query).select():
        id_rec = str(row.publications.id)
        if id_rec not in ids:
            ids.append(id_rec)


def duplicate_article(publication):
    """Look for duplicate article.
    The comparison is performed on articles (category code ACL or ACLN)
    published by the given team and by the given publisher,
    using the following criteria:

        - title, publisher, volume and pages
        - publisher, volume, pages and year
        - publisher and title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    The keys C{id_publishers}, C{id_teams}, C{title}, C{volume}, C{pages}
    and C{year} are read. The key C{id} is optional: when it is set,
    the record itself is excluded from the search.

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    ids = []
    db = current.globalenv['db']

    categories = db.categories
    publications = db.publications

    qcat = (categories.code == 'ACL') | (categories.code == 'ACLN')
    qpub = publications.id_publishers == publication['id_publishers']

    # selection shared by all the criteria
    qmain = get_where_query(publications)
    qmain = ((qmain) & (qcat))
    qmain = ((qmain) & (publications.id_teams == publication['id_teams']))
    qmain = ((qmain) & (qpub))

    # do not report the publication as a duplicate of itself
    if 'id' in publication and publication['id']:
        qmain = ((qmain) & (publications.id != publication['id']))

    # title, publisher, volume and pages
    query = ((qmain) & (publications.title == publication['title']))
    query = ((query) & (publications.volume == publication['volume']))
    query = ((query) & (publications.pages == publication['pages']))
    extend_ids(db, query, ids)

    # publisher, volume, pages and year
    query = ((qmain) & (publications.volume == publication['volume']))
    query = ((query) & (publications.pages == publication['pages']))
    query = ((query) & (publications.year == publication['year']))
    extend_ids(db, query, ids)

    # publisher and title
    query = ((qmain) & (publications.title == publication['title']))
    extend_ids(db, query, ids)

    return ids


def duplicate_conference(publication):
    """Look for duplicate talk / proceeding.
    The comparison is performed on conference talks / proceedings
    (category code ACTI, ACTN or COM) published by the given team
    using the following criteria:

        - title, conference title, conference date and conference town
        - title, conference date and conference town
        - title, conference title and conference town

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    The keys C{id_teams}, C{title}, C{conference_title},
    C{conference_dates} and C{conference_town} are read. The key C{id}
    is optional: when it is set, the record itself is excluded
    from the search.

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    ids = []
    db = current.globalenv['db']

    categories = db.categories
    publications = db.publications

    qcat = (categories.code == 'ACTI') | \
           (categories.code == 'ACTN') | \
           (categories.code == 'COM')

    # selection shared by all the criteria, including the title
    qmain = get_where_query(publications)
    qmain = ((qmain) & (qcat))
    qmain = ((qmain) & (publications.id_teams == publication['id_teams']))
    qmain = ((qmain) & (publications.title == publication['title']))

    # do not report the publication as a duplicate of itself
    if 'id' in publication and publication['id']:
        qmain = ((qmain) & (publications.id != publication['id']))

    qtitle = publications.conference_title == publication['conference_title']
    qdates = publications.conference_dates == publication['conference_dates']
    qtown = publications.conference_town == publication['conference_town']

    # title, conference title, conference date and conference town
    query = ((qmain) & (qtitle))
    query = ((query) & (qdates))
    query = ((query) & (qtown))
    extend_ids(db, query, ids)

    # title, conference date and conference town
    # the query restarts from qmain: chaining on the previous query would
    # keep the conference title and only repeat the first criterion
    query = ((qmain) & (qdates))
    query = ((query) & (qtown))
    extend_ids(db, query, ids)

    # title, conference title and conference town
    query = ((qmain) & (qtitle))
    query = ((query) & (qtown))
    extend_ids(db, query, ids)

    return ids


def duplicate_origin(publication):
    """Look for publications with the same value in the origin field.

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    The key C{origin} is read. The key C{id} is optional: when it is set,
    the record itself is excluded from the search.

    @rtype: list
    @return: list of ids, as strings, corresponding to duplicate entries.
    The list is empty when the origin field is not set.

    """
    ids = []
    db = current.globalenv['db']

    publications = db.publications

    # protection against empty origin field
    if not publication['origin']:
        return ids

    # look for publication with the same origin field
    query = publications.origin == publication['origin']

    # same guard as in the other duplicate_* functions: a publication
    # without id is compared to all the records
    if 'id' in publication and publication['id']:
        query = ((query) & (publications.id != publication['id']))

    # an empty selection yields no row, no need to count the records first
    for row in db(query).select():
        ids.append(str(row.id))

    return ids


def duplicate_report(publication):
    """Look for duplicate report.
    The comparison is performed on reports (category code AP) published
    by the given team using the following criteria:

        - title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    The keys C{id_teams} and C{title} are read. The key C{id} is optional:
    when it is set, the record itself is excluded from the search.

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    ids = []
    db = current.globalenv['db']

    publications = db.publications

    qmain = get_where_query(publications)
    qmain = ((qmain) & (db.categories.code == 'AP'))
    qmain = ((qmain) & (publications.id_teams == publication['id_teams']))
    qmain = ((qmain) & (publications.title == publication['title']))

    # do not report the publication as a duplicate of itself
    if 'id' in publication and publication['id']:
        qmain = ((qmain) & (publications.id != publication['id']))

    extend_ids(db, qmain, ids)

    return ids