check_tools.py 11 KB
Newer Older
LE GAC Renaud's avatar
LE GAC Renaud committed
1 2 3 4 5
# -*- coding: utf-8 -*-
"""a collection of tools to check rows.

"""
import re
6
import regex
LE GAC Renaud's avatar
LE GAC Renaud committed
7 8

from gluon import current
9 10
from plugin_dbui import (UNDEF,
                         UNDEF_ID,
11
                         get_id,
12
                         get_where_query)
LE GAC Renaud's avatar
LE GAC Renaud committed
13 14


15 16
# Compiled pattern used to validate the submission date, which is
# expected as YYYY-MM or YYYY-MM-DD. The pattern text itself is
# defined in regex.REG_SUBMITTED.
REG_SUBMITTED = re.compile(regex.REG_SUBMITTED)
LE GAC Renaud's avatar
LE GAC Renaud committed
17

18
# HTML character entity, such as &gt; or &amp;, left over in a text field
LE GAC Renaud's avatar
LE GAC Renaud committed
19 20 21 22
REG_HTML = re.compile(r"&[a-z]+;")  # lower-case named entities only


def check_publication(row):
    """Check the publication fields.

    Text fields which are not set (``None``) are handled as empty strings,
    so that an incomplete record yields messages instead of an exception.

    Args:
        row (gluon.dal.Row): record defining a publication.
            It contains the publications table as well as its foreign tables.

    Returns:
        tuple:
            * the first element contains the list of messages
            * the second one contains the list of duplicate *ids*.

    """
    T, li, idset = current.T, [], set()

    publication = row.publications
    category = row.categories

    # status code
    if row.status.code == '???':
        li.append(T("The status is ???"))

    # category
    if category.code == UNDEF:
        li.append(T("The category is undefined"))

    # team
    if publication.id_teams == UNDEF_ID:
        li.append(T("The team is undefined"))

    # project
    if publication.id_projects == UNDEF_ID:
        li.append(T("The project is undefined"))

    # authors list
    if 'et al' in (publication.authors or ""):
        li.append(T("'et al.' in authors"))

    # CPPM authors (team name, ...)
    # a missing team name cannot be searched for in a string
    team = row.teams.team
    if team is not None and team in (publication.authors_institute or ""):
        li.append(T("The institute authors contains the team name?"))

    # submitted date: either missing or not matching the expected syntax
    submitted = publication.submitted
    if not submitted:
        li.append(T("Submitted date is not defined"))

    elif not REG_SUBMITTED.match(submitted):
        li.append(T("Submitted date is not valid"))

    # publication URL
    url = publication.publication_url
    if url and 'pdf' not in url:
        text = \
            T("Check that the publication URL corresponds to a pdf file.")
        li.append(text)

    # latex syntax: symbols which should be written in latex math mode
    # and HTML entities which should not appear in a title
    title = publication.title or ""
    rules = "√" in title or \
            ("^" in title and "$" not in title) or \
            ("→" in title and "$" not in title) or \
            ("->" in title) or \
            ("s**(1/2)" in title) or \
            REG_HTML.search(title)

    if rules:
        li.append(T("Check latex syntax in the title"))

    # "Note :" in report number
    value = publication.report_numbers or ""
    rules = "Note :" in value or \
            "Note:" in value or \
            ";" in value

    if rules:
        li.append(T('Report numbers contains "Note :" or ";"'))

    # duplicate by origin
    ids = duplicate_origin(publication)
    if ids:
        idset.update(ids)
        li.append(T("Entries with duplicate origin"))

    # specific fields for article
    if category.code in ('ACL', 'ACLN'):

        if publication.id_publishers == UNDEF_ID:
            li.append(T("Publishers is not defined"))

        if not publication.volume:
            li.append(T("Volume number is not defined"))

        if not publication.pages:
            li.append(T("Pages range is not defined"))

        if not publication.preprint:
            li.append(T("Preprint number is not defined"))

        ids = duplicate_article(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    # specific fields for preprint
    if category.code == 'PRE':

        if not publication.preprint:
            li.append(T("Preprint number is not defined"))

    # specific fields for proceeding and talk
    if category.usual in ('proceeding', 'talk'):

        if not publication.conference_title:
            li.append(T("Conference title is not defined"))

        if not publication.conference_dates:
            li.append(T("Conference dates is not defined"))

        if not publication.conference_town:
            li.append(T("Conference town is not defined"))

        if not publication.id_countries:
            li.append(T("Conference country is not defined"))

        if not publication.conference_speaker:
            li.append(T("Conference speaker is missing"))

        ids = duplicate_conference(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    # specific fields for report
    if category.usual == 'report':

        if not publication.report_numbers:
            li.append(T("Report number is missing"))

        ids = duplicate_report(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    return (li, list(idset))
189 190


191
def extend_ids(db, query, ids):
    """Extend list of publication *ids* with those found by the *query*.

    The list *ids* is modified in place and nothing is returned.
    The records are read with a single select: an empty selection simply
    adds nothing, so no preliminary count is needed.

    Note:
        The *id* are unique in the list.

    Args:
        db (gluon.dal.DAL): database connection.
        query (gluon.dal.query): database query
        ids (list of string): the current list of *ids*

    """
    # set used for the membership test, the list keeps the insertion order
    known = set(ids)

    for row in db(query).select():
        id_rec = str(row.publications.id)
        if id_rec not in known:
            known.add(id_rec)
            ids.append(id_rec)
209

210

211
def duplicate_article(publication):
    """Look for duplicate article.

    The search is limited to the articles (categories ACL and ACLN)
    of the same team and of the same publisher. Within that selection,
    an entry is a duplicate when it matches one of the following criteria:

        * title, volume and pages
        * volume, pages and year
        * title

    The publication itself is excluded when its *id* is known.

    Args:
        publication (dict or gluon.storage.Storage): contains the
            publication fields and theirs values.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    db = current.globalenv['db']
    categories, publications = db.categories, db.publications

    q_category = (categories.code == 'ACL') | (categories.code == 'ACLN')
    q_publisher = publications.id_publishers == publication['id_publishers']

    # selection shared by all the criteria
    qmain = get_where_query(publications)
    qmain &= q_category
    qmain &= publications.id_teams == publication['id_teams']
    qmain &= q_publisher

    if 'id' in publication and publication['id']:
        qmain &= publications.id != publication['id']

    found = []

    # title, publishers, volume and pages
    q_title = publications.title == publication['title']
    q_volume = publications.volume == publication['volume']
    q_pages = publications.pages == publication['pages']
    extend_ids(db, qmain & q_title & q_volume & q_pages, found)

    # publisher, volume, pages and year
    q_year = publications.year == publication['year']
    extend_ids(db, qmain & q_volume & q_pages & q_year, found)

    # publisher and title
    extend_ids(db, qmain & q_title, found)

    return found


265
def duplicate_conference(publication):
    """Look for duplicate talk / proceeding.

    The comparison is performed on conference talk/proceeding published
    by the given team using the following criteria:

        * title, conference title, conference date and conference town
        * title, conference date and conference town
        * title, conference title and conference town

    The search is limited to the category COM when the publication belongs
    to that category, and to the categories ACTI and ACTN otherwise.
    The publication itself is excluded when its *id* is known.

    Args:
        publication (dict or gluon.storage.Storage): contains the publication
            fields and theirs values.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    ids = []
    db = current.globalenv['db']

    categories = db.categories
    publications = db.publications

    # The category is read from the publication values.
    # Comparing the table field (publications.id_categories) would build
    # a DAL query object, not a boolean, so the ACTI / ACTN branch would
    # never be selected.
    id_category = publication.get('id_categories')
    if id_category and id_category == get_id(categories, code='COM'):
        qcat = categories.code == 'COM'

    else:
        qcat = (categories.code == 'ACTI') | (categories.code == 'ACTN')

    qmain = get_where_query(publications)
    qmain &= qcat
    qmain &= publications.id_teams == publication['id_teams']
    qmain &= publications.title == publication['title']

    if 'id' in publication and publication['id']:
        qmain &= publications.id != publication['id']

    qtitle = publications.conference_title == publication['conference_title']
    qdates = publications.conference_dates == publication['conference_dates']
    qtown = publications.conference_town == publication['conference_town']

    # title, conference title, conference date and conference town
    query = ((qmain) & (qtitle) & (qdates) & (qtown))
    extend_ids(db, query, ids)

    # title, conference date and conference town
    query = ((qmain) & (qdates) & (qtown))
    extend_ids(db, query, ids)

    # title, conference title and conference town
    query = ((qmain) & (qtitle) & (qtown))
    extend_ids(db, query, ids)

    return ids


def duplicate_origin(publication):
    """Look for publications with the same value in the origin field.

    The publication itself is excluded from the search when its *id*
    is known. Nothing is searched when the origin field is empty.

    Args:
        publication (dict or gluon.storage.Storage): contains the publication
            fields and theirs values.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    # protection against empty origin field
    # (checked first, the database is not needed in that case)
    origin = publication['origin']
    if not origin:
        return []

    db = current.globalenv['db']
    publications = db.publications

    # look for publication with the same origin field
    query = publications.origin == origin

    # a publication not yet recorded has no id to exclude
    if 'id' in publication and publication['id']:
        query &= publications.id != publication['id']

    return [str(row.id) for row in db(query).select()]


354
def duplicate_report(publication):
    """Look for duplicate report.

    A duplicate is an entry of the category AP, belonging to the same
    team and having the same *title*. The publication itself is excluded
    when its *id* is known.

    Args:
        publication (dict or gluon.storage.Storage): contains the publication
            fields and theirs values.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    db = current.globalenv['db']
    publications = db.publications

    qmain = get_where_query(publications)
    qmain &= db.categories.code == 'AP'
    qmain &= publications.id_teams == publication['id_teams']
    qmain &= publications.title == publication['title']

    # exclude the publication itself, once it is recorded
    pub_id = publication['id'] if 'id' in publication else None
    if pub_id:
        qmain &= publications.id != pub_id

    found = []
    extend_ids(db, qmain, found)

    return found