check_tools.py 11 KB
Newer Older
LE GAC Renaud's avatar
LE GAC Renaud committed
1 2 3 4 5
"""a collection of tools to check rows.

"""
import re

6
from . import regex
LE GAC Renaud's avatar
LE GAC Renaud committed
7
from gluon import current
8 9
from plugin_dbui import (UNDEF,
                         UNDEF_ID,
10
                         get_where_query)
LE GAC Renaud's avatar
LE GAC Renaud committed
11 12


13 14
# Compiled pattern describing the syntax of the submission date,
# YYYY-MM or YYYY-MM-DD. The pattern string is defined in the regex module.
REG_SUBMITTED = re.compile(regex.REG_SUBMITTED)
LE GAC Renaud's avatar
LE GAC Renaud committed
15

16
# HTML code like &gt;
LE GAC Renaud's avatar
LE GAC Renaud committed
17
REG_HTML = re.compile(r"&[a-z]+;")
LE GAC Renaud's avatar
LE GAC Renaud committed
18 19 20


def check_publication(row):
    """Check the publication fields.

    Run a series of consistency checks on the publication and collect one
    translated message per anomaly. Duplicate entries are searched by
    *origin* for every publication and, depending on the category, with
    the criteria specific to articles, proceedings / talks and reports.

    Args:
        row (gluon.dal.Row): record defining a publication.
            It contains the publication table as well as its foreign
            tables (*categories*, *status* and *teams* are read here).

    Returns:
        tuple:
            * the first element contains the list of messages
            * the second one contains the list of duplicate *ids*.

    """
    T = current.T
    messages = []
    duplicates = set()

    categories = row.categories
    category_code = categories.code
    category_usual = categories.usual
    publication = row.publications

    # Unset text fields are replaced by an empty string so that the
    # substring tests below cannot raise a TypeError on None.
    authors = publication.authors or ""
    authors_institute = publication.authors_institute or ""
    title = publication.title or ""
    report_numbers = publication.report_numbers or ""
    team_name = row.teams.team

    # status code
    if row.status.code == "???":
        messages.append(T("The status is ???"))

    # category
    if category_code == UNDEF:
        messages.append(T("The category is undefined"))

    # team
    if publication.id_teams == UNDEF_ID:
        messages.append(T("The team is undefined"))

    # project
    if publication.id_projects == UNDEF_ID:
        messages.append(T("The project is undefined"))

    # authors list
    if "et al" in authors:
        messages.append(T("'et al.' in authors"))

    # institute authors (team name, ...)
    # An empty team name is skipped: it would match any string.
    if team_name and team_name in authors_institute:
        messages.append(T("The institute authors contains the team name?"))

    # submitted date
    submitted = publication.submitted
    if not submitted:
        messages.append(T("Submitted date is not defined"))
    elif not REG_SUBMITTED.match(submitted):
        messages.append(T("Submitted date is not valid"))

    # publication URL
    publication_url = publication.publication_url
    if publication_url and "pdf" not in publication_url:
        messages.append(
            T("Check that the publication URL corresponds to a pdf file."))

    # latex syntax: symbols which should have been written in latex
    # (outside of a $...$ section for ^ and →) or HTML entities
    suspicious_title = (
        "√" in title
        or ("^" in title and "$" not in title)
        or ("→" in title and "$" not in title)
        or "->" in title
        or "s**(1/2)" in title
        or REG_HTML.search(title)
    )
    if suspicious_title:
        messages.append(T("Check latex syntax in the title"))

    # "Note :" in report number
    if ("Note :" in report_numbers
            or "Note:" in report_numbers
            or ";" in report_numbers):
        messages.append(T('Report numbers contains "Note :" or ";"'))

    # duplicate by origin
    ids = duplicate_origin(publication)
    if ids:
        duplicates.update(ids)
        messages.append(T("Entries with duplicate origin"))

    # specific fields for article
    if category_code in ("ACL", "ACLN"):

        if publication.id_publishers == UNDEF_ID:
            messages.append(T("Publishers is not defined"))

        if not publication.volume:
            messages.append(T("Volume number is not defined"))

        if not publication.pages:
            messages.append(T("Pages range is not defined"))

        if not publication.preprint:
            messages.append(T("Preprint number is not defined"))

        ids = duplicate_article(publication)
        if ids:
            duplicates.update(ids)
            messages.append(T("Possible duplicate entries"))

    # specific fields for preprint
    if category_code == "PRE":

        if not publication.preprint:
            messages.append(T("Preprint number is not defined"))

    # specific fields for proceeding and talk
    if category_usual in ("proceeding", "talk"):

        if not publication.conference_title:
            messages.append(T("Conference title is not defined"))

        if not publication.conference_dates:
            messages.append(T("Conference dates is not defined"))

        if not publication.conference_town:
            messages.append(T("Conference town is not defined"))

        # NOTE(review): unlike the other foreign keys, the country is
        # tested for emptiness rather than against UNDEF_ID -- confirm
        # this is intended.
        if not publication.id_countries:
            messages.append(T("Conference country is not defined"))

        if not publication.conference_speaker:
            messages.append(T("Conference speaker is missing"))

        ids = duplicate_conference(publication,
                                   category_usual == "proceeding")
        if ids:
            duplicates.update(ids)
            messages.append(T("Possible duplicate entries"))

    # specific fields for report
    if category_usual == "report":

        if not publication.report_numbers:
            messages.append(T("Report number is missing"))

        ids = duplicate_report(publication)
        if ids:
            duplicates.update(ids)
            messages.append(T("Possible duplicate entries"))

    return (messages, list(duplicates))
195 196


197
def extend_ids(db, query, ids):
    """Extend list of publication *ids* with those found by the *query*.

    The list *ids* is modified in place and nothing is returned. New
    identifiers are appended in the order in which the rows are selected.

    Note:
        The *id* are unique in the list.

    Args:
        db (gluon.dal.DAL): database connection.
        query (gluon.dal.query): database query. Each selected row has to
            expose the publication identifier as ``row.publications.id``.
        ids (list of string): the current list of *ids*

    """
    # No prior count() is needed: iterating an empty selection is a no-op.
    known = set(ids)

    for row in db(query).select():
        id_rec = str(row.publications.id)
        if id_rec not in known:
            known.add(id_rec)
            ids.append(id_rec)
215

216

217
def duplicate_article(publication):
    """Look for duplicate article.

    The candidates are the articles (category code *ACL* or *ACLN*)
    published by the given team with the given publisher, the publication
    itself being excluded when it has an *id*. A candidate is a duplicate
    when it shares with the publication one of the following sets of
    fields:

        * title, volume and pages
        * volume, pages and year
        * title

    Args:
        publication (dict or gluon.storage.Storage): contains the
            publication fields and theirs values.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    found = []
    db = current.db

    categories = db.categories
    publications = db.publications

    is_article = (categories.code == "ACL") | (categories.code == "ACLN")
    same_publisher = \
        publications.id_publishers == publication["id_publishers"]

    # candidates: articles of the same team and of the same publisher
    base = get_where_query(publications)
    base &= is_article
    base &= publications.id_teams == publication["id_teams"]
    base &= same_publisher

    if "id" in publication and publication["id"]:
        base &= publications.id != publication["id"]

    # each entry lists the fields which have to be equal, on top of the
    # publisher, in order to tag a candidate as a duplicate
    criteria = (("title", "volume", "pages"),
                ("volume", "pages", "year"),
                ("title",))

    for fields in criteria:
        query = base
        for name in fields:
            query = query & (publications[name] == publication[name])
        extend_ids(db, query, found)

    return found


271
def duplicate_conference(publication, proceeding=False):
    """Look for duplicate talk / proceeding.

    The candidates are the talks (category code *COM*) or the proceedings
    (category code *ACTI* or *ACTN*) of the given team having the same
    title, the publication itself being excluded when it has an *id*.
    A candidate is a duplicate when it also shares with the publication
    one of the following sets of fields:

        * conference title, conference date and conference town
        * conference date and conference town
        * conference title and conference town

    Args:
        publication (dict or gluon.storage.Storage): contains the publication
            fields and theirs values.
        proceeding (bool): tag the publication either as talk or a proceeding.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    found = []
    db = current.db

    categories = db.categories
    publications = db.publications

    # proceedings and talks belong to different categories
    if proceeding:
        in_category = (categories.code == "ACTI") | (categories.code == "ACTN")
    else:
        in_category = categories.code == "COM"

    base = get_where_query(publications)
    base = base & in_category
    base = base & (publications.id_teams == publication["id_teams"])
    base = base & (publications.title == publication["title"])

    if "id" in publication and publication["id"]:
        base = base & (publications.id != publication["id"])

    same_conference = (
        publications.conference_title == publication["conference_title"])
    same_dates = (
        publications.conference_dates == publication["conference_dates"])
    same_town = (
        publications.conference_town == publication["conference_town"])

    # one query per criteria, the title being already part of the base
    criteria = ((same_conference, same_dates, same_town),
                (same_dates, same_town),
                (same_conference, same_town))

    for conditions in criteria:
        query = base
        for condition in conditions:
            query = query & condition
        extend_ids(db, query, found)

    return found


def duplicate_origin(publication):
    """Look for publications with the same value in the origin field.

    The publication itself is excluded from the search when it has an
    *id*. Nothing is searched when the origin field is empty.

    Args:
        publication (dict or gluon.storage.Storage): contains the publication
            fields and theirs values.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    db = current.db
    publications = db.publications

    # protection against empty origin field
    origin = publication["origin"]
    if not origin:
        return []

    # look for publication with the same origin field
    query = publications.origin == origin

    # The id is optional, as in the other duplicate_* functions: a
    # publication not yet in the database has none to exclude.
    if "id" in publication and publication["id"]:
        query = (publications.id != publication["id"]) & query

    # a single select is enough, an empty selection gives an empty list
    return [str(row.id) for row in db(query).select()]


361
def duplicate_report(publication):
    """Look for duplicate report.

    The candidates are the reports (category code *AP*) published by the
    given team, the publication itself being excluded when it has an
    *id*. A candidate is a duplicate when it has the same *title*.

    Args:
        publication (dict or gluon.storage.Storage): contains the publication
            fields and theirs values.

    Returns:
        list: list of *ids* corresponding to duplicate entries.

    """
    found = []
    db = current.db
    publications = db.publications

    query = get_where_query(publications)
    query &= db.categories.code == "AP"
    query &= publications.id_teams == publication["id_teams"]
    query &= publications.title == publication["title"]

    # do not report the publication as its own duplicate
    if "id" in publication and publication["id"]:
        query &= publications.id != publication["id"]

    extend_ids(db, query, found)

    return found