check_tools.py 11.3 KB
Newer Older
LE GAC Renaud's avatar
LE GAC Renaud committed
1 2 3 4 5 6 7 8 9
# -*- coding: utf-8 -*-
"""a collection of tools to check rows.

@author: R. Le Gac

"""
import re

from gluon import current
10 11 12 13
from plugin_dbui import (UNDEF, 
                         UNDEF_ID, 
                         get_id, 
                         get_where_query)
LE GAC Renaud's avatar
LE GAC Renaud committed
14 15 16 17 18 19 20 21 22 23


# syntax for the submission date: YYYY or YYYY-MM or YYYY-MM-DD
# (raw string so that \d reaches the regex engine as written)
REG_SUBMITTED = re.compile(r'^\d{4}(-\d{2})?(-\d{2})?$')

# HTML character entity such as &gt; or &amp;
REG_HTML = re.compile(r'&[a-z]+;')


def check_publication(row):
    """Check the publication fields.

    @type row: gluon.dal.Row
    @param row: record defining a publication. It contains the publications
    table as well as its reference tables (categories, status and teams).

    @rtype: tuple
    @return:
        - the first element contains the list of messages
        - the second one contains the list of duplicate ids.

    @note: the text fields authors, authors_institute, title and
    report_numbers are treated as empty strings when they are not filled,
    so that an incomplete record is reported instead of raising a
    C{TypeError}.

    """
    T, li, idset = current.T, [], set()
    publication = row.publications
    usual = row.categories.usual

    # status code
    if row.status.code == '???':
        li.append(T("The status is ???"))

    # category
    if row.categories.code == UNDEF:
        li.append(T("The category is undefined"))

    # team
    if publication.id_teams == UNDEF_ID:
        li.append(T("The team is undefined"))

    # project
    if publication.id_projects == UNDEF_ID:
        li.append(T("The project is undefined"))

    # authors list
    authors = publication.authors or ''
    if 'et al' in authors:
        li.append(T("'et al.' in authors"))

    # CPPM authors (team name, ...)
    # an empty team name is skipped: it would match any string
    team = row.teams.team
    authors_institute = publication.authors_institute or ''
    if team and team in authors_institute:
        li.append(T("The institute authors contains the team name?"))

    # submitted date
    submitted = publication.submitted
    if not submitted:
        li.append(T("Submitted date is not defined"))
    elif not REG_SUBMITTED.match(submitted):
        li.append(T("Submitted date is not valid"))

    # publication URL
    url = publication.publication_url
    if url and 'pdf' not in url:
        li.append(T("Check that the publication URL corresponds to a pdf file."))

    # latex syntax: "^" and the arrow are only accepted inside a math
    # expression, i.e. when the title also contains a "$"
    title = publication.title or ''
    outside_math = "$" not in title
    rules = ("√" in title
             or ("^" in title and outside_math)
             or ("→" in title and outside_math)
             or "->" in title
             or "s**(1/2)" in title
             or REG_HTML.search(title))

    if rules:
        li.append(T("Check latex syntax in the title"))

    # "Note :" in report number
    value = publication.report_numbers or ''
    if "Note :" in value or "Note:" in value or ";" in value:
        li.append(T('Report numbers contains "Note :" or ";"'))

    # duplicate by origin
    ids = duplicate_origin(publication)
    if ids:
        idset.update(ids)
        li.append(T("Entries with duplicate origin"))

    # specific fields for article
    if usual == 'article':

        if publication.id_publishers == UNDEF_ID:
            li.append(T("Publishers is not defined"))

        if not publication.volume:
            li.append(T("Volume number is not defined"))

        if not publication.pages:
            li.append(T("Pages range is not defined"))

        if not publication.preprint:
            li.append(T("Preprint number is not defined"))

        ids = duplicate_article(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    # specific fields for proceeding and talk
    elif usual in ('proceeding', 'talk'):

        if not publication.conference_title:
            li.append(T("Conference title is not defined"))

        if not publication.conference_dates:
            li.append(T("Conference dates is not defined"))

        if not publication.conference_town:
            li.append(T("Conference town is not defined"))

        if not publication.id_countries:
            li.append(T("Conference country is not defined"))

        if not publication.conference_speaker:
            li.append(T("Conference speaker is missing"))

        ids = duplicate_conference(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    # specific fields for report
    elif usual == 'report':

        if not publication.report_numbers:
            li.append(T("Report number is missing"))

        ids = duplicate_report(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    return (li, list(idset))
182 183


184
def extend_ids(db, query, ids):
    """Helper function adding to C{ids} the publications selected by C{query}.

    @type db: gluon.dal.DAL
    @param db: the database connection, called as C{db(query)} to build
    the set of records.

    @type query: gluon.dal.query
    @param query: the selection criteria. The selected rows have to expose
    the publication identifier as C{row.publications.id}.

    @type ids: list of string
    @param ids: the current list of ids

    @note: the current list of publication ids is extended in place by those
    corresponding to the C{query}. The ids are unique in the list and the
    function returns nothing.

    """
    # membership is tested on a set, the list keeps the insertion order
    known = set(ids)

    # an empty selection simply yields no row: no need to count first
    for row in db(query).select():
        publication_id = str(row.publications.id)
        if publication_id not in known:
            known.add(publication_id)
            ids.append(publication_id)

    
def duplicate_article(publication):
    """Look for duplicate article.

    Candidates are the publications of category ACL or ACLN having the same
    team and the same publisher as C{publication}. Among them, the
    following criteria are applied in turn:

        - same title, volume and pages
        - same volume and pages
        - same title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    When it has a non-empty C{id}, that record is left out of the search.

    @rtype: list
    @return: list of ids (as strings, without repetition) corresponding
    to duplicate entries

    """
    db = current.globalenv['db']
    table = db.publications

    in_category = (db.categories.code == 'ACL') | (db.categories.code == 'ACLN')

    # conditions shared by all the criteria
    base = get_where_query(table)
    base = (base) & (in_category)
    base = (base) & (table.id_teams == publication['id_teams'])
    base = (base) & (table.id_publishers == publication['id_publishers'])

    # do not report the publication as a duplicate of itself
    if 'id' in publication and publication['id']:
        base = (base) & (table.id != publication['id'])

    same_title = table.title == publication['title']
    same_volume = table.volume == publication['volume']
    same_pages = table.pages == publication['pages']

    criteria = (
        (same_title, same_volume, same_pages),
        (same_volume, same_pages),
        (same_title,),
    )

    found = []
    for conditions in criteria:
        selection = base
        for condition in conditions:
            selection = (selection) & (condition)
        extend_ids(db, selection, found)

    return found


255
def duplicate_conference(publication):
    """Look for duplicate talk / proceeding.

    The comparison is performed on conference talk/proceeding (categories
    ACTI, ACTN and COM) published by the given team using the following
    criteria:

        - title, conference title, conference date and conference town
        - title, conference date and conference town
        - title, conference title and conference town

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    When it has a non-empty C{id}, that record is left out of the search.

    @rtype: list
    @return: list of ids (as strings, without repetition) corresponding
    to duplicate entries

    """
    ids = []
    db = current.globalenv['db']

    qcat = (db.categories.code == 'ACTI') | \
           (db.categories.code == 'ACTN') | \
           (db.categories.code == 'COM')

    # conditions shared by all the criteria, including the title
    qmain = get_where_query(db.publications)
    qmain = ((qmain) & (qcat))
    qmain = ((qmain) & (db.publications.id_teams == publication['id_teams']))
    qmain = ((qmain) & (db.publications.title == publication['title']))

    # do not report the publication as a duplicate of itself
    if 'id' in publication and publication['id']:
        qmain = ((qmain) & (db.publications.id != publication['id']))

    # title, conference title, conference date and conference town
    query = ((qmain) & (db.publications.conference_title == publication['conference_title']))
    query = ((query) & (db.publications.conference_dates == publication['conference_dates']))
    query = ((query) & (db.publications.conference_town == publication['conference_town']))
    extend_ids(db, query, ids)

    # title, conference date and conference town
    # start again from qmain: building on the previous query would keep
    # the conference title condition and repeat the first criterion
    query = ((qmain) & (db.publications.conference_dates == publication['conference_dates']))
    query = ((query) & (db.publications.conference_town == publication['conference_town']))
    extend_ids(db, query, ids)

    # title, conference title and conference town
    query = ((qmain) & (db.publications.conference_title == publication['conference_title']))
    query = ((query) & (db.publications.conference_town == publication['conference_town']))
    extend_ids(db, query, ids)

    return ids


def duplicate_origin(publication):
    """Look for publications with the same value in the origin field.

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    The C{origin} key is required. When C{id} is present and not empty,
    that record is left out of the search.

    @rtype: list
    @return: list of ids (as strings) corresponding to duplicate entries.
    It is empty when the origin field is empty.

    """
    # protection against empty origin field
    if not publication['origin']:
        return []

    db = current.globalenv['db']

    # look for publication with the same origin field
    query = db.publications.origin == publication['origin']

    # do not report the publication as a duplicate of itself; a record
    # not yet stored has no id to exclude
    if 'id' in publication and publication['id']:
        query = ((db.publications.id != publication['id']) & (query))

    # an empty selection yields no row: no need to count first
    return [str(row.id) for row in db(query).select()]


334
def duplicate_report(publication):
    """Look for duplicate report.

    Candidates are the publications of category AP having the same team
    as C{publication}. The only criterion is:

        - same title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    When it has a non-empty C{id}, that record is left out of the search.

    @rtype: list
    @return: list of ids (as strings, without repetition) corresponding
    to duplicate entries

    """
    db = current.globalenv['db']
    table = db.publications

    in_category = db.categories.code == 'AP'

    selection = get_where_query(table)
    selection = (selection) & (in_category)

    for name in ('id_teams', 'title'):
        selection = (selection) & (getattr(table, name) == publication[name])

    # do not report the publication as a duplicate of itself
    if 'id' in publication and publication['id']:
        selection = (selection) & (table.id != publication['id'])

    found = []
    extend_ids(db, selection, found)

    return found