check_tools.py 10.5 KB
Newer Older
LE GAC Renaud's avatar
LE GAC Renaud committed
1 2 3 4 5 6 7 8 9
# -*- coding: utf-8 -*-
"""a collection of tools to check rows.

@author: R. Le Gac

"""
import re

from gluon import current
10 11 12 13
from plugin_dbui import (UNDEF, 
                         UNDEF_ID, 
                         get_id, 
                         get_where_query)
LE GAC Renaud's avatar
LE GAC Renaud committed
14 15 16 17 18 19 20 21 22 23


# syntax for the submission date: YYYY or YYYY-MM or YYYY-MM-DD
# (raw string, so that the \d escapes reach the regex engine untouched)
REG_SUBMITTED = re.compile(r'^\d{4}(-\d{2})?(-\d{2})?$')

# HTML character entity such as &gt; or &amp;
REG_HTML = re.compile(r'&[a-z]+;')


def check_publication(row):
    """Check the publication fields.

    @type row: gluon.dal.Row
    @param row: record defining a publication. It contains the publications
    table as well as its reference tables (the C{status}, C{categories} and
    C{teams} tables are read here).

    @rtype: tuple
    @return:
        - the first element contains the list of messages
        - the second one contains the list of duplicate ids (strings),
          without repetition. It gathers the entries sharing the same
          origin and those found by the category specific search.

    @note: the ids of the entries sharing the same origin are those
    returned by C{duplicate_origin}; they are kept only when more than one
    entry is found.

    """
    T = current.T
    li, ids = [], []
    publication = row.publications

    # status code
    if row.status.code == '???':
        li.append(T("The status is ???"))

    # category
    if row.categories.code == UNDEF:
        li.append(T("The category is undefined"))

    # authors list (an undefined field is handled as an empty string)
    if 'et al' in (publication.authors or ''):
        li.append(T("'et al.' in authors"))

    # CPPM authors (team name, ...)
    # An empty team name is skipped since '' is contained in any string.
    team = row.teams.team
    if team and team in (publication.authors_cppm or ''):
        li.append(T("The cppm authors contains the team name?"))

    # submitted date
    submitted = publication.submitted
    if not submitted:
        li.append(T("Submitted date is not defined"))

    elif not REG_SUBMITTED.match(submitted):
        li.append(T("Submitted date is not valid"))

    # latex syntax
    title = publication.title or ''
    rules = "√" in title or \
            ("^" in title and "$" not in title) or \
            ("→" in title and "$" not in title) or \
            REG_HTML.search(title)

    if rules:
        li.append(T("Check latex syntax in the title"))

    # "Note :" in report number
    value = publication.report_numbers or ''
    rules = "Note :" in value or \
            "Note:" in value or \
            ";" in value

    if rules:
        li.append(T('Report numbers contains "Note :" or ";"'))

    # duplicate by origin
    # A single id is not a duplicate: the search does not exclude any entry.
    found = duplicate_origin(publication)
    if len(found) > 1:
        li.append(T("Entries with duplicate origin [%s]") % ', '.join(found))
        _merge_ids(ids, found)

    usual = row.categories.usual

    # specific fields for article
    if usual == 'article':

        if publication.id_publishers == UNDEF_ID:
            li.append(T("Publishers is not defined"))

        if not publication.volume:
            li.append(T("Volume number is not defined"))

        if not publication.pages:
            li.append(T("Pages range is not defined"))

        if not publication.preprint:
            li.append(T("Preprint number is not defined"))

        found = duplicate_article(publication)
        if found:
            li.append(T("Possible duplicate entries [%s]") % ', '.join(found))
            _merge_ids(ids, found)

    # specific fields for proceeding and talk
    if usual in ('proceeding', 'talk'):

        if not publication.conference_title:
            li.append(T("Conference title is not defined"))

        if not publication.conference_dates:
            li.append(T("Conference dates is not defined"))

        if not publication.conference_town:
            li.append(T("Conference town is not defined"))

        if not publication.id_countries:
            li.append(T("Conference country is not defined"))

        if not publication.conference_speaker:
            li.append(T("Conference speaker is missing"))

        found = duplicate_conference(publication)
        if found:
            li.append(T("Possible duplicate entries [%s]") % ', '.join(found))
            _merge_ids(ids, found)

    # specific fields for report
    if usual == 'report':

        if not publication.report_numbers:
            li.append(T("Report number is missing"))

        found = duplicate_report(publication)
        if found:
            li.append(T("Possible duplicate entries [%s]") % ', '.join(found))
            _merge_ids(ids, found)

    return (li, ids)


def _merge_ids(ids, new_ids):
    """Append to C{ids}, in place, the elements of C{new_ids} not yet in it.

    @type ids: list of string
    @param ids: the list to be extended

    @type new_ids: list of string
    @param new_ids: the candidate ids

    """
    for new_id in new_ids:
        if new_id not in ids:
            ids.append(new_id)
160 161


162
def extend_ids(db, query, ids):
    """Extend the list of ids with those of the publications selected
    by the query.

    @type db: gluon.dal.DAL
    @param db: callable returning, for a query, a set which can be selected

    @type query: gluon.dal.query
    @param query: the selection criteria. The selected rows have to expose
    the publication identifier as C{row.publications.id}.

    @type ids: list of string
    @param ids: the current list of ids. It is modified in place.

    @note: the current list of publication ids is extended by those
    corresponding to the C{query}. Each id is converted to a string and
    appears only once in the list.

    """
    # Iterating over an empty selection is a no-op, therefore there is
    # no need to count the rows first.
    for row in db(query).select():
        pub_id = str(row.publications.id)
        if pub_id not in ids:
            ids.append(pub_id)

    
def duplicate_article(publication):
    """Search for articles duplicating the given one.

    The candidates are the articles (category code ACL or ACLN) of the
    same team and of the same publisher. The given publication is excluded
    when it carries an C{id}. The following criteria are applied in turn:

        - title, publishers, volume and pages
        - publisher, volume and pages
        - publisher and title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    db = current.globalenv['db']
    publications = db.publications
    found = []

    # conditions shared by all the criteria
    is_article = (db.categories.code == 'ACL') | (db.categories.code == 'ACLN')

    base = (get_where_query(publications)) & (is_article)
    base = (base) & (publications.id_teams == publication['id_teams'])
    base = (base) & (publications.id_publishers == publication['id_publishers'])

    if 'id' in publication:
        base = (base) & (publications.id != publication['id'])

    # elementary conditions combined by the criteria
    same_title = publications.title == publication['title']
    same_volume = publications.volume == publication['volume']
    same_pages = publications.pages == publication['pages']

    # the criteria, in the order in which they fill the list of ids
    criteria = (
        (same_title, same_volume, same_pages),
        (same_volume, same_pages),
        (same_title,),
    )

    for conditions in criteria:
        query = base
        for condition in conditions:
            query = (query) & (condition)
        extend_ids(db, query, found)

    return found


233
def duplicate_conference(publication):
    """Look for duplicate talk / proceeding.

    The comparison is performed on conference talks / proceedings
    (category code ACTI, ACTN or COM) published by the given team and
    having the same title. The given publication is excluded when it
    carries an C{id}. The following criteria are applied in turn:

        - title, conference title, conference date and conference town
        - title, conference date and conference town
        - title, conference title and conference town

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    ids = []
    db = current.globalenv['db']
    publications = db.publications

    qcat = (db.categories.code == 'ACTI') | \
           (db.categories.code == 'ACTN') | \
           (db.categories.code == 'COM')

    # conditions shared by all the criteria (title included)
    qmain = get_where_query(publications)
    qmain = ((qmain) & (qcat))
    qmain = ((qmain) & (publications.id_teams == publication['id_teams']))
    qmain = ((qmain) & (publications.title == publication['title']))

    if 'id' in publication:
        qmain = ((qmain) & (publications.id != publication['id']))

    same_title = publications.conference_title == publication['conference_title']
    same_dates = publications.conference_dates == publication['conference_dates']
    same_town = publications.conference_town == publication['conference_town']

    # title, conference title, conference date and conference town
    query = ((qmain) & (same_title))
    query = ((query) & (same_dates))
    query = ((query) & (same_town))
    extend_ids(db, query, ids)

    # title, conference date and conference town
    # Each criterion starts from qmain: chaining on the previous query
    # would keep the constraint on the conference title.
    query = ((qmain) & (same_dates))
    query = ((query) & (same_town))
    extend_ids(db, query, ids)

    # title, conference title and conference town
    query = ((qmain) & (same_title))
    query = ((query) & (same_town))
    extend_ids(db, query, ids)

    return ids


def duplicate_origin(publication):
    """Look for publications with the same value in the origin field.

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids (strings) of the entries having the same origin.
    The list is empty when the origin field is empty.

    @note: the query does not exclude any entry. Hence, a publication
    already stored in the database is found by its own origin.

    """
    origin = publication['origin']

    # protection against empty origin field
    if not origin:
        return []

    db = current.globalenv['db']

    # look for publications with the same origin field,
    # only the identifier is needed
    rows = db(db.publications.origin == origin).select(db.publications.id)
    return [str(row.id) for row in rows]


310
def duplicate_report(publication):
    """Search for reports duplicating the given one.

    The candidates are the reports (category code AP) of the same team.
    The given publication is excluded when it carries an C{id}.
    The comparison uses the following criteria:

        - title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    db = current.globalenv['db']
    publications = db.publications

    # conditions added, in this order, to the query of the publications table
    conditions = [
        db.categories.code == 'AP',
        publications.id_teams == publication['id_teams'],
        publications.title == publication['title'],
    ]

    if 'id' in publication:
        conditions.append(publications.id != publication['id'])

    query = get_where_query(publications)
    for condition in conditions:
        query = (query) & (condition)

    found = []
    extend_ids(db, query, found)

    return found