check_tools.py 10.6 KB
Newer Older
LE GAC Renaud's avatar
LE GAC Renaud committed
1 2 3 4 5 6 7 8 9
# -*- coding: utf-8 -*-
"""a collection of tools to check rows.

@author: R. Le Gac

"""
import re

from gluon import current
10 11 12 13
from plugin_dbui import (UNDEF, 
                         UNDEF_ID, 
                         get_id, 
                         get_where_query)
LE GAC Renaud's avatar
LE GAC Renaud committed
14 15 16 17 18 19 20 21 22 23


# syntax for the submission date: YYYY or YYYY-MM or YYYY-MM-DD
# (raw string so that the \d escapes reach the regex engine untouched)
REG_SUBMITTED = re.compile(r'^\d{4}(-\d{2})?(-\d{2})?$')

# named HTML character entity like &gt;
REG_HTML = re.compile('&[a-z]+;')


def check_publication(row):
    """Check the publication fields.

    Text fields holding C{None} are handled as empty strings, so that a
    record with missing values is checked instead of raising a
    C{TypeError} on the first membership test.

    @type row: gluon.dal.Row
    @param row: record defining a publication. It contains the publications
    table as well as its reference tables.

    @rtype: tuple
    @return:
        - the first element contains the list of messages
        - the second one contains the list of duplicate ids.

    """
    T, li, idset = current.T, [], set()
    publication = row.publications
    usual = row.categories.usual

    # status code
    if row.status.code == '???':
        li.append(T("The status is ???"))

    # category
    if row.categories.code == UNDEF:
        li.append(T("The category is undefined"))

    # authors list
    if 'et al' in (publication.authors or ''):
        li.append(T("'et al.' in authors"))

    # CPPM authors (team name, ...)
    team = row.teams.team
    if team is not None and team in (publication.authors_cppm or ''):
        li.append(T("The cppm authors contains the team name?"))

    # submitted date: either missing or not matching the expected syntax
    submitted = publication.submitted
    if not submitted:
        li.append(T("Submitted date is not defined"))
    elif not REG_SUBMITTED.match(submitted):
        li.append(T("Submitted date is not valid"))

    # latex syntax
    title = publication.title or ''
    bad_latex = "√" in title or \
                ("^" in title and "$" not in title) or \
                ("→" in title and "$" not in title) or \
                ("->" in title) or \
                ("s**(1/2)" in title) or \
                REG_HTML.search(title)

    if bad_latex:
        li.append(T("Check latex syntax in the title"))

    # "Note :" in report number
    value = publication.report_numbers or ''
    if "Note :" in value or "Note:" in value or ";" in value:
        li.append(T('Report numbers contains "Note :" or ";"'))

    # duplicate by origin
    # duplicate_origin does not exclude the record itself from its query,
    # hence a duplicate exists only when more than one id is returned.
    ids = duplicate_origin(publication)
    if len(ids) > 1:
        idset.update(ids)
        li.append(T("Entries with duplicate origin"))

    # specific fields for article
    if usual == 'article':

        if publication.id_publishers == UNDEF_ID:
            li.append(T("Publishers is not defined"))

        if not publication.volume:
            li.append(T("Volume number is not defined"))

        if not publication.pages:
            li.append(T("Pages range is not defined"))

        if not publication.preprint:
            li.append(T("Preprint number is not defined"))

        ids = duplicate_article(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    # specific fields for proceeding and talk
    if usual in ('proceeding', 'talk'):

        if not publication.conference_title:
            li.append(T("Conference title is not defined"))

        if not publication.conference_dates:
            li.append(T("Conference dates is not defined"))

        if not publication.conference_town:
            li.append(T("Conference town is not defined"))

        if not publication.id_countries:
            li.append(T("Conference country is not defined"))

        if not publication.conference_speaker:
            li.append(T("Conference speaker is missing"))

        ids = duplicate_conference(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    # specific fields for report
    if usual == 'report':

        if not publication.report_numbers:
            li.append(T("Report number is missing"))

        ids = duplicate_report(publication)
        if ids:
            idset.update(ids)
            li.append(T("Possible duplicate entries"))

    return (li, list(idset))
166 167


168
def extend_ids(db, query, ids):
    """Helper function extending a list of publication ids.

    @type db: gluon.dal.DAL
    @param db: the database connection, called with C{query} to get the
    set of matching records.

    @type query: gluon.dal.query
    @param query: the selection; the selected rows have to expose the
    publication identifier as C{row.publications.id}.

    @type ids: list of string
    @param ids: the current list of ids. It is modified in place.

    @note: the current list of publication ids will be extended by those
    corresponding to the C{query}. The ids are converted to string and
    are unique in the list.

    """
    # A single select is enough: iterating over an empty selection does
    # nothing, so there is no need to issue a count query first.
    for row in db(query).select():
        publication_id = str(row.publications.id)
        if publication_id not in ids:
            ids.append(publication_id)

    
def duplicate_article(publication):
    """Search the database for articles duplicating the given one.

    Only the articles (category code ACL or ACLN) of the same team and of
    the same publisher are considered. An entry is a duplicate candidate
    when it has the same:

        - title, volume and pages
        - volume and pages
        - title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    db = current.globalenv['db']
    publications = db.publications
    found = []

    is_article = (db.categories.code == 'ACL') | (db.categories.code == 'ACLN')

    # conditions shared by all the criteria
    base = get_where_query(publications)
    base = base & is_article
    base = base & (publications.id_teams == publication['id_teams'])
    base = base & (publications.id_publishers == publication['id_publishers'])

    # an already stored publication must not match itself
    if 'id' in publication:
        base = base & (publications.id != publication['id'])

    same_title = publications.title == publication['title']
    same_volume = publications.volume == publication['volume']
    same_pages = publications.pages == publication['pages']

    # title, publishers, volume and pages
    extend_ids(db, base & same_title & same_volume & same_pages, found)

    # publisher, volume and pages
    extend_ids(db, base & same_volume & same_pages, found)

    # publisher and title
    extend_ids(db, base & same_title, found)

    return found


239
def duplicate_conference(publication):
    """Look for duplicate talk / proceeding.
    The comparison is performed on conference talk/proceeding published
    by the given team (category code ACTI, ACTN or COM) using the
    following criteria:

        - title, conference title, conference date and conference town
        - title, conference date and conference town
        - title, conference title and conference town

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    ids = []
    db = current.globalenv['db']
    publications = db.publications

    qcat = (db.categories.code == 'ACTI') | \
           (db.categories.code == 'ACTN') | \
           (db.categories.code == 'COM')

    # conditions shared by all the criteria, including the title
    qmain = get_where_query(publications)
    qmain = ((qmain) & (qcat))
    qmain = ((qmain) & (publications.id_teams == publication['id_teams']))
    qmain = ((qmain) & (publications.title == publication['title']))

    # an already stored publication must not match itself
    if 'id' in publication:
        qmain = ((qmain) & (publications.id != publication['id']))

    # title, conference title, conference date and conference town
    query = ((qmain) & (publications.conference_title == publication['conference_title']))
    query = ((query) & (publications.conference_dates == publication['conference_dates']))
    query = ((query) & (publications.conference_town == publication['conference_town']))
    extend_ids(db, query, ids)

    # title, conference date and conference town
    # Start again from qmain: chaining on the previous query would keep
    # the conference title condition and only repeat the first criterion.
    query = ((qmain) & (publications.conference_dates == publication['conference_dates']))
    query = ((query) & (publications.conference_town == publication['conference_town']))
    extend_ids(db, query, ids)

    # title, conference title and conference town
    query = ((qmain) & (publications.conference_title == publication['conference_title']))
    query = ((query) & (publications.conference_town == publication['conference_town']))
    extend_ids(db, query, ids)

    return ids


def duplicate_origin(publication):
    """Look for publications with the same value in the origin field.

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids, as strings, of the publications having the same
    origin. The query does not exclude the given publication, and the
    list is empty when the origin field is not set.

    """
    db = current.globalenv['db']

    # protection against empty origin field
    origin = publication['origin']
    if not origin:
        return []

    # look for publication with the same origin field
    # (one select is enough, an empty selection yields an empty list)
    rows = db(db.publications.origin == origin).select()
    return [str(row.id) for row in rows]


316
def duplicate_report(publication):
    """Search the database for reports duplicating the given one.

    Only the reports (category code AP) of the same team are considered.
    An entry is a duplicate candidate when it has the same:

        - title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    db = current.globalenv['db']
    publications = db.publications
    found = []

    is_report = db.categories.code == 'AP'

    selection = get_where_query(publications)
    selection = selection & is_report
    selection = selection & (publications.id_teams == publication['id_teams'])
    selection = selection & (publications.title == publication['title'])

    # an already stored publication must not match itself
    if 'id' in publication:
        selection = selection & (publications.id != publication['id'])

    extend_ids(db, selection, found)

    return found