check_tools.py 9.62 KB
Newer Older
LE GAC Renaud's avatar
LE GAC Renaud committed
1 2 3 4 5 6 7 8 9
# -*- coding: utf-8 -*-
"""a collection of tools to check rows.

@author: R. Le Gac

"""
import re

from gluon import current
10 11 12 13
from plugin_dbui import (UNDEF, 
                         UNDEF_ID, 
                         get_id, 
                         get_where_query)
LE GAC Renaud's avatar
LE GAC Renaud committed
14 15 16 17 18 19 20 21 22 23


# syntax for the submission date: YYYY or YYYY-MM or YYYY-MM-DD
# (raw string, so that \d reaches the regular expression engine untouched)
REG_SUBMITTED = re.compile(r'^\d{4}(-\d{2})?(-\d{2})?$')

# HTML entity written with lower case letters, like &gt;
REG_HTML = re.compile(r'&[a-z]+;')


def check_publication(row):
    """Check the publication fields.

    @type row: gluon.dal.Row
    @param row: record defining a publication. It contains the publications
    table as well as its reference tables (categories, status and teams are
    read here).

    @rtype: tuple
    @return:
        - the first element contains the list of messages
        - the second one contains the list of duplicate ids.

    @note: the text fields C{authors}, C{authors_cppm}, C{title} and
    C{report_numbers} are treated as empty strings when they are not
    filled (C{None}), instead of raising a C{TypeError}.

    """
    # NOTE: every message is written as T("literal") on purpose, the
    # translated strings being looked up by their literal value.
    T, li, ids = current.T, [], []
    publication = row.publications
    usual = row.categories.usual

    # status code
    if row.status.code == '???':
        li.append(T("The status is ???"))

    # category
    if row.categories.code == UNDEF:
        li.append(T("The category is undefined"))

    # authors list
    if 'et al' in (publication.authors or ''):
        li.append(T("'et al.' in authors"))

    # CPPM authors (team name, ...)
    if row.teams.team in (publication.authors_cppm or ''):
        li.append(T("The cppm authors contains the team name?"))

    # submitted date: either missing or not matching REG_SUBMITTED
    submitted = publication.submitted
    if not submitted:
        li.append(T("Submitted date is not defined"))
    elif not REG_SUBMITTED.match(submitted):
        li.append(T("Submitted date is not valid"))

    # latex syntax: mathematical symbols outside of a $...$ environment
    # or HTML entities left in the title
    title = publication.title or ''
    rules = "√" in title or \
            ("^" in title and "$" not in title) or \
            ("→" in title and "$" not in title) or \
            REG_HTML.search(title)

    if rules:
        li.append(T("Check latex syntax in the title"))

    # "Note :" in report number
    value = publication.report_numbers or ''
    rules = "Note :" in value or \
            "Note:" in value or \
            ";" in value

    if rules:
        li.append(T('Report numbers contains "Note :" or ";"'))

    # the three cases below are mutually exclusive since they test
    # the same value
    if usual == 'article':

        # specific fields for article
        if publication.id_publishers == UNDEF_ID:
            li.append(T("Publishers is not defined"))

        if not publication.volume:
            li.append(T("Volume number is not defined"))

        if not publication.pages:
            li.append(T("Pages range is not defined"))

        if not publication.preprint:
            li.append(T("Preprint number is not defined"))

        ids = duplicate_article(publication)
        if ids:
            li.append(T("Possible duplicate entries [%s]") % ', '.join(ids))

    elif usual in ('proceeding', 'talk'):

        # specific fields for proceeding and talk
        if not publication.conference_title:
            li.append(T("Conference title is not defined"))

        if not publication.conference_dates:
            li.append(T("Conference dates is not defined"))

        if not publication.conference_town:
            li.append(T("Conference town is not defined"))

        if not publication.id_countries:
            li.append(T("Conference country is not defined"))

        if not publication.conference_speaker:
            li.append(T("Conference speaker is missing"))

        ids = duplicate_conference(publication)
        if ids:
            li.append(T("Possible duplicate entries [%s]") % ', '.join(ids))

    elif usual == 'report':

        # specific fields for report
        if not publication.report_numbers:
            li.append(T("Report number is missing"))

        ids = duplicate_report(publication)
        if ids:
            li.append(T("Possible duplicate entries [%s]") % ', '.join(ids))

    return (li, ids)
154 155


156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
def _extend_ids(db, query, ids):
    """helper functions
    
    @type db: gluon.dal.DAL
    @param db:
    
    @type query: gluon.dal.query
    @param query: 
    
    @type ids: list of string
    @param ids: the current list of ids
    
    @note: the current list of publication ids will be extend by those
    corresponding to the C{query}. The id are unique in the list.
    
    """
    set = db(query)
    if set.count():
        for row in set.select():
            id = str(row.publications.id)
            if id not in ids:
                ids.append(id)

    
def duplicate_article(publication):
    """Return the ids of the articles looking like the given one.

    The candidates are the articles (category code ACL or ACLN) of the
    same team and of the same publisher. The record itself is excluded
    when C{publication} contains the key C{id}. A candidate is kept when
    it satisfies one of the following criteria:

        - title, publishers, volume and pages
        - publisher, volume and pages
        - publisher and title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    db = current.globalenv['db']
    table = db.publications
    found = []

    is_article = (db.categories.code == 'ACL') | (db.categories.code == 'ACLN')

    # conditions shared by all the criteria
    base = get_where_query(table)
    base = ((base) & (is_article))
    base = ((base) & (table.id_teams == publication['id_teams']))
    base = ((base) & (table.id_publishers == publication['id_publishers']))

    if 'id' in publication:
        base = ((base) & (table.id != publication['id']))

    same_title = table.title == publication['title']
    same_volume = table.volume == publication['volume']
    same_pages = table.pages == publication['pages']

    # title, publishers, volume and pages
    criterion = ((base) & (same_title))
    criterion = ((criterion) & (same_volume))
    criterion = ((criterion) & (same_pages))
    _extend_ids(db, criterion, found)

    # publisher, volume and pages
    criterion = ((base) & (same_volume))
    criterion = ((criterion) & (same_pages))
    _extend_ids(db, criterion, found)

    # publisher and title
    criterion = ((base) & (same_title))
    _extend_ids(db, criterion, found)

    return found


227
def duplicate_conference(publication):
    """Look for duplicate talk / proceeding.

    The comparison is performed on conference talks / proceedings
    (category code ACTI, ACTN or COM) published by the given team and
    having the same title. The record itself is excluded when
    C{publication} contains the key C{id}. The following criteria are
    applied:

        - title, conference title, conference date and conference town
        - title, conference date and conference town
        - title, conference title and conference town

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    ids = []
    db = current.globalenv['db']
    table = db.publications

    qcat = (db.categories.code == 'ACTI') | \
           (db.categories.code == 'ACTN') | \
           (db.categories.code == 'COM')

    # conditions shared by all the criteria, including the title
    qmain = get_where_query(table)
    qmain = ((qmain) & (qcat))
    qmain = ((qmain) & (table.id_teams == publication['id_teams']))
    qmain = ((qmain) & (table.title == publication['title']))

    if 'id' in publication:
        qmain = ((qmain) & (table.id != publication['id']))

    # title, conference title, conference date and conference town
    query = ((qmain) & (table.conference_title == publication['conference_title']))
    query = ((query) & (table.conference_dates == publication['conference_dates']))
    query = ((query) & (table.conference_town == publication['conference_town']))
    _extend_ids(db, query, ids)

    # title, conference date and conference town
    # Restart from qmain: extending the previous query would keep the
    # condition on the conference title and repeat the first criterion.
    query = ((qmain) & (table.conference_dates == publication['conference_dates']))
    query = ((query) & (table.conference_town == publication['conference_town']))
    _extend_ids(db, query, ids)

    # title, conference title and conference town
    query = ((qmain) & (table.conference_title == publication['conference_title']))
    query = ((query) & (table.conference_town == publication['conference_town']))
    _extend_ids(db, query, ids)

    return ids


277
def duplicate_report(publication):
    """Return the ids of the reports looking like the given one.

    The candidates are the reports (category code AP) of the same team.
    The record itself is excluded when C{publication} contains the key
    C{id}. A candidate is kept when it satisfies the criterion:

        - title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    db = current.globalenv['db']
    table = db.publications
    found = []

    is_report = db.categories.code == 'AP'

    criterion = get_where_query(table)
    criterion = ((criterion) & (is_report))
    criterion = ((criterion) & (table.id_teams == publication['id_teams']))
    criterion = ((criterion) & (table.title == publication['title']))

    if 'id' in publication:
        criterion = ((criterion) & (table.id != publication['id']))

    _extend_ids(db, criterion, found)

    return found