check_tools.py 9.34 KB
Newer Older
LE GAC Renaud's avatar
LE GAC Renaud committed
1 2 3 4 5 6 7 8 9
# -*- coding: utf-8 -*-
"""a collection of tools to check rows.

@author: R. Le Gac

"""
import re

from gluon import current
10 11 12 13
from plugin_dbui import (UNDEF, 
                         UNDEF_ID, 
                         get_id, 
                         get_where_query)
LE GAC Renaud's avatar
LE GAC Renaud committed
14 15 16 17 18 19 20 21 22 23


# syntax for the submission date YYYY or YYYY-MM or YYYY-MM-DD
# Only the shape is checked (digits and dashes), not the calendar validity.
# The pattern is a raw string so that \d reaches the regular expression
# engine untouched instead of being read as a string escape sequence.
REG_SUBMITTED = re.compile(r'^\d{4}(-\d{2})?(-\d{2})?$')

# HTML character entity like &gt; or &amp; (lower case letters only)
REG_HTML = re.compile('&[a-z]+;')


def check_publication(row):
    """Check the fields of a publication and report possible problems.

    The checks cover the status, the category, the authors, the submission
    date and the LaTeX syntax of the title. They are followed by checks
    which depend on the usual category (article, proceeding / talk or
    report), including a search for possible duplicate entries.

    Text fields which are not filled (C{None}) are handled as empty strings
    instead of raising a C{TypeError}.

    @type row: gluon.dal.Row
    @param row: record defining a publication. It contains the publications
    table as well as its reference tables. The function reads C{row.status},
    C{row.categories}, C{row.teams} and C{row.publications}.

    @rtype: tuple
    @return:
        - the first element contains the list of messages
        - the second one contains the list of duplicate ids.

    """
    T = current.T
    publication = row.publications
    li, ids = [], []

    # status code
    if row.status.code == '???':
        li.append(T("The status is ???"))

    # category
    if row.categories.code == UNDEF:
        li.append(T("The category is undefined"))

    # authors list
    # an empty field can be None, on which the "in" operator raises TypeError
    if 'et al' in (publication.authors or ''):
        li.append(T("'et al.' in authors"))

    # CPPM authors (team name, ...)
    # the test is skipped without a team name: None is not a valid left
    # operand and the empty string is contained in any string
    team = row.teams.team
    if team and team in (publication.authors_cppm or ''):
        li.append(T("The cppm authors contains the team name?"))

    # submitted date: either missing or not matching the expected syntax
    submitted = publication.submitted
    if not submitted:
        li.append(T("Submitted date is not defined"))
    elif not REG_SUBMITTED.match(submitted):
        li.append(T("Submitted date is not valid"))

    # latex syntax
    # suspicious titles contain a square root sign, a caret or an arrow
    # outside of a math environment (no "$"), or an HTML entity
    title = publication.title or ''
    suspicious = (
        "√" in title
        or ("^" in title and "$" not in title)
        or ("→" in title and "$" not in title)
        or REG_HTML.search(title)
    )
    if suspicious:
        li.append(T("Check latex syntax in the title"))

    # the usual categories are mutually exclusive
    usual = row.categories.usual

    # specific fields for article
    if usual == 'article':

        if publication.id_publishers == UNDEF_ID:
            li.append(T("Publishers is not defined"))

        if not publication.volume:
            li.append(T("Volume number is not defined"))

        if not publication.pages:
            li.append(T("Pages range is not defined"))

        if not publication.preprint:
            li.append(T("Preprint number is not defined"))

        ids = duplicate_article(publication)

    # specific fields for proceeding and talk
    elif usual in ('proceeding', 'talk'):

        if not publication.conference_title:
            li.append(T("Conference title is not defined"))

        if not publication.conference_dates:
            li.append(T("Conference dates is not defined"))

        if not publication.conference_town:
            li.append(T("Conference town is not defined"))

        if not publication.id_countries:
            li.append(T("Conference country is not defined"))

        if not publication.conference_speaker:
            li.append(T("Conference speaker is missing"))

        ids = duplicate_conference(publication)

    # specific fields for report
    elif usual == 'report':

        if not publication.report_numbers:
            li.append(T("Report number is missing"))

        ids = duplicate_report(publication)

    # the duplicate message always comes last since it closes each of the
    # category specific sections above
    if ids:
        li.append(T("Possible duplicate entries [%s]") % ', '.join(ids))

    return (li, ids)
144 145


146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
def _extend_ids(db, query, ids):
    """Extend a list of publication ids with those selected by a query.

    @type db: gluon.dal.DAL
    @param db: the database connection. It is called with C{query} to get
    the set of matching records.

    @type query: gluon.dal.query
    @param query: the selection criteria. Each selected row has to expose
    C{row.publications.id}.

    @type ids: list of string
    @param ids: the current list of ids. It is modified in place.

    @note: the current list of publication ids will be extended by those
    corresponding to the C{query}. The ids are converted to strings and
    are unique in the list.

    """
    # An empty selection leaves the list untouched, therefore counting the
    # records first (one more database query) is not needed.
    for row in db(query).select():
        publication_id = str(row.publications.id)
        if publication_id not in ids:
            ids.append(publication_id)

    
def duplicate_article(publication):
    """Search the database for articles duplicating the given one.

    Candidates are the articles (category code ACL or ACLN) of the same
    team and of the same publisher. When the publication has an C{id},
    the record with that id is excluded. A candidate is a duplicate when
    one of the following sets of fields is identical:

        - title, volume and pages
        - volume and pages
        - title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    The keys C{id_teams}, C{id_publishers}, C{title}, C{volume} and
    C{pages} are read, C{id} is optional.

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    db = current.globalenv['db']
    articles = db.publications

    is_article = (db.categories.code == 'ACL') | (db.categories.code == 'ACLN')

    # constraints shared by all the criteria
    base = get_where_query(articles)
    base = base & is_article
    base = base & (articles.id_teams == publication['id_teams'])
    base = base & (articles.id_publishers == publication['id_publishers'])

    if 'id' in publication:
        base = base & (articles.id != publication['id'])

    same_title = articles.title == publication['title']
    same_volume = articles.volume == publication['volume']
    same_pages = articles.pages == publication['pages']

    # one query per criterion, from the most to the least restrictive
    criteria = (
        base & same_title & same_volume & same_pages,
        base & same_volume & same_pages,
        base & same_title,
    )

    duplicates = []
    for criterion in criteria:
        _extend_ids(db, criterion, duplicates)

    return duplicates


217
def duplicate_conference(publication):
    """Look for duplicate talk / proceeding.

    The comparison is performed on conference talk / proceeding (category
    code ACTI, ACTN or COM) published by the given team, using the
    following criteria:

        - title, conference title, conference date and conference town
        - title, conference date and conference town
        - title, conference title and conference town

    When the publication has an C{id}, the record with that id is excluded
    from the search.

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    The keys C{id_teams}, C{title}, C{conference_title},
    C{conference_dates} and C{conference_town} are read, C{id} is optional.

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    ids = []
    db = current.globalenv['db']

    qcat = (db.categories.code == 'ACTI') | \
           (db.categories.code == 'ACTN') | \
           (db.categories.code == 'COM')

    # constraints shared by all the criteria, including the title
    qmain = get_where_query(db.publications)
    qmain = ((qmain) & (qcat))
    qmain = ((qmain) & (db.publications.id_teams == publication['id_teams']))
    qmain = ((qmain) & (db.publications.title == publication['title']))

    if 'id' in publication:
        qmain = ((qmain) & (db.publications.id != publication['id']))

    # title, conference title, conference date and conference town
    query = ((qmain) & (db.publications.conference_title == publication['conference_title']))
    query = ((query) & (db.publications.conference_dates == publication['conference_dates']))
    query = ((query) & (db.publications.conference_town == publication['conference_town']))
    _extend_ids(db, query, ids)

    # title, conference date and conference town
    # The query restarts from qmain. Chaining on the previous query would
    # keep its conference title constraint and repeat the first search.
    query = ((qmain) & (db.publications.conference_dates == publication['conference_dates']))
    query = ((query) & (db.publications.conference_town == publication['conference_town']))
    _extend_ids(db, query, ids)

    # title, conference title and conference town
    query = ((qmain) & (db.publications.conference_title == publication['conference_title']))
    query = ((query) & (db.publications.conference_town == publication['conference_town']))
    _extend_ids(db, query, ids)

    return ids


267
def duplicate_report(publication):
    """Search the database for reports duplicating the given one.

    Candidates are the reports (category code AP) of the same team.
    When the publication has an C{id}, the record with that id is
    excluded. A candidate is a duplicate when the following field is
    identical:

        - title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values.
    The keys C{id_teams} and C{title} are read, C{id} is optional.

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    db = current.globalenv['db']
    reports = db.publications

    is_report = db.categories.code == 'AP'

    selection = get_where_query(reports)
    selection = selection & is_report
    selection = selection & (reports.id_teams == publication['id_teams'])
    selection = selection & (reports.title == publication['title'])

    if 'id' in publication:
        selection = selection & (reports.id != publication['id'])

    duplicates = []
    _extend_ids(db, selection, duplicates)

    return duplicates