# -*- coding: utf-8 -*-
"""A collection of tools to check publication rows.

@author: R. Le Gac

"""
import re
import regex


from gluon import current
from plugin_dbui import (UNDEF,
                         UNDEF_ID,
                         get_id,
                         get_where_query)

# syntax for the submission date YYYY-MM or YYYY-MM-DD
REG_SUBMITTED = re.compile(regex.REG_SUBMITTED)

# HTML entities like &gt;
REG_HTML = re.compile(r'&[a-z]+;')


def check_publication(row):
    """Check the publication fields.

    @type row: gluon.dal.Row
    @param row: record defining a publication. It contains the publications
    table as well as its reference tables (status, categories, teams).

    @rtype: tuple
    @return:
        - the first element contains the list of messages
        - the second one contains the list of duplicate ids.

    @note: text fields which are empty (C{None}) are treated as empty
    strings, so that the substring checks never raise a C{TypeError}.

    """
    T = current.T
    publication = row.publications
    messages, idset = [], set()

    # status code
    if row.status.code == '???':
        messages.append(T("The status is ???"))

    # category
    if row.categories.code == UNDEF:
        messages.append(T("The category is undefined"))

    # team
    if publication.id_teams == UNDEF_ID:
        messages.append(T("The team is undefined"))

    # project
    if publication.id_projects == UNDEF_ID:
        messages.append(T("The project is undefined"))

    # authors list
    authors = publication.authors or ''
    if 'et al' in authors:
        messages.append(T("'et al.' in authors"))

    # CPPM authors (team name, ...)
    # An empty team name is skipped: '' is a substring of every string
    # and would trigger the warning for all publications.
    team = row.teams.team
    institute_authors = publication.authors_institute or ''
    if team and team in institute_authors:
        messages.append(T("The institute authors contains the team name?"))

    # submitted date
    submitted = publication.submitted
    if not submitted:
        messages.append(T("Submitted date is not defined"))
    elif not REG_SUBMITTED.match(submitted):
        messages.append(T("Submitted date is not valid"))

    # publication URL
    url = publication.publication_url
    if url and 'pdf' not in url:
        messages.append(
            T("Check that the publication URL corresponds to a pdf file."))

    # latex syntax
    title = publication.title or ''
    suspicious_title = (
        "√" in title or
        ("^" in title and "$" not in title) or
        ("→" in title and "$" not in title) or
        "->" in title or
        "s**(1/2)" in title or
        REG_HTML.search(title)
    )
    if suspicious_title:
        messages.append(T("Check latex syntax in the title"))

    # "Note :" in report number
    report_numbers = publication.report_numbers or ''
    if ("Note :" in report_numbers or
            "Note:" in report_numbers or
            ";" in report_numbers):
        messages.append(T('Report numbers contains "Note :" or ";"'))

    # duplicate by origin
    ids = duplicate_origin(publication)
    if ids:
        idset.update(ids)
        messages.append(T("Entries with duplicate origin"))

    usual = row.categories.usual

    # specific fields for article
    if usual == 'article':

        if publication.id_publishers == UNDEF_ID:
            messages.append(T("Publishers is not defined"))

        if not publication.volume:
            messages.append(T("Volume number is not defined"))

        if not publication.pages:
            messages.append(T("Pages range is not defined"))

        if not publication.preprint:
            messages.append(T("Preprint number is not defined"))

        ids = duplicate_article(publication)
        if ids:
            idset.update(ids)
            messages.append(T("Possible duplicate entries"))

    # specific fields for proceeding and talk
    if usual in ('proceeding', 'talk'):

        if not publication.conference_title:
            messages.append(T("Conference title is not defined"))

        if not publication.conference_dates:
            messages.append(T("Conference dates is not defined"))

        if not publication.conference_town:
            messages.append(T("Conference town is not defined"))

        if not publication.id_countries:
            messages.append(T("Conference country is not defined"))

        if not publication.conference_speaker:
            messages.append(T("Conference speaker is missing"))

        ids = duplicate_conference(publication)
        if ids:
            idset.update(ids)
            messages.append(T("Possible duplicate entries"))

    # specific fields for report
    if usual == 'report':

        if not publication.report_numbers:
            messages.append(T("Report number is missing"))

        ids = duplicate_report(publication)
        if ids:
            idset.update(ids)
            messages.append(T("Possible duplicate entries"))

    return (messages, list(idset))


def _exclude_self(db, query, publication):
    """Restrict C{query} to publications other than C{publication}.

    @type db: gluon.dal.DAL
    @param db:

    @type query: gluon.dal.query
    @param query:

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: gluon.dal.query
    @return: C{query} unchanged when the publication has no (or an empty)
    C{id}, otherwise C{query} and'ed with a condition on the id.

    """
    if 'id' in publication and publication['id']:
        return (query) & (db.publications.id != publication['id'])

    return query


def extend_ids(db, query, ids):
    """Helper function extending a list of publication ids.

    @type db: gluon.dal.DAL
    @param db:

    @type query: gluon.dal.query
    @param query: it has to involve the publications table together with
    another table, since the id is read as C{row.publications.id}.

    @type ids: list of string
    @param ids: the current list of ids

    @note: the current list of publication ids is extended, in place, by
    those corresponding to the C{query}. The ids are unique in the list.

    """
    # a select on an empty set yields no row: no need to count first
    for row in db(query).select():
        publication_id = str(row.publications.id)
        if publication_id not in ids:
            ids.append(publication_id)


def duplicate_article(publication):
    """Look for duplicate article.

    The comparison is performed on articles published by the given team
    using the following criteria:

        - title, publishers, volume and pages
        - publisher, volume and pages
        - publisher and title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    ids = []
    db = current.globalenv['db']
    publications = db.publications

    qcat = (db.categories.code == 'ACL') | (db.categories.code == 'ACLN')

    # conditions shared by all criteria: category, team and publisher
    qmain = get_where_query(publications)
    qmain = (qmain) & (qcat)
    qmain = (qmain) & (publications.id_teams == publication['id_teams'])
    qmain = (qmain) & \
            (publications.id_publishers == publication['id_publishers'])
    qmain = _exclude_self(db, qmain, publication)

    same_title = publications.title == publication['title']
    same_volume = publications.volume == publication['volume']
    same_pages = publications.pages == publication['pages']

    # title, publishers, volume and pages
    extend_ids(db, (qmain) & (same_title) & (same_volume) & (same_pages), ids)

    # publisher, volume and pages
    extend_ids(db, (qmain) & (same_volume) & (same_pages), ids)

    # publisher and title
    extend_ids(db, (qmain) & (same_title), ids)

    return ids


def duplicate_conference(publication):
    """Look for duplicate talk / proceeding.

    The comparison is performed on conference talks / proceedings published
    by the given team using the following criteria:

        - title, conference title, conference date and conference town
        - title, conference date and conference town
        - title, conference title and conference town

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    ids = []
    db = current.globalenv['db']
    publications = db.publications

    qcat = (db.categories.code == 'ACTI') | \
           (db.categories.code == 'ACTN') | \
           (db.categories.code == 'COM')

    # conditions shared by all criteria: category, team and title
    qmain = get_where_query(publications)
    qmain = (qmain) & (qcat)
    qmain = (qmain) & (publications.id_teams == publication['id_teams'])
    qmain = (qmain) & (publications.title == publication['title'])
    qmain = _exclude_self(db, qmain, publication)

    same_conference = \
        publications.conference_title == publication['conference_title']
    same_dates = \
        publications.conference_dates == publication['conference_dates']
    same_town = \
        publications.conference_town == publication['conference_town']

    # title, conference title, conference date and conference town
    extend_ids(db,
               (qmain) & (same_conference) & (same_dates) & (same_town),
               ids)

    # title, conference date and conference town
    # Built from qmain: the conference title must not constrain this
    # criterion, otherwise it only repeats the previous one.
    extend_ids(db, (qmain) & (same_dates) & (same_town), ids)

    # title, conference title and conference town
    extend_ids(db, (qmain) & (same_conference) & (same_town), ids)

    return ids


def duplicate_origin(publication):
    """Look for publications with the same value in the origin field.

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries. The list is
    empty when the origin field is missing or empty.

    """
    db = current.globalenv['db']

    # protection against missing or empty origin field
    if 'origin' not in publication or not publication['origin']:
        return []

    # look for publications with the same origin field; the publication
    # itself is excluded only when it already has an id (not a new record)
    query = db.publications.origin == publication['origin']
    query = _exclude_self(db, query, publication)

    return [str(row.id) for row in db(query).select()]


def duplicate_report(publication):
    """Look for duplicate report.

    The comparison is performed on reports published by the given team
    using the following criteria:

        - title

    @type publication: dict or gluon.storage.Storage
    @param publication: contains the publication fields and their values

    @rtype: list
    @return: list of ids corresponding to duplicate entries

    """
    ids = []
    db = current.globalenv['db']
    publications = db.publications

    qcat = db.categories.code == 'AP'

    qmain = get_where_query(publications)
    qmain = (qmain) & (qcat)
    qmain = (qmain) & (publications.id_teams == publication['id_teams'])
    qmain = (qmain) & (publications.title == publication['title'])
    qmain = _exclude_self(db, qmain, publication)

    extend_ids(db, qmain, ids)

    return ids