"""a collection of tools to check rows. """ import re from . import regex from gluon import current from plugin_dbui import (UNDEF, UNDEF_ID, get_where_query) # syntax for the submission date YYYY-MM or YYYY-MM-DD REG_SUBMITTED = re.compile(regex.REG_SUBMITTED) # HTML code like > REG_HTML = re.compile("&[a-z]+;") def check_publication(row): """Check the publication fields. Args: row (gluon.dal.Row): record defining a publication. Its contains the publication table as well as its foreign tables. Returns: tuple: * the first element contains the list of message * the second one contains the list of duplicate *ids*. """ T, li, idset = current.T, [], set() categories = row.categories category_code = categories.code category_usual = categories.usual publication = row.publications # status code if row.status.code == "???": text = T("The status is ???") li.append(text) # category if category_code == UNDEF: text = T("The category is undefined") li.append(text) # team if publication.id_teams == UNDEF_ID: text = T("The team is undefined") li.append(text) # project if publication.id_projects == UNDEF_ID: text = T("The project is undefined") li.append(text) # authors list if "et al" in publication.authors: text = T("'et al.' in authors") li.append(text) # CPPM authors (team name, ...) if row.teams.team in publication.authors_institute: text = T("The institute authors contains the team name?") li.append(text) # submitted date submitted = publication.submitted if not submitted: text = T("Submitted date is not defined") li.append(text) if submitted: if not REG_SUBMITTED.match(submitted): text = T("Submitted date is not valid") li.append(text) # publication URL publication_url = publication.publication_url if publication_url: if "pdf" not in publication_url: text = \ T("Check that the publication URL corresponds to a pdf file.") li.append(text) # latex syntax title = publication.title rules = "√" in title or \ ("^" in title and "$" not in title) or \ ("→" in title and "$" not in title) or \ ("->" in title) or \ ("s**(1/2)" in title) or \ REG_HTML.search(title) if rules: text = T("Check latex syntax in the title") li.append(text) # "Note :" in report number value = publication.report_numbers rules = "Note :" in value or \ "Note:" in value or \ ";" in value if rules: text = T('Report numbers contains "Note :" or ";"') li.append(text) # duplicate by origin ids = duplicate_origin(publication) if len(ids): idset = idset.union(ids) text = T("Entries with duplicate origin") li.append(text) # specific fields for article if category_code in ("ACL", "ACLN"): if publication.id_publishers == UNDEF_ID: text = T("Publishers is not defined") li.append(text) if not publication.volume: text = T("Volume number is not defined") li.append(text) if not publication.pages: text = T("Pages range is not defined") li.append(text) if not publication.preprint: text = T("Preprint number is not defined") li.append(text) ids = duplicate_article(publication) if ids: idset = idset.union(ids) text = T("Possible duplicate entries") li.append(text) # specific fields for preprint if category_code == "PRE": if not publication.preprint: text = T("Preprint number is not defined") li.append(text) # specific fields for proceeding and talk if category_usual in ("proceeding", "talk"): if not publication.conference_title: text = T("Conference title is not defined") li.append(text) if not publication.conference_dates: text = T("Conference dates is not defined") li.append(text) if not publication.conference_town: text = T("Conference town is not defined") li.append(text) if not publication.id_countries: text = T("Conference country is not defined") li.append(text) if not publication.conference_speaker: text = T("Conference speaker is missing") li.append(text) ids = duplicate_conference(publication, category_usual == "proceeding") if ids: idset = idset.union(ids) text = T("Possible duplicate entries") li.append(text) # specific fields for report if category_usual == "report": if not publication.report_numbers: text = T("Report number is missing") li.append(text) ids = duplicate_report(publication) if ids: idset = idset.union(ids) text = T("Possible duplicate entries") li.append(text) return (li, list(idset)) def extend_ids(db, query, ids): """Extend list of publication *ids* with those found by the *query*. Note: The *id* are unique in the list. Args: db (gluon.dal.DAL): database connection. query (gluon.dal.query): database query ids (list of string): the current list of *ids* """ set_records = db(query) if set_records.count(): for row in set_records.select(): id_rec = str(row.publications.id) if id_rec not in ids: ids.append(id_rec) def duplicate_article(publication): """Look for duplicate article. The comparison is performed on article published by the given team using the following criteria: * title, publishers, volume and pages * publisher, volume and pages * publisher and title Args: publication (dict or gluon.storage.Storage): contains the publication fields and theirs values. Returns: list: list of *ids* corresponding to duplicate entries. """ ids = [] db = current.db categories = db.categories publications = db.publications qcat = (categories.code == "ACL") | (categories.code == "ACLN") qpub = publications.id_publishers == publication["id_publishers"] qmain = get_where_query(publications) qmain = ((qmain) & (qcat)) qmain = ((qmain) & (publications.id_teams == publication["id_teams"])) qmain = ((qmain) & (qpub)) if "id" in publication and publication["id"]: qmain = ((qmain) & (publications.id != publication["id"])) # title, publishers, volume and pages query = ((qmain) & (publications.title == publication["title"])) query = ((query) & (publications.volume == publication["volume"])) query = ((query) & (publications.pages == publication["pages"])) extend_ids(db, query, ids) # publisher, volume, pages and year query = ((qmain) & (publications.volume == publication["volume"])) query = ((query) & (publications.pages == publication["pages"])) query = ((query) & (publications.year == publication["year"])) extend_ids(db, query, ids) # publisher and title query = ((qmain) & (publications.title == publication["title"])) extend_ids(db, query, ids) return ids def duplicate_conference(publication, proceeding=False): """Look for duplicate talk / proceeding. The comparison is performed on conference talk/proceeding published by the given team using the following criteria: * title, conference title, conference date and conference town * title, conference date and conference town * title, conference title and conference town Args: publication (dict or gluon.storage.Storage): contains the publication fields and theirs values. proceeding (bool): tag the publication either as talk or a proceeding. Returns: list: list of *ids* corresponding to duplicate entries. """ ids = [] db = current.db categories = db.categories publications = db.publications if proceeding: qcat = (categories.code == "ACTI") | (categories.code == "ACTN") else: qcat = categories.code == "COM" qmain = get_where_query(publications) qmain &= qcat qmain &= publications.id_teams == publication["id_teams"] qmain &= publications.title == publication["title"] if "id" in publication and publication["id"]: qmain &= publications.id != publication["id"] # title, conference title, conference date and conference town qtitle = publications.conference_title == publication["conference_title"] qdates = publications.conference_dates == publication["conference_dates"] qtown = publications.conference_town == publication["conference_town"] query = ((qmain) & (qtitle) & (qdates) & (qtown)) extend_ids(db, query, ids) # title, conference date and conference town query = ((qmain) & (qdates) & (qtown)) extend_ids(db, query, ids) # title, conference title and conference town query = ((qmain) & (qtitle) & (qtown)) extend_ids(db, query, ids) return ids def duplicate_origin(publication): """Look for publications with the same value in the origin field. Args: publication (dict or gluon.storage.Storage): contains the publication fields and theirs values. Returns: list: list of *ids* corresponding to duplicate entries. """ ids = [] db = current.db publications = db.publications # protection against empty origin field if not publication["origin"]: return ids # look for publication with the same origin field query = publications.id != publication["id"] query = ((query) & (publications.origin == publication["origin"])) set_records = db(query) if set_records.count(): for row in set_records.select(): ids.append(str(row.id)) return ids def duplicate_report(publication): """Look for duplicate report. The comparison is performed on report published by the given team using the *title*. Args: publication (dict or gluon.storage.Storage): contains the publication fields and theirs values. Returns: list: list of *ids* corresponding to duplicate entries. """ ids = [] db = current.db publications = db.publications qmain = get_where_query(publications) qmain = ((qmain) & (db.categories.code == "AP")) qmain = ((qmain) & (publications.id_teams == publication["id_teams"])) qmain = ((qmain) & (publications.title == publication["title"])) if "id" in publication and publication["id"]: qmain = ((qmain) & (publications.id != publication["id"])) extend_ids(db, qmain, ids) return ids