Commit 01775da5 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

First set of algorithm to detect duplicate entries in the validation procedure.

parent df0b0329
......@@ -7,7 +7,10 @@
import re
from gluon import current
from plugin_dbui import UNDEF, UNDEF_ID
from plugin_dbui import (UNDEF,
UNDEF_ID,
get_id,
get_where_query)
# syntax for the submission date YYYY or YYYY-MM or YYYY-MM-DD
......@@ -21,7 +24,7 @@ def check_publication(row):
"""Check the fields of a publication.
@type row: gluon.dal.Row
@param row: record defineing a publication. Its contains the publications
@param row: record defining a publication. It contains the publications
table as well as its reference tables.
@rtype: list
......@@ -89,7 +92,12 @@ def check_publication(row):
if not row.publications.preprint:
text = T("Preprint number is not defined")
li.append(text)
ids = duplicate_article(row)
if ids:
text = T("Possible duplicate entries [%s]") % s
li.append(text)
# specific fields for proceeding and talk
if row.categories.usual in ('proceeding', 'talk'):
......@@ -112,6 +120,11 @@ def check_publication(row):
if not row.publications.conference_speaker:
text = T("Conference speaker is missing")
li.append(text)
ids = duplicate_conference(row)
if ids:
text = T("Possible duplicate entries [%s]") % s
li.append(text)
# specific fields for report
if row.categories.usual == 'report':
......@@ -119,5 +132,124 @@ def check_publication(row):
if not row.publications.report_numbers:
text = T("Report number is missing")
li.append(text)
ids = duplicate_report(row)
if ids:
text = T("Possible duplicate entries [%s]") % s
li.append(text)
return li
def duplicate_article(row):
    """Look for possible duplicate entries of an article.

    Candidates are other publications of the same team, published by the
    same publisher (review), in the 'ACL' category. Three searches of
    decreasing stringency are run: same title/volume/pages, then same
    volume/pages, then same title. Results are merged without repetition.

    @type row: gluon.dal.Row
    @param row: record defining a publication. It contains the publications
        table as well as its reference tables.

    @rtype: list
    @return: list of ids (as strings) corresponding to possible duplicate
        entries.
    """
    ids = []
    db = current.globalenv['db']

    # base query: any other 'ACL' publication of the same team and publisher
    qmain = get_where_query(db.publications)
    qmain &= db.publications.id != row.publications.id
    qmain &= db.categories.code == 'ACL'
    qmain &= db.teams.team == row.teams.team
    qmain &= db.publications.id_publishers == row.publications.id_publishers

    # most stringent test: same title, volume and pages
    query = qmain & (db.publications.title == row.publications.title)
    query &= db.publications.volume == row.publications.volume
    query &= db.publications.pages == row.publications.pages
    for xrow in db(query).select():
        xid = str(xrow.publications.id)
        if xid not in ids:
            ids.append(xid)

    # less stringent test: same volume and pages
    query = qmain & (db.publications.volume == row.publications.volume)
    query &= db.publications.pages == row.publications.pages
    for xrow in db(query).select():
        xid = str(xrow.publications.id)
        if xid not in ids:
            ids.append(xid)

    # less stringent test: same title
    query = qmain & (db.publications.title == row.publications.title)
    for xrow in db(query).select():
        xid = str(xrow.publications.id)
        if xid not in ids:
            ids.append(xid)

    return ids
def duplicate_conference(row):
    """Look for possible duplicate entries of a talk / proceeding.

    Candidates are other publications of the same team, in the 'ACTI' or
    'COM' category, with the same conference title, dates, town and the
    same publication title.

    @type row: gluon.dal.Row
    @param row: record defining a publication. It contains the publications
        table as well as its reference tables.

    @rtype: list
    @return: list of ids (as strings) corresponding to possible duplicate
        entries.
    """
    ids = []
    db = current.globalenv['db']

    query = get_where_query(db.publications)
    query &= db.publications.id != row.publications.id
    query &= (db.categories.code == 'ACTI') | (db.categories.code == 'COM')
    query &= db.teams.team == row.teams.team
    query &= db.publications.conference_title == row.publications.conference_title
    query &= db.publications.conference_dates == row.publications.conference_dates
    query &= db.publications.conference_town == row.publications.conference_town
    query &= db.publications.title == row.publications.title

    for xrow in db(query).select():
        ids.append(str(xrow.publications.id))

    return ids
def duplicate_report(row):
    """Look for possible duplicate entries of a report.

    Candidates are other publications of the same team, in the 'AP'
    category, with the same title.

    @type row: gluon.dal.Row
    @param row: record defining a publication. It contains the publications
        table as well as its reference tables.

    @rtype: list
    @return: list of ids (as strings) corresponding to possible duplicate
        entries.
    """
    ids = []
    db = current.globalenv['db']

    query = get_where_query(db.publications)
    query &= db.publications.id != row.publications.id
    query &= db.categories.code == 'AP'
    query &= db.teams.team == row.teams.team
    query &= db.publications.title == row.publications.title

    for xrow in db(query).select():
        ids.append(str(xrow.publications.id))

    # NOTE: the original draft ended with "return li" (an undefined name in
    # this function); the list of ids is the intended return value.
    return ids
......@@ -4,6 +4,7 @@ HEAD
- Migrate to plugin_dbui 0.4.13.1
- Add the module check_tools.
- More stringent tests in the validation procedure
including duplicate entries.
0.8.2 (Apr 2013)
- Consolidation version.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment