Commit 68597c28 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Apply PEP-8 rules.

parent 4d708b04
......@@ -13,6 +13,10 @@ from plugin_dbui import (CALLBACK_ERRORS,
get_where_query)
MSG_DUPLICATE = \
"Can't delete this record since several publications refer to it."
def INHIBIT_CASCADE_DELETE(set_records):
"""Inhibit the delete when publications use the reference field.
......@@ -51,8 +55,7 @@ def INHIBIT_CASCADE_DELETE(set_records):
query = (query) & (set_records.query)
if db(query).count():
field._table[CALLBACK_ERRORS] = \
T("Can't delete this record since several publications refer to it.")
field._table[CALLBACK_ERRORS] = T(MSG_DUPLICATE)
return True
return False
......@@ -80,12 +83,12 @@ def INHIBIT_DUPLICATE_PUBLICATION(publication):
if ids:
db.publications[CALLBACK_ERRORS] = [
T("Can't insert the article."),
T("An article already exists with the same:"),
T("• title, publisher, volume and pages"),
T("• or publisher, volume and pages"),
T("• or publisher and title."),
T("See publication id(s) %s") % ', '.join(ids)]
T("Can't insert the article."),
T("An article already exists with the same:"),
T("• title, publisher, volume and pages"),
T("• or publisher, volume and pages"),
T("• or publisher and title."),
T("See publication id(s) %s") % ', '.join(ids)]
return True
......@@ -99,12 +102,12 @@ def INHIBIT_DUPLICATE_PUBLICATION(publication):
if ids:
db.publications[CALLBACK_ERRORS] = [
T("Can't insert the talk/proceeding."),
T("A talk/proceeding already exists with the same:"),
T("• title, conference title, date and town"),
T("• or title, conference date and town"),
T("• or title, conference title and town"),
T("See publication id(s) %s") % ', '.join(ids)]
T("Can't insert the talk/proceeding."),
T("A talk/proceeding already exists with the same:"),
T("• title, conference title, date and town"),
T("• or title, conference date and town"),
T("• or title, conference title and town"),
T("See publication id(s) %s") % ', '.join(ids)]
return True
......@@ -116,9 +119,9 @@ def INHIBIT_DUPLICATE_PUBLICATION(publication):
if ids:
db.publications[CALLBACK_ERRORS] = [
T("Can't insert the report."),
T("A report already exists with the same title"),
T("See publication id(s) %s") % ', '.join(ids)]
T("Can't insert the report."),
T("A report already exists with the same title"),
T("See publication id(s) %s") % ', '.join(ids)]
return True
......@@ -158,9 +161,9 @@ def INHIBIT_HARVESTER_ON_CATEGORY(harvester):
code = db.categories[id_category].code
db.harvesters[CALLBACK_ERRORS] = [
T("Can't insert the harvester."),
T("Harvester already exists with the same automaton "),
T("but with different category: %s") % code]
T("Can't insert the harvester."),
T("Harvester already exists with the same automaton "),
T("but with different category: %s") % code]
return True
......@@ -190,7 +193,7 @@ def INHIBIT_PUBLICATION_DELETE_ON_OK(s):
id_rec = s.query.second
if db.publications[id_rec].id_status == id_ok:
db.publications[CALLBACK_ERRORS] = \
T("Can't delete a publication marked OK.")
T("Can't delete a publication marked OK.")
return True
return False
......@@ -221,8 +224,7 @@ def INHIBIT_PUBLICATION_UPDATE_ON_OK(s, f):
if db.publications[id_rec].id_status == id_ok:
db.publications[CALLBACK_ERRORS] = \
T("Can't updated a publication marked OK.")
T("Can't updated a publication marked OK.")
return True
return False
......@@ -78,7 +78,8 @@ def check_publication(row):
# publication URL
if row.publications.publication_url:
if 'pdf' not in row.publications.publication_url:
text = T("Check that the publication URL corresponds to a pdf file.")
text = \
T("Check that the publication URL corresponds to a pdf file.")
li.append(text)
# latex syntax
def duplicate_article(publication):
    """Return the ids of publications looking like duplicates of the
    given article.

    Three criteria are scanned, each one restricted to the categories
    ACL / ACLN, to the same team and to the same publisher:

        - title, volume and pages
        - volume, pages and year
        - title

    @type publication: dict
    @param publication: publication fields (id_teams, id_publishers,
        title, volume, pages, year, ... and optionally id)

    @rtype: list
    @return: ids of the possible duplicates
    """
    ids = []
    db = current.globalenv['db']

    categories = db.categories
    # BUG FIX: was ``db.pulications`` (typo) -- that attribute does not
    # exist on the DAL object and fails at run time.
    publications = db.publications

    qcat = (categories.code == 'ACL') | (categories.code == 'ACLN')
    qpub = publications.id_publishers == publication['id_publishers']

    qmain = get_where_query(publications)
    qmain = ((qmain) & (qcat))
    qmain = ((qmain) & (publications.id_teams == publication['id_teams']))
    qmain = ((qmain) & (qpub))

    # ignore the record itself when it is already in the database
    if 'id' in publication and publication['id']:
        qmain = ((qmain) & (publications.id != publication['id']))

    # title, publishers, volume and pages
    query = ((qmain) & (publications.title == publication['title']))
    query = ((query) & (publications.volume == publication['volume']))
    query = ((query) & (publications.pages == publication['pages']))
    extend_ids(db, query, ids)

    # publisher, volume, pages and year
    query = ((qmain) & (publications.volume == publication['volume']))
    query = ((query) & (publications.pages == publication['pages']))
    query = ((query) & (publications.year == publication['year']))
    extend_ids(db, query, ids)

    # publisher and title
    query = ((qmain) & (publications.title == publication['title']))
    extend_ids(db, query, ids)

    return ids
def duplicate_conference(publication):
    """Return the ids of publications looking like duplicates of the
    given talk / proceeding.

    Three criteria are scanned, each one restricted to the categories
    ACTI / ACTN / COM, to the same team and to the same title:

        - conference title, conference dates and conference town
        - conference dates and conference town
        - conference title and conference town

    @type publication: dict
    @param publication: publication fields (id_teams, title,
        conference_title, conference_dates, conference_town, ... and
        optionally id)

    @rtype: list
    @return: ids of the possible duplicates
    """
    ids = []
    db = current.globalenv['db']

    categories = db.categories
    publications = db.publications

    qcat = (categories.code == 'ACTI') | \
           (categories.code == 'ACTN') | \
           (categories.code == 'COM')

    qmain = get_where_query(publications)
    qmain = ((qmain) & (qcat))
    qmain = ((qmain) & (publications.id_teams == publication['id_teams']))
    qmain = ((qmain) & (publications.title == publication['title']))

    # ignore the record itself when it is already in the database
    if 'id' in publication and publication['id']:
        qmain = ((qmain) & (publications.id != publication['id']))

    qtitle = publications.conference_title == publication['conference_title']
    qdates = publications.conference_dates == publication['conference_dates']
    qtown = publications.conference_town == publication['conference_town']

    # title, conference title, conference dates and conference town
    query = ((qmain) & (qtitle))
    query = ((query) & (qdates))
    query = ((query) & (qtown))
    extend_ids(db, query, ids)

    # title, conference dates and conference town
    # BUG FIX: restart from qmain. The previous code chained onto the
    # first query (which already contained qtitle), so this criterion
    # merely repeated the first one instead of relaxing it.
    query = ((qmain) & (qdates))
    query = ((query) & (qtown))
    extend_ids(db, query, ids)

    # title, conference title and conference town
    query = ((qmain) & (qtitle))
    query = ((query) & (qtown))
    extend_ids(db, query, ids)

    return ids
......@@ -316,13 +328,15 @@ def duplicate_origin(publication):
ids = []
db = current.globalenv['db']
publications = db.publications
# protection against empty origin field
if not publication['origin']:
return ids
# look for publication with the same origin field
query = db.publications.id != publication['id']
query = ((query) & (db.publications.origin == publication['origin']))
query = publications.id != publication['id']
query = ((query) & (publications.origin == publication['origin']))
set_records = db(query)
if set_records.count():
......@@ -349,15 +363,15 @@ def duplicate_report(publication):
ids = []
db = current.globalenv['db']
qcat = db.categories.code == 'AP'
publications = db.publications
qmain = get_where_query(db.publications)
qmain = ((qmain) & (qcat))
qmain = ((qmain) & (db.publications.id_teams == publication['id_teams']))
qmain = ((qmain) & (db.publications.title == publication['title']))
qmain = get_where_query(publications)
qmain = ((qmain) & (db.categories.code == 'AP'))
qmain = ((qmain) & (publications.id_teams == publication['id_teams']))
qmain = ((qmain) & (publications.title == publication['title']))
if 'id' in publication and publication['id']:
qmain = ((qmain) & (db.publications.id != publication['id']))
qmain = ((qmain) & (publications.id != publication['id']))
extend_ids(db, qmain, ids)
......
......@@ -2,13 +2,13 @@
""" countries
List of countries extract from the geographical database www.geonames.org:
1. get the file C{countryInfo.txt}
1. get the file C{countryInfo.txt}
from U{http://download.geonames.org/export/dump/}
2. open the file with libreoffice calc
2. open the file with libreoffice calc
and remove all columns but the country names
Extract in Nov. 2014
"""
COUNTRIES = ['Andorra',
......
......@@ -4,77 +4,78 @@
"""
def CLEAN_COLLABORATION(value):
    """Correct common mistakes on the collaboration field.

    - no heading and trailing spaces
    - no duplicate entries
    - entries starting with C{on behalf} are dropped
    - collaboration / consortium / group always start with a capital letter

    @type value: str
    @param value: string where collaborations are separated by comma

    @rtype: str
    """
    cleaned = []
    for entry in value.split(','):
        entry = entry.strip()

        # "XXX collaboration" -> "XXX Collaboration" (idem consortium, group)
        entry = (entry.replace('collaboration', 'Collaboration')
                      .replace('consortium', 'Consortium')
                      .replace('group', 'Group'))

        # drop duplicates as well as "on behalf of ..." entries
        if entry in cleaned or entry.startswith('on behalf'):
            continue

        cleaned.append(entry)

    return ', '.join(cleaned)
def CLEAN_REVIEW(value):
    """Correct common mistakes on the paper_editor field.

    - remove dots and commas
    - single spaces only, no heading and trailing spaces

    @type value: str
    @param value: review abbreviation

    @rtype: str
    """
    # drop every dot and comma
    value = value.replace('.', '').replace(',', '')

    # collapse each whitespace run into a single space (also trims ends)
    return ' '.join(value.split())
def CLEAN_THESIS_DEFENSE(value):
    """Correct common mistakes on the thesis_defense field.

    Remove leading prefixes like C{Presented}, C{on}, ... in front of
    the defense date.

    @type value: str
    @param value: string with the defense date

    @rtype: str
    """
    # BUG FIX: strip *leading* prefixes only. The previous implementation
    # used value.replace('on ', ''), which also removed the substring
    # inside words (e.g. "London, 22 Mar" became "Lond, 22 Mar").
    value = value.strip()
    for prefix in ('Presented', 'presented', 'on'):
        if value.startswith(prefix + ' '):
            value = value[len(prefix) + 1:].lstrip()
    return value
......@@ -159,7 +159,7 @@ class Automaton(object):
db = self.db
try:
rec_id = db.publications.insert (**fields)
rec_id = db.publications.insert(**fields)
if rec_id:
return 1
......@@ -358,7 +358,7 @@ class Automaton(object):
# fix origin field
ok = db.publications[rec_id].origin and \
db.publications[rec_id].origin == oai_url
db.publications[rec_id].origin == oai_url
if not ok:
if not self.dry_run:
db.publications[rec_id] = dict(origin=oai_url)
......
......@@ -128,7 +128,7 @@ def learn_my_authors(db,
for elem in diff:
if isinstance(elem, unicode):
elem = elem.encode('utf8')
family_name = elem[elem.rfind('. ') + 2:] # extract family name
family_name = elem[elem.rfind('. ') + 2:]
if family_name not in row.authors:
elems.append(elem)
......@@ -138,5 +138,5 @@ def learn_my_authors(db,
db.my_authors[row.id] = dict(authors=', '.join(database_authors))
class ToolException(Exception):
    """Generic exception type raised by this module."""
    pass
......@@ -28,5 +28,3 @@ class MsgCollection(Storage):
"""
return self.url.replace("of=id", "of=hb")
......@@ -66,9 +66,11 @@ def is_institute(record):
bool: true when the MARC record describes an institute
"""
# u'980': [{'b': [u'CK90', u'HEP200', u'PDGLIST', u'PPF', u'TOP500', u'WEB']},
# {'a': u'INSTITUTION'},
# {'a': u'CORE'}]}
# u'980': [
# {'b': [u'CK90', u'HEP200', u'PDGLIST', u'PPF', u'TOP500', u'WEB']},
# {'a': u'INSTITUTION'},
# {'a': u'CORE'}
# ]
if u"980" in record:
if isinstance(record[u"980"], list):
......@@ -77,9 +79,8 @@ def is_institute(record):
if k == "a" and v == u"INSTITUTION":
return True
elif isinstance(record[u"980"], dict) and \
"a" in record[u"980"] and \
record[u"980"]["a"] == u"INSTITUTION":
elif isinstance(record[u"980"], dict) and "a" in record[u"980"] and \
record[u"980"]["a"] == u"INSTITUTION":
return True
return False
......
......@@ -35,21 +35,21 @@ _ref1 = r"(?P<p>[A-Za-z\. ]+) +(?P<v>\d+),? +(?P<c>[\d-]+) +\((?P<y>[\d]+)\)"
_ref2 = r"(?P<p>[A-Za-z\. ]+) +\((?P<y>\d+)\) +(?P<v>[\d]+):(?P<c>[\d-]+)"
DECODE_REF = [re.compile(_ref1), re.compile(_ref2)]
# Map three-letter month abbreviations to zero-padded month numbers.
# Covers English names plus the French variants Fev, Avr and Mai.
MONTHS = {
    u'Jan': '01', u'Feb': '02', u'Fev': '02',
    u'Mar': '03', u'Apr': '04', u'Avr': '04',
    u'May': '05', u'Mai': '05', u'Jun': '06',
    u'Jul': '07', u'Aug': '08', u'Sep': '09',
    u'Oct': '10', u'Nov': '11', u'Dec': '12',
}
MSG_INVALID_HOST = "Invalid host"
......@@ -79,7 +79,7 @@ REG_COLLABORATION = re.compile(regex.REG_COLLABORATION)
REG_CONF_DATES_1 = re.compile("(\d+) *-? *(\d+) *([A-Z][a-z]{2}) *(\d{4})")
REG_CONF_DATES_2 = \
re.compile("(\d+) *([A-Z][a-z]{2}) *-? *(\d+) *([A-Z][a-z]{2}) *(\d{4})")
re.compile("(\d+) *([A-Z][a-z]{2}) *-? *(\d+) *([A-Z][a-z]{2}) *(\d{4})")
REG_CONF_DATES = re.compile(regex.REG_CONF_DATES)
REG_SUBMITTED = re.compile(regex.REG_SUBMITTED)
......@@ -619,7 +619,7 @@ class CheckAndFix(object):
"""
# might have been computed when affiliation is checked
rec_id = record.id()
if rec_id in self.__my_authors:
if rec_id in self.__my_authors:
li = self.__my_authors[rec_id]
li.sort(key=cmpFct)
value = u', '.join(li)
......@@ -807,7 +807,7 @@ class CheckAndFix(object):
# 22 03 2011
m = DECODE_DD_MM_YYYY.match(dates[i])
if m:
data (m.group(3), int(m.group(2)), int(m.group(1)))
data = (m.group(3), int(m.group(2)), int(m.group(1)))
dates[i] = '%s-%02i-%02i' % data
continue
......
......@@ -2,6 +2,8 @@
""" invenio_tools.exception
"""
class ExceptionUTF8(Exception):
"""Exception in which unicode arguments are encoded as a string.
......@@ -14,8 +16,21 @@ class ExceptionUTF8(Exception):
Exception.__init__(self, *args)
# Concrete exception types. They add no behaviour of their own; each one
# inherits the unicode-to-string argument encoding from ExceptionUTF8.
class CdsException(ExceptionUTF8):
    pass


class CheckException(ExceptionUTF8):
    pass


class Marc12Exception(ExceptionUTF8):
    pass


class RecordException(ExceptionUTF8):
    pass


class XmlException(ExceptionUTF8):
    pass
......@@ -435,11 +435,11 @@ class InvenioStore(object):
U{http://invenio-demo.cern.ch/help/hacking/search-engine-api}.
@rtype: unicode
@return: The format of the string (HTML, XML) depend on the keyword C{of}.
For MARC12 format use C{xm}.
@return: The format of the string (HTML, XML) depend on the
keyword C{of}. For MARC12 format use C{xm}.
@deprecated: the method L{get_ids} coupled with L{get_record} are much more
efficient.
@deprecated: the method L{get_ids} coupled with L{get_record} are
much more efficient.
"""
for k in kwargs:
......
......@@ -68,12 +68,22 @@ class IterRecord(object):
concatenate the following dictionary::
record[field] = [dict(subfield1=val1), dict(subfield2=val2), dict(subfield3=val3),...]
record[field] = [dict(subfield1=val1), dict(subfield2=val2, subfield3=val3),...]
record[field] = [
dict(subfield1=val1),
dict(subfield2=val2),
dict(subfield3=val3),...
]
record[field] = [
dict(subfield1=val1),
dict(subfield2=val2,
subfield3=val3),...
]
into a single one::
record[field] = dict1(subfield1=val1, subfield2=val2, subfield3=val3)
record[field] = dict1(subfield1=val1,
subfield2=val2,
subfield3=val3)
@type record: Record
@param record:
......@@ -84,7 +94,7 @@ class IterRecord(object):
if not isinstance(record[field], list):
continue
nkeys = [len(di) for di in record[field]]
nkeys = [len(di) for di in record[field]]
# several dictionary with more than one nkeys
# don't know how to treat that case
......@@ -277,4 +287,3 @@ class IterRecord(object):
else:
raise StopIteration()
......@@ -20,9 +20,9 @@ class Record(dict):
record[field] = [dict1(subfield1=..., subfield2=...),
dict2(subfield1=..., subfield2=...), ...]
In the MARC standard, the C{field} is a string containing at least three digit
while the C{subfield} is a letter. The type of the C{field} is unicode
and C{subfield} is string.
In the MARC standard, the C{field} is a string containing at least three
digit while the C{subfield} is a letter. The type of the C{field} is
unicode and C{subfield} is string.
The class comes with a collection of methods to extract the record
information masking the C{field} and the C{subfield} codification.
......
......@@ -74,7 +74,8 @@ class RecordConf(RecordPubli):
"""
location = self._get(u"111", "c")