Commit 4520c839 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Deploy filters in the model and in the invenio Record class.

parent 522a35ee
......@@ -125,6 +125,7 @@
'enter a number between %(min)g and %(max)g': 'enter a number between %(min)g and %(max)g',
'enter an integer between %(min)g and %(max)g': 'entrez un entier entre %(min)g et %(max)g',
'enter an integer greater than or equal to %(min)g': 'entrez un entier plus grand ou égual à %(min)g',
'Error !!!': 'Erreur !!!',
'essai à blanc': 'essai à blanc',
'export as csv file': 'exporter un fichier CSV',
'extract authors': 'extraire les auteurs',
......@@ -327,7 +328,6 @@
'Select an harvester.': 'Selectionner un moissonneur.',
'select...': 'sélectionner...',
'selected': 'sélectionnez',
'Server error !!!': 'Erreur du serveur !!!',
'Service': 'Service',
'Sort by': 'Trier par',
'Sort field': 'Trier par',
......
......@@ -4,8 +4,10 @@
Core tables for the applications.
"""
import filters
import locale
import plugin_dbui as dbui
from datetime import datetime
from gluon.tools import PluginManager
......@@ -64,7 +66,7 @@ db.define_table("collaborations",
Field("collaboration", "string", notnull=True, unique=True),
migrate="collaborations.table")
db.collaborations.collaboration.filter_in = dbui.CLEAN_SPACES
db.collaborations.collaboration.filter_in = filters.CLEAN_COLLABORATION
db.define_table("countries",
Field("country", "string", notnull=True, unique=True),
......@@ -85,7 +87,7 @@ db.define_table("publishers",
migrate="publishers.table")
db.publishers.publisher.filter_in = dbui.CLEAN_SPACES
db.publishers.abbreviation.filter_in = dbui.CLEAN_SPACES
db.publishers.abbreviation.filter_in = filters.CLEAN_REVIEW
db.define_table("reports",
Field("type", "string", notnull=True, unique=True),
......@@ -200,6 +202,7 @@ IS_IN_DB(db, 'publishers.abbreviation')
db.publications.submitted.requires = IS_MATCH('^\d{4}(-\d{2})?(-\d{2})?$')
db.publications.volume.requires = IS_EMPTY_OR(IS_MATCH('^\d+(-\d+)?$'))
db.publications.title.filter_in = dbui.CLEAN_SPACES
db.publications.conference_title.filter_in = dbui.CLEAN_SPACES
db.publications.conference_town.filter_in = dbui.CLEAN_SPACES
db.publications.defense.filter_in = filters.CLEAN_THESIS_DEFENSE
db.publications.title.filter_in = dbui.CLEAN_SPACES
"""a collections of functions to correct entries in the database.
@author: R. Le Gac
"""
def CLEAN_COLLABORATION(value):
    """Correct common mistakes in the collaboration field.

    Each comma separated entry is stripped of its surrounding whitespace
    and the word 'collaboration' is rewritten as 'Collaboration'.
    Empty entries, duplicate entries and entries starting with
    'on behalf' are dropped. The remaining entries are joined with ', '
    keeping their original order.

    An empty or C{None} value is returned unchanged.

    @type value: str
    @param value: string where collaborations are separated by comma

    @rtype: str
    @return: the cleaned, comma separated collaborations

    """
    # Nothing to clean. Also protects the string methods below against None.
    if not value:
        return value

    li = []
    for el in value.split(','):
        # Remove space at the beginning and at the end and
        # fix "XXX collaboration" as "XXX Collaboration"
        el = el.strip().replace('collaboration', 'Collaboration')

        # A stray comma ("A,,B" or "A,") yields an empty entry which
        # would otherwise end up in the result as "A, , B" or "A, "
        if not el:
            continue

        # Avoid duplicate entries
        if el in li:
            continue

        # Remove 'on behalf of the LHCb Collaboration'
        if el.startswith('on behalf'):
            continue

        li.append(el)

    return ', '.join(li)
def CLEAN_REVIEW(value):
    """Normalise the abbreviation of a review.

    Every dot and every comma is removed, then any run of whitespace
    is reduced to a single space. Leading and trailing whitespace
    disappear in the process.

    @type value: str
    @param value: review abbreviation

    @rtype: str
    @return: the abbreviation without dot, comma and superfluous spaces

    """
    # Drop the punctuation characters one after the other
    for char in (".", ","):
        value = value.replace(char, "")

    # Splitting on whitespace and joining back leaves exactly
    # one space between two words
    words = value.split()
    return ' '.join(words)
def CLEAN_THESIS_DEFENSE(value):
    """Correct common mistakes in the thesis defense field.

    The text 'Presented ' and the stand-alone word 'on' followed by a
    space are removed, so that 'Presented on 12 Dec 2012' becomes
    '12 Dec 2012'.

    An empty or C{None} value is returned unchanged.

    @type value: str
    @param value: string with the defense date

    @rtype: str
    @return: the defense date without the 'Presented on' wording

    """
    # Local import: the module has no import section of its own.
    import re

    # Nothing to clean. Also protects the string methods below against None.
    if not value:
        return value

    value = value.replace('Presented ', '')

    # The word boundary keeps words that merely end in "on" intact:
    # a plain replace of 'on ' turns "Lyon France" into "LyFrance".
    value = re.sub(r'\bon ', '', value)

    return value
\ No newline at end of file
......@@ -35,7 +35,7 @@ MSG_PREPRINT_IS_PAPER = current.T("Reject preprint is a published paper")
MSG_PREPRINT_IS_CONFERENCE = current.T("Reject preprint is a conference")
MSG_PREPRINT_IS_THESIS = current.T("Reject preprint is a thesis")
MSG_PREPRINT_NO_NUMBER = current.T("Reject no preprint number nor submission date")
MSG_SERVER_ERROR = current.T("Server error !!!")
MSG_SERVER_ERROR = current.T("Error !!!")
MSG_TRANFORM = current.T("Transform the talk into a proceeding")
# error messages
......
......@@ -10,7 +10,6 @@
"""
import httplib
import json
import pprint
......@@ -18,6 +17,12 @@ import re
import sys
import urllib
from filters import (CLEAN_COLLABORATION,
CLEAN_REVIEW,
CLEAN_THESIS_DEFENSE)
from plugin_dbui import CLEAN_SPACES
from xml.dom.minidom import parseString
......@@ -632,22 +637,8 @@ class Record(dict):
Return a string where collaborations are separated by comma
"""
li1 = self._get(710, 'g', force_list=True)
# Fix to remove space at the beginning and at the end
# Fix "XXX collaboration" as "XXX Collaboration"
for i in range(len(li1)):
li1[i] = li1[i].strip()
li1[i] = li1[i].replace('collaboration', 'Collaboration')
# Fix to avoid duplicate entries
# Fix to remove 'on behalf of the LHCb Collaboration'
li2 = []
for el in li1:
if not (el in li2 or el.startswith('on behalf')):
li2.append(el)
return ', '.join(li2)
li = self._get(710, 'g', force_list=True)
return CLEAN_COLLABORATION(', '.join(li))
def conference_dates(self):
......@@ -661,7 +652,7 @@ class Record(dict):
def conference_country(self):
loc = self.conference_location()
if loc:
return loc.split(',')[1].strip()
return CLEAN_SPACES(loc.split(',')[1])
return ''
......@@ -672,17 +663,17 @@ class Record(dict):
def conference_location(self):
"""Return a string 'town, country'."""
return self._get(111, 'c')
return CLEAN_SPACES(self._get(111, 'c'))
def conference_title(self):
return self._get(111, 'a')
return CLEAN_SPACES(self._get(111, 'a'))
def conference_town(self):
loc = self.conference_location()
if loc:
return loc.split(',')[0].strip()
return CLEAN_SPACES(loc.split(',')[0])
return ''
......@@ -870,7 +861,7 @@ class Record(dict):
"""The abbreviated version of the review: Phys Lett B
The returned string does not contain dots.
Return a single value or a list.
Return a value or a list.
The latter happens when there are errata.
"""
......@@ -890,24 +881,21 @@ class Record(dict):
volumes = [volumes[0]]
for i in range(len(editors)):
#add space after the dot Phys.Rev -> Phys. Rev
editor = re.sub(r'\.([A-Z])', r'. \1', editors[i])
# get the volume letter
m = re.match(r'([A-Z]+) *(\d+)', volumes[i])
if m:
if m and m.group(1) != editor[-1]:
editor = "%s %s" % (editor, m.group(1))
li.append(editor)
# Protection
# Remove dot and comma
# Only one spave between word
for i in range(len(li)):
li[i] = li[i].replace(".", "").replace(",", "")
el = ' '.join(li[i].split())
li.append(CLEAN_REVIEW(editor))
# return the value when one element in the list
if len(li) == 1:
return li[0]
# return the list
return li
......@@ -1061,9 +1049,7 @@ class Record(dict):
"""
val = self._get(500, 'a')
val = val.replace('Presented ', '')
val = val.replace('on ', '')
return val
return CLEAN_THESIS_DEFENSE(val)
def these_level(self):
......@@ -1092,7 +1078,16 @@ class Record(dict):
def title(self):
    """Return the title with CLEAN_SPACES applied.

    The value is obtained from C{self._get(245, 'a')}:

        - a string is returned after being passed to CLEAN_SPACES
        - a list has each of its elements passed to CLEAN_SPACES,
          in place, and the list is returned
        - any other value is returned unchanged

    """
    val = self._get(245, 'a')

    if isinstance(val, (unicode, str)):
        return CLEAN_SPACES(val)

    if isinstance(val, list):
        # Clean in place, then fall through to the final return.
        # Without that return the method yields None for a list.
        for i, el in enumerate(val):
            val[i] = CLEAN_SPACES(el)

    return val
def year(self):
......
......@@ -4,7 +4,7 @@ HEAD
- Migrate to plugin_dbui 0.4.12.1
* Replace local_import by the conventional import statement.
* New syntax for the linked comboxes
* Use the corrector CLEAN_SPACES
* Use the filter CLEAN_SPACES
- Bugs fixed and polish the user interface.
- Add constant to deal with migrate flag and roles.
- Consolidate the authentication procedure.
......@@ -13,7 +13,8 @@ HEAD
- Check collaboration in Proceeding, Reports and Talks harvester.
- Rename the field / table "publishers" in "reviews" using label and
the french translation table (poor man approach).
- Remove the dot in the review abbreviation
- Deploy filters in the model and the invenio_tools.Record
- Remove the dot in the review abbreviation (CLEAN_REVIEW)
- First draft of a tool to find "doublon"
0.8.1 (Dec 2012)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment