Commit 0b05556d authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Migrate to plugin_dbui 0.6.1.3 in order to use inline script.

Redesign the harvester report using inline javascript and Ext.grid.Panel.
parent 5724eec9
......@@ -4,11 +4,18 @@
import traceback
from gluon.storage import Storage
from harvest_tools import get_harvester_tool, ToolException
from harvest_tools import (get_harvester_tool,
to_logsStore,
ToolException,
to_statStore)
from plugin_dbui import Selector
from docutils.nodes import Inline
MSG_NO_HARVESTER = T("No harvesters for your selection !!!")
INLINE_ALERT = "<script>Ext.Msg.alert('%s', '%s');</script>"
def free_run():
"""Run a free harvester.
All harvester parameters are defined via the selector.
......@@ -31,7 +38,7 @@ def free_run():
msg = T('All fields of the form have to be defined !!!')
msg += "<br>"
msg += T('The field "%s" is missing.') % el
return msg
return INLINE_ALERT % (T('Error'), msg)
tool_class = get_harvester_tool(selector.controller)
tool = tool_class(db, selector, debug=False)
......@@ -64,7 +71,7 @@ def insert_marcxml():
tool_class = get_harvester_tool(selector.controller)
if not tool_class:
return T('Select a controller.')
return INLINE_ALERT % (T('Error'), T('Select a controller.'))
tool = tool_class(db, selector, debug=False)
tool.process()
......@@ -101,7 +108,7 @@ def run():
tool_class = get_harvester_tool(selector.controller)
if not tool_class:
return T('Select an harvester.')
return INLINE_ALERT % (T('Error'), T('Select an harvester.'))
tool = tool_class(db, selector, debug=False)
tool.process()
......@@ -123,11 +130,9 @@ def run_all():
"""Run all harvesters in one go.
"""
collection_logs = []
collections = []
logs = []
nfound = 0
ninsert = 0
urls = []
try:
selector = Selector(virtdb.run_all_harvesters_selector,
......@@ -142,26 +147,24 @@ def run_all():
else:
query = q
rows = db(query).select(db.harvesters.ALL)
if not len(rows):
return MSG_NO_HARVESTER
harvesters = db(query).select(db.harvesters.ALL)
if not len(harvesters):
return INLINE_ALERT % (T('Error'), MSG_NO_HARVESTER)
for row in rows:
selector.controller = row.controller
selector.id_projects = row.id_projects
selector.id_teams = row.id_teams
for harvester in harvesters:
selector.controller = harvester.controller
selector.id_projects = harvester.id_projects
selector.id_teams = harvester.id_teams
collections.extend(row.collections.split(','))
collections.extend(harvester.collections.split(','))
tool_class = get_harvester_tool(selector.controller)
tool = tool_class(db, selector, debug=False)
tool.process()
collection_logs.extend(tool.collection_logs)
logs.extend(tool.logs)
nfound += tool.nfound
ninsert += tool.ninsert
urls.extend(tool.search_urls)
except ToolException, e:
return T(str(e))
......@@ -172,11 +175,18 @@ def run_all():
msg += '<hr/>'
return msg
# tune harvester / selector parameters used in the report title
harvester = Storage(controller='all harvesters',
collections=','.join(collections))
if query == None:
selector.id_projects = None
# delegate rendering to the report view
response.view = 'harvest/layout.%s' % request.extension
return dict(harvester=Storage(controller='all harvesters',
collections=','.join(collections)),
return dict(cfg_statStore=to_statStore(collection_logs, logs),
cfg_logsStore=to_logsStore(logs),
collection_logs=collection_logs,
harvester=harvester,
logs=logs,
nfound=nfound,
ninsert=ninsert,
selector=selector,
urls=urls)
selector=selector)
......@@ -17,11 +17,15 @@
'ACL': 'ACL',
'ACLN': 'ACLN',
'ACTI': 'ACTI',
'Action': 'Action',
'ACTN': 'ACTN',
'Add': 'Ajouter',
'Added': 'Ajouté',
'Address of the invenio store where the search is performed.': 'Addresse du site invenio où les recherches sont effectuées.',
'administrators, librairians,...': 'administrateurs, documentalistes,...',
'Affiliation': 'Affiliation',
'Agencies': 'Agences',
'All': 'Tous',
'All fields of the form have to be defined !!!': 'Tous les champs doivent ếtre définis !!!',
'Already in the database': 'Publication déjà enregistré dans la base de donnée',
'An article already exists with the same:': 'Un article existe déja avec le même:',
......@@ -159,8 +163,11 @@
'enter an integer between %(min)g and %(max)g': 'entrez un entier entre %(min)g et %(max)g',
'enter an integer greater than or equal to %(min)g': 'entrez un entier plus grand ou égual à %(min)g',
'Entries with duplicate origin [%s]': 'Enregistrements avec la même origine [%s]',
'Error': 'Erreur',
'Error !!!': 'Erreur !!!',
'essai à blanc': 'essai à blanc',
'Exist': 'Existe',
'Existing': 'Existant',
'export as csv file': 'exporter un fichier CSV',
'extract authors': 'extraire les auteurs',
'Fill': 'Remplir',
......@@ -180,12 +187,15 @@
'First Author': 'Premier Auteur',
'First name': 'First name',
'Fixed the origin field': 'Corrige le champ origine',
'Fixed the page field': 'Fixed the page field',
'Footer': 'Pied de page',
'Forgot username?': 'Forgot username?',
'forgot username?': 'forgot username?',
'Format': 'Format',
'Formattor': 'Formattor',
'Forms': 'Formulaires',
'Found': 'Trouvé',
'from %s to %s': 'de %s à %s',
'Function disabled': 'Fonction desactivée',
'General': 'Général',
'Granularity': 'Granularité',
......@@ -197,6 +207,7 @@
'groups': 'groupes',
'Harvest': 'Moissonner',
'Harvester parameters not defined in the database.': 'Les paramètres du moissoneur ne sont pas définis dans la base de données.',
'Harvester took %s seconds': 'La moisson à durée %s secondes',
'Harvester(s)': 'Moissonneur(s)',
'harvesters': 'moissonneurs',
'Header': 'En tête',
......@@ -250,6 +261,7 @@
'Login': 'Login',
'Logout': 'Logout',
"Look for %s in '%s'": "Recherche des %s dans '%s'",
'Lost': 'Perdu',
'Lost Password': 'Lost Password',
'Lost password?': 'Lost password?',
'Main layout': 'Main layout',
......@@ -257,8 +269,10 @@
'Metric': 'Métrique',
'metrics': 'métriques',
'Mode': 'Mode',
'Modified': 'Modifié',
'Modified By': 'Modified By',
'Modified On': 'Modified On',
'Modify': 'Modifier',
'Name': 'Nom',
'Name of a function located in the modules list_postprocessing. Can be a list of name separated by comma.': 'Nom de la fonction localisées dans le module list_postprocessing, ou liste de nom séparé par une virgule.',
'Name of the database table containing the publications shown in this section.': 'Nom de la table qui contient les publications qui seront présentées dans cette section.',
......@@ -276,12 +290,17 @@
'Number for the first pages or a range 69-80': 'Numéro de la première page ou un range 69-80',
'Number of invalid records': "Nombre d'enregistrement non valide",
'Number of invalid records: %s': "Nombre d'enregistrement non valide : %s",
'Number of records added in the database': "Nombre d'enregistrements ajoutés à la base de donnée",
'Number of records already registered': "Nombre d'enregistrements déjà enregistrés dans la base de donnée",
'Number of records already validated': "Nombre d'enregistrements déjà validé",
'Number of records already validated: %s': "Nombre d'enregistrements déjà validé : %s",
'Number of records analysed': "Nombre d'enregistrements analysés",
'Number of records found': "Nombre d'enregistrements trouvés",
'Number of records found: %s': "Nombre d'enregistrements trouvés : %s",
'Number of records inserted in the database': "Nombre d'enregistrements ajoutés à la base de donnée",
'Number of records inserted in the database: %s': "Nombre d'enregistrements ajoutés à la base de donnée : %s",
'Number of records modified in the database': "Nombre d'enregistrements modifiés dans la base de donnée",
'Number of records rejected': "Nombre d'enregistrements rejetés",
'Number of records validated': "Nombre d'enregistrements validé",
'Number of records validated: %s': "Nombre d'enregistrements validé : %s",
'Number(s)': 'Numéro(s)',
......@@ -349,13 +368,14 @@
'Reject no authors': "Rejeté pas d'auteurs",
'Reject no conference information': "Rejeté pas d'information sur la conférence",
'Reject no CPPM authors': "Rejeté pas d'auteurs du CPPM",
'Reject no OAI identifier': "Rejeté pas d'identifiant OAI",
'Reject no preprint number nor submission date': 'Rejeté pas de numéro de preprint ou de date de soumission',
'Reject not a thesis record': "Rejeté cet enregistement n'est pas une thèse",
'Reject preprint is a conference': 'Rejeté ce preprint est une conférence',
'Reject preprint is a published paper': 'Rejeté ce preprint est un article publié',
'Reject preprint is a thesis': 'Rejeté ce preprint est une thèse',
'Reject the talk match a proceeding': 'Rejeté cette présentation correspond à un actes de conférence',
'Reject. The oai identifier is missing for id %s': "Rejeté. L'identifiant oai est manquant pour l'id %s",
'Rejected': 'Rejeté',
'Relation team / projects': 'Relation équipe / projets',
'relation user / groups': 'relation utilisateur / groupes',
'Removing affiliation failed. Rules might be missing for the given string. Contact the author.': 'Removing affiliation failed. Rules might be missing for the given string. Contact the author.',
......@@ -409,6 +429,7 @@
'Sort the publications list associated to the section according to the database field. The field has to belong to the table publications. The publications of this section will be sort according to this field': 'Ordonne les publications de la sections en fonction de ce champ. Il doit appartenir à la table "publications".',
'Speaker': 'Orateur',
'state': 'state',
'Statistics': 'Statistique',
'Status': 'Status',
'status': 'status',
'Store': 'Entrepot',
......
......@@ -8,6 +8,7 @@ and to push them in the database.
import difflib
import datetime
import invenio_tools
import json
import pprint
import re
......@@ -21,25 +22,25 @@ from plugin_dbui import (UNDEF_ID,
DRY_RUN = current.T("dry run")
# explain message
MSG_DELETE_TALK = current.T("Delete the associated talk")
MSG_FIX_ORIGIN = current.T("Fixed the origin field")
MSG_FIX_PAGE = current.T("Fixed the page field")
MSG_IN_DB = current.T("Already in the database")
MSG_LOAD = current.T("Load in the database")
MSG_MATCH = current.T("Reject the talk match a proceeding")
MSG_NO_AUTHOR = current.T("Reject no authors")
MSG_NO_CONFERENCE = current.T("Reject no conference information")
MSG_NO_CPPM_AUTHOR = current.T("Reject no CPPM authors")
MSG_NO_EDITOR = current.T("Reject article is not published")
MSG_NO_OAI = current.T("Reject. The oai identifier is missing for id %s")
MSG_NO_THESIS = current.T("Reject not a thesis record")
MSG_PREPRINT_IS_PAPER = current.T("Reject preprint is a published paper")
MSG_PREPRINT_IS_CONFERENCE = current.T("Reject preprint is a conference")
MSG_PREPRINT_IS_THESIS = current.T("Reject preprint is a thesis")
MSG_PREPRINT_NO_NUMBER = current.T("Reject no preprint number nor submission date")
MSG_SERVER_ERROR = current.T("Error !!!")
MSG_TRANSFORM_PREPRINT = current.T("Transform the preprint into an article")
MSG_TRANSFORM_TALK = current.T("Transform the talk into a proceeding")
MSG_DELETE_TALK = current.T("Delete the associated talk", lazy=False)
MSG_FIX_ORIGIN = current.T("Fixed the origin field", lazy=False)
MSG_FIX_PAGE = current.T("Fixed the page field", lazy=False)
MSG_IN_DB = current.T("Already in the database", lazy=False)
MSG_LOAD = current.T("Load in the database", lazy=False)
MSG_MATCH = current.T("Reject the talk match a proceeding", lazy=False)
MSG_NO_AUTHOR = current.T("Reject no authors", lazy=False)
MSG_NO_CONFERENCE = current.T("Reject no conference information", lazy=False)
MSG_NO_CPPM_AUTHOR = current.T("Reject no CPPM authors", lazy=False)
MSG_NO_EDITOR = current.T("Reject article is not published", lazy=False)
MSG_NO_OAI = current.T("Reject no OAI identifier", lazy=False)
MSG_NO_THESIS = current.T("Reject not a thesis record", lazy=False)
MSG_PREPRINT_IS_PAPER = current.T("Reject preprint is a published paper", lazy=False)
MSG_PREPRINT_IS_CONFERENCE = current.T("Reject preprint is a conference", lazy=False)
MSG_PREPRINT_IS_THESIS = current.T("Reject preprint is a thesis", lazy=False)
MSG_PREPRINT_NO_NUMBER = current.T("Reject no preprint number nor submission date", lazy=False)
MSG_SERVER_ERROR = current.T("Error !!!", lazy=False)
MSG_TRANSFORM_PREPRINT = current.T("Transform the preprint into an article", lazy=False)
MSG_TRANSFORM_TALK = current.T("Transform the talk into a proceeding", lazy=False)
# error messages
MSG_NO_CAT = 'Select a "category" !!!'
......@@ -403,6 +404,97 @@ def learn_cppm_authors(db, authors=None,
db.cppm_authors[row.id] = dict(authors=', '.join(database_authors))
def to_logsStore(logs):
"""Convert a list of publication message into the configuration
of the Ext.data.ArrayStore. It will contain the information on
what happen to each publication found in the harvester repository.
@type logs: list of L{Msg}
@param logs: list of publication message
@rtype: str
@return: the configuration of the Ext JS store is serialized
as a JSON string.
"""
cfg = dict(groupField='txt',
sorters=['txt', 'title'])
cfg['fields'] = [{'name': 'collection', 'type': 'string'},
{'name': 'title', 'type': 'string'},
{'name': 'txt', 'type': 'string'},
{'name': 'url', 'type': 'string'},
{'name': 'year', 'type': 'string'}]
cfg['data'] = []
for row in logs:
cfg['data'].append([row.collection,
row.title,
row.txt,
row.url,
row.year])
return json.dumps(cfg)
def to_statStore(collection_logs, logs):
"""Convert a list of collection and publication messages into the
configuration of the Ext.data.ArrayStore. It will containing the
statistic of the harvester scan for each collection.
@type logs: list of L{MsgCollection}
@param logs: list of collection message
@type logs: list of L{Msg}
@param logs: list of publication message
@rtype: str
@return: the configuration of the Ext JS store is serialized
as a JSON string.
"""
cfg = dict(sorters=['collection'])
cfg['fields'] = [{'name': 'add', 'type': 'int'},
{'name': 'error', 'type': 'string'},
{'name': 'found', 'type': 'int'},
{'name': 'idle', 'type': 'int'},
{'name': 'lost', 'type': 'int'},
{'name': 'modify', 'type': 'int'},
{'name': 'reject', 'type': 'int'},
{'name': 'title', 'type': 'string'},
{'name': 'url', 'type': 'string'}]
cfg['data'] = []
for collection in collection_logs:
# count database action for this collection
count = Storage(idle=0, load=0, modify=0, process=0, reject=0)
for row in logs:
if row.collection != collection.title:
continue
count.process += 1
count[row.action] += 1
stat = [count.load,
collection.error,
collection.found,
count.idle,
collection.found - count.process,
count.modify,
count.reject,
collection.title,
collection.url_hb()]
cfg['data'].append(stat)
return json.dumps(cfg)
class Msg(Storage):
"""Message and action taken for a publication.
- The publication is found by an harvester tool, in a store.
......@@ -414,12 +506,14 @@ class Msg(Storage):
- C{modify}
- C{reject}
The class contains four public attributes:
The class contains the attributes:
- C{action}: action taken
- C{collection}: the harvester collection
- C{url}: URL used to access to the record in the store
- C{title}: title of the publication
- C{txt}: text of the message
- C{year}: year of the publication
"""
......@@ -430,7 +524,7 @@ class Msg(Storage):
@param txt: message
"""
self.action = None
self.action = 'idle'
self.txt = txt
def load(self, txt):
......@@ -464,6 +558,32 @@ class Msg(Storage):
self.txt = txt
class MsgCollection(Storage):
"""Message for a collection. The class contains five public attributes:
- C{error}: error when scanning the collection
- C{found}: number of publication found in the harvester repository
- C{url}: URL used to scan the harvester repository, returning a list ids.
- C{title}: title of the collection
"""
def __init__(self, error="", found=0, title="", url=""):
Storage.__init__(self)
self.error = error
self.found = found
self.title = title
self.url = url
def url_hb(self):
"""
@rtype: str
@return: the URL return a list of record in readable format.
"""
return self.url.replace("of=id", "of=hb")
class PublicationsTool(object):
"""Base class to search and process publications.
- Decode the parameter of a selector defining user criteria.
......@@ -491,17 +611,15 @@ class PublicationsTool(object):
"""
self.cppm_authors = None
self.collection_logs = []
self.db = db
self.dbg = debug
self.harvester = None
self.logs = []
self.nfound = 0
self.ninsert = 0
self.marc12 = invenio_tools.Marc12Svc()
self.marc12.set_format_author_name(format_author_fr)
self.search_urls = []
self.selector = selector
......@@ -750,6 +868,8 @@ class PublicationsTool(object):
id_projects=selector.id_projects,
id_teams=selector.id_teams)
self.collection_logs.append(MsgCollection(found=1))
self.process_xml(selector.xml)
return
......@@ -780,6 +900,10 @@ class PublicationsTool(object):
collections = self.harvester.collections
collections = re.sub(' *, *', ',', collections).split(',')
# alias
controller = self.harvester.controller.title()
project = self.db.projects[self.harvester.id_projects].project
# extract the list of publications from the store for each collection
# the search is perform on a range of creation date
# if not defined all element are return
......@@ -789,18 +913,25 @@ class PublicationsTool(object):
for collection in collections:
# log collection information
# A collection is identified as "Project Controller collection"
title = "%s %s / %s" % (project, controller, collection)
self.collection_logs.append(MsgCollection(title=title))
# search record in the harvester repository
kwargs = self._search_parameters(collection)
try:
ids = cds.get_ids(**kwargs)
except invenio_tools.CdsSvcException as error:
tu = (cds.last_search_url(), error)
self.collection_logs[-1].url = cds.last_search_url()
self.collection_logs[-1].error = error
continue
tu = (cds.last_search_url(), len(ids))
self.search_urls.append(tu)
self.collection_logs[-1].url = cds.last_search_url()
self.collection_logs[-1].found = len(ids)
if not ids:
continue
......@@ -838,7 +969,6 @@ class PublicationsTool(object):
li = self.marc12.process(xml)
for record in li:
self.nfound += 1
if self.dbg:
print "record decoded"
......@@ -846,9 +976,11 @@ class PublicationsTool(object):
oai_url = record.oai_url()
self.logs.append(Msg(url=oai_url))
self.logs[-1].title = record.title()
self.logs[-1].collection = self.collection_logs[-1].title
self.logs[-1].year = record.year()
if not oai_url:
self.logs[-1].reject(MSG_NO_OAI % record.id())
self.logs[-1].reject(MSG_NO_OAI)
continue
if not self.select_record(record):
......@@ -858,7 +990,6 @@ class PublicationsTool(object):
print "load record in the database", self.logs[-1].title
i = self.load_db(record)
self.ninsert += i
if self.dbg:
print self.logs[-1].txt
......@@ -869,21 +1000,21 @@ class PublicationsTool(object):
@rtype: dict
@return:
- C{harvester} (Storage):
- C{logs} (List) one L{Msg} for each publications
- C{nfound} (int) number of publications found
- C{ninsert} (int) number of publications insert in the database
- C(cfg_statStore) (str) configuration of the statStore JSON encoded
- C(cfg_logsStore) (str) configuration of the logsStore JSON encoded
- C{collection_logs) (list) one L{MsgCollection}) for each collection
- C{harvester} (Storage)
- C{logs) (list) one L{Msg} for each publication
- C{selector} (Selecor)
- C{urls} (list) urls use for the searches
"""
return dict(harvester=self.harvester,
return dict(cfg_statStore=to_statStore(self.collection_logs, self.logs),
cfg_logsStore=to_logsStore(self.logs),
collection_logs=self.collection_logs,
harvester=self.harvester,
logs=self.logs,
nfound=self.nfound,
ninsert=self.ninsert,
selector=self.selector,
urls=self.search_urls)
selector=self.selector)
class Articles(PublicationsTool):
......
<!--
The title of the report
Compute an unique identifier for each DIV associated to a grid.
-->
{{
my_style = """
.my-li,
.my-ol,
.my-p,
.my-p-smallcap {
font-family: Liberation Sherif, Sherif;
font-size: 11px;
}
.my-li,
.my-ol {
margin-left: 35px;
margin-bottom: 0px;
}
.my-ol {
list-style-type: decimal;
}
.my-p,
.my-p-smallcap {
margin-left: 20px;
}
.my-p-smallcap {
font-variant: small-caps;
margin-bottom: 1ex;
}
"""
response.write(BR())
response.write(STYLE(XML(my_style)))
def myli(txt, *args):
return LI(XML(txt), *args, _class="my-li")
def myol(*args):
return OL(*args, _class="my-ol")
def myp(txt):
return P(txt, _class="my-p")
def myp_smallcap(txt):
return P(txt, _class="my-p-smallcap")
#
# Header
#
txt = harvester.controller.title()
response.write(myp_smallcap(txt))
#
# Scanning condition and results
#
ul = UL()
if selector.year_start:
t1 = T("Scan %s") % selector.year_start