Commit a5415edd authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Merge branch '36-list-widget-synonym' into 'master'

36 use list widget for synonym

* Move the class `CheckAndFix` from the package `invenio_tools` to `harvest_tools`.
* All tools use the function `search_synonym`.
* Add to the script `run` the subcommands `loop`, `mysql` and `dump`.
* Close #36

See merge request !32
parents b220456c 344482e0
......@@ -7,14 +7,14 @@ import traceback
from gluon import current
from gluon.restricted import RestrictedError
from harvest_tools import (build_harvester_tool,
CheckAndFix,
CheckException,
DRY_RUN,
format_author_fr,
family_name_fr,
search_synonym,
ToolException)
from invenio_tools import (CheckAndFix,
CheckException,
load_record,
from invenio_tools import (load_record,
OAI_URL,
RecordConf,
RecordThesis)
......
......@@ -429,7 +429,8 @@ Au cours d'un moissonnage, l'automate rejette une publication si la
*collaboration*, le *pays* ou la *revue* ne correspond pas à une valeur
existante dans la base de données.
Ces informations sont sujettes à beaucoup de variations : ::
Ces informations sont sujettes à beaucoup de variations. Par exemple, pour
la collaboration LHCb, ces différentes formes sont utilisées : ::
LHCb Collaboration, LHCb col., LHCb c., LHCb, ...
......@@ -449,6 +450,36 @@ La logique est la suivante :
``collaborations`` en tant que synonyme par exemple,
puis relancer le moissonneur.
La :numref:`fig-ui-synonym` montre le formulaire qui permet d'ajouter ou
modifier une ``collaboration``. Il permet aussi de définir la liste
des synonymes associés à la collaboration.
Pour ce faire, il suffit d'entrer une valeur par ligne puis d'actualiser.
Pour modifier un synonyme, cliquer dessus et changer la valeur puis actualiser.
.. _fig-ui-synonym:
.. figure:: images/ui-synonym.png
:align: center
:width: 70%
Le formulaire pour définir les synonymes d'une collaboration.
Le nombre de lignes peut être insuffisant. On peut aussi avoir envie de modifier
l'ordre des synonymes dans la liste ou de détruire une valeur.
Pour ce faire, il faut utiliser le menu contextuel montré sur la
:numref:`fig-ui-synonym-contextmenu`. Un *clic droit* sur un élément de
la liste le fait apparaître.
.. _fig-ui-synonym-contextmenu:
.. figure:: images/ui-synonym-contextmenu.png
:align: center
:width: 70%
Le menu contextuel pour ajouter / détruire des éléments dans la liste.
Les mêmes outils sont disponibles pour les ``pays`` et les ``revues``.
.. _arcanes:
Les arcanes du format MARC
......
......@@ -332,7 +332,7 @@
'List of collections separated by comma: LHCb Papers, LHCb Talks': 'Liste des collections separé par une virgule : LHCb Papers, LHCb Talks',
'List of directors separated by comma: J. Doe, P.-Y. Smith': 'Noms des directeurs de thèse separé par une virgule: J. Doe, P.-Y. Smith',
'List of sections separated by a comma: Articles, Proceedings.': 'Liste de sections separé par une virgule: Articles, Proceedings.',
'List of synonyms separated by comma.': 'Liste de synonymes séparés par une virgule.',
'List of synonyms, one entry per row.': 'Liste de synonymes, une valeur par ligne.',
'List of university separated by comma': 'Liste des universités séparé par une virgule',
'List shorted by AERES category, by project and by year': 'Liste triée par catégorie AERES par projet et par année',
'List shorted by usual category, by team and by year': 'Liste triée par catégorie usuelle par équipe et par année',
......@@ -480,7 +480,9 @@
'Reject': 'Rejeter',
'Reject article is not published': "Rejeté l'article n'est pas publié",
'Reject collaboration is not well formed': 'Rejeté la collaboration est mal formatté',
'Reject collaborations is not defined': "Rejeté la collaboration n'est pas définie",
'Reject conference dates is not well formed': 'Rejecté les dates de la conférence dates sont mal formatté',
'Reject countries is not defined': "Rejeté le pays n'est pas définie",
'Reject editor is not well formed': "Rejeté l'éditeur est mal formatté",
'Reject incomplete paper reference': 'Rejeté la référence du papier est incomplète',
'Reject invalid country': 'Rejeté pays inconnu',
......@@ -502,11 +504,15 @@
'Reject preprint is a conference': 'Rejeté ce preprint est une conférence',
'Reject preprint is a published paper': 'Rejeté ce preprint est un article publié',
'Reject preprint is a thesis': 'Rejeté ce preprint est une thèse',
'Reject publishers is not defined': "Rejeté la revue n'est pas définie",
'Reject submission date is not well formed': "Rejeté la date de soumission n'est pas correcte",
'Reject the talk match a proceeding': 'Rejeté cette présentation correspond à un actes de conférence',
'Reject to many first author': 'Rejeté trop de premier autheur',
'Reject to many submit date': 'Rejeté plusieurs date de soumission',
'Reject to many year': 'Rejeté plusieurs année',
'Reject too many collaborations synonyms': 'Rejeté synonyme de collaboration défini plusieurs fois',
'Reject too many countries synonyms': 'Rejeté synonyme de pays défini plusieurs fois',
'Reject too many publishers synonyms': 'Rejeté synonyme de revue défini plusieurs fois',
'Reject XML is not well formed': "Rejeté la chaine XML n'est pas correcte",
'Rejected': 'Rejeté',
"Rejeté la chaine XML n'est pas correcte": "Rejeté la chaine XML n'est pas correcte",
......
......@@ -10,11 +10,11 @@ T("Collaboration(s) signing the publication: "
"CTA Consortium.")
tp_synonyms = \
T("List of synonyms separated by comma.")
T("List of synonyms, one entry per row.")
db.define_table("collaborations",
Field("collaboration", "string", length=255, comment=tp_collaboration, notnull=True, unique=True),
Field("synonyms", "text", comment=tp_synonyms),
Field("synonyms", "list:string", comment=tp_synonyms),
migrate="collaborations.table")
db.collaborations._before_delete.append(INHIBIT_CASCADE_DELETE)
......
......@@ -4,7 +4,7 @@
"""
db.define_table("countries",
Field("country", "string", length=255, notnull=True, unique=True),
Field("synonyms", "text", comment=tp_synonyms),
Field("synonyms", "list:string", comment=tp_synonyms),
migrate="countries.table")
db.countries._before_delete.append(INHIBIT_CASCADE_DELETE)
......
......@@ -5,7 +5,7 @@
db.define_table("publishers",
Field("publisher", "string", length=255, default="", label="Review"),
Field("abbreviation", "string", length=255, notnull=True, unique=True),
Field("synonyms", "text", comment=tp_synonyms),
Field("synonyms", "list:string", comment=tp_synonyms),
migrate="publishers.table")
db.publishers._before_delete.append(INHIBIT_CASCADE_DELETE)
......
......@@ -9,6 +9,7 @@
#-------------------------------------------------------------------------------
fieldsModifier = dbui.FieldsModifier('collaborations')
fieldsModifier.configure_field('collaboration', xtype='textarea')
fieldsModifier.configure_field('synonyms', minimumRows=5)
#-------------------------------------------------------------------------------
#
......
......@@ -13,6 +13,8 @@
# FORM CONFIGURATiON
#
#-------------------------------------------------------------------------------
fieldsModifier = dbui.FieldsModifier('countries')
fieldsModifier.configure_field('synonyms', minimumRows=5)
#-------------------------------------------------------------------------------
#
......
......@@ -13,6 +13,8 @@
# FORM CONFIGURATiON
#
#-------------------------------------------------------------------------------
fieldsModifier = dbui.FieldsModifier('publishers')
fieldsModifier.configure_field('synonyms', minimumRows=5)
#-------------------------------------------------------------------------------
#
......
......@@ -6,11 +6,12 @@ and to push them in the database.
from base import (DRY_RUN,
family_name_fr,
format_author_fr,
search_synonym,
ToolException)
search_synonym)
from automaton import Automaton
from articles import Articles
from checkandfix import CheckAndFix
from exception import CheckException, ToolException
from factory import build_harvester_tool, get_harvester_tool
from msg import Msg
from msgcollection import MsgCollection
......
......@@ -13,7 +13,7 @@ from base import (family_name_fr,
MSG_FIX_ORIGIN,
MSG_IN_DB,
MSG_LOAD)
from invenio_tools import CheckException
from checkandfix import CheckException
from plugin_dbui import get_id, UNDEF_ID
......
......@@ -10,9 +10,9 @@ from base import (MSG_FIX_ORIGIN,
MSG_IN_DB,
search_synonym,
ToolException)
from checkandfix import CheckAndFix
from gluon.storage import Storage
from invenio_tools import (CheckAndFix,
InvenioStore,
from invenio_tools import (InvenioStore,
Marc12,
OAI_URL)
from msg import Msg
......@@ -655,7 +655,6 @@ class Automaton(object):
def search_collaboration(self, value):
"""Get the database collaboration identifier using synonyms.
Create the collaboration, if it is well formed and does not exist.
Args:
value (unicode): the name of the collaboration.
......@@ -666,14 +665,12 @@ class Automaton(object):
* UNDEF_ID if value is not defined.
Raises:
ToolException: when more than one synonym is found.
ToolException: when more than one synonym is found or when the
collaboration is not defined.
"""
return search_synonym(self.db.collaborations,
"collaboration",
value,
True)
return search_synonym(self.db.collaborations, "collaboration", value)
def search_country(self, value):
"""Get the database country identifier using synonyms.
......
......@@ -2,6 +2,7 @@
""" harvest_tools.base
"""
from exception import ToolException
from invenio_tools import REG_AUTHOR
from plugin_dbui import get_id, UNDEF_ID
......@@ -12,8 +13,8 @@ MSG_CRASH = "Crash: %s"
MSG_FIX_ORIGIN = "Fixed the origin field"
MSG_IN_DB = "Already in the database"
MSG_LOAD = "Load in the database"
MSG_NO_ENTRY = "Reject %s is not defined."
MSG_TOOMANY_SYNONYM = "Reject too many %s synonyms."
MSG_NO_ENTRY = "Reject %s is not defined"
MSG_TOOMANY_SYNONYM = "Reject too many %s synonyms"
def family_name_fr(full_name):
......@@ -156,7 +157,7 @@ def search_synonym(table, fieldname, value, create=False):
Note:
The database table must have a field name *synonyms*.
It is a string containing values separated by a comma.
It contains a list of strings.
Args:
table (gluon.DAL.Table): database table.
fieldname (unicode): field of the database table
......@@ -203,11 +204,7 @@ def search_synonym(table, fieldname, value, create=False):
elif ncount == 1:
return setrows.select(table.id).first().id
# more than one synonyms - don't know how to choose
# more than one synonyms - don't know what to choose
else:
msg = MSG_TOOMANY_SYNONYM % table._tablename
raise ToolException(msg)
class ToolException(Exception):
    """Error reported by the harvester tools.

    ``search_synonym`` raises it when more than one synonym matches
    the requested value.
    """
# -*- coding: utf-8 -*-
""" invenio_tools.checkandfix
""" harvest_tools.checkandfix
"""
import re
import regex
from base import (MSG_NO_CONF,
MSG_NO_COUNTRY,
MSG_NO_PUBLISHER,
MSG_WELL_FORMED_COLLABORATION,
MSG_NO_THESIS,
OAI_URL,
REG_AUTHOR,
REG_OAI,
REG_YEAR)
from base import search_synonym
from exception import CheckException
from invenio_tools import (load_record,
MSG_NO_CONF,
MSG_NO_THESIS,
OAI_URL,
RecordConf,
RecordThesis,
REG_AUTHOR,
REG_OAI,
REG_YEAR)
from filters import CLEAN_REVIEW
from gluon import current
from inveniostore import InvenioStore
from marc12 import Marc12
from plugin_dbui import get_id
from recordconf import RecordConf
from recordthesis import RecordThesis
DECODE_ARXIV = re.compile(r"arXiv:(\d{2})(\d{2})\.")
......@@ -88,25 +85,6 @@ REG_SUBMITTED = re.compile(regex.REG_SUBMITTED)
UNIVERSITY = "University"
def load_record(host, record_id):
    """Fetch one record from an invenio store and decode it.

    Args:
        host (unicode): host of the store.
            Possible values are ``cds.cern.ch`` or ``inspirehep.net``.
        record_id (integer): identifier of the record in the store.

    Returns:
        Record: the first item produced by the ``Marc12`` decoder for the
        XML returned by the store.

    """
    # Retrieve the raw XML for the requested record.
    xml = InvenioStore(host).get_record(record_id)

    # Decode the XML; only the first decoded item is returned.
    decode = Marc12()
    records = decode(xml)
    return records[0]
class CheckAndFix(object):
"""A collection of tools to check and repair the content
of the Marc12 record.
......@@ -378,21 +356,16 @@ class CheckAndFix(object):
record (RecordPubli): record describing a publication.
Raises:
CheckException: when the collaboration value is not well formed
and not entered as a synonym.
ToolException: when the collaboration value is neither defined
nor entered as a synonym.
"""
val = record.collaboration()
if not val:
return
if REG_COLLABORATION.match(val):
return
if self._is_synonym("collaborations", val):
return
raise CheckException(MSG_WELL_FORMED_COLLABORATION)
db = self.db
search_synonym(db.collaborations, "collaboration", val)
def country(self, record):
"""Check conference country.
......@@ -402,8 +375,8 @@ class CheckAndFix(object):
record (RecordConf): record describing a talk or a proceeding.
Raises:
CheckException: when the country is not defined
and not entered as a synonym.
ToolException: when the country is not defined
nor entered as a synonym.
"""
if not isinstance(record, RecordConf):
......@@ -411,15 +384,7 @@ class CheckAndFix(object):
db = self.db
val = record.conference_country()
id_country = get_id(db.countries, country=val)
if id_country:
return
if self._is_synonym("countries", val):
return
raise CheckException(MSG_NO_COUNTRY)
search_synonym(db.countries, "country", val)
def conference_date(self, record):
"""Check conference date.
......@@ -794,8 +759,8 @@ class CheckAndFix(object):
record (RecordPubli): record describing a publication.
Raises:
CheckException: when the publisher is not defined
and not entered as a synonym.
ToolException: when the publisher is not defined
nor entered as a synonym.
"""
db = self.db
......@@ -807,14 +772,7 @@ class CheckAndFix(object):
if isinstance(val, list):
val = val[0]
id_publisher = get_id(db.publishers, abbreviation=val)
if id_publisher:
return
if self._is_synonym("publishers", val):
return
raise CheckException(MSG_NO_PUBLISHER)
search_synonym(db.publishers, "abbreviation", val)
def recover_oai(self, record, host):
"""Recover the OAI identifier when it is not defined
......
# -*- coding: utf-8 -*-
""" harvest_tools.exception
"""
from invenio_tools import ExceptionUTF8
class CheckException(ExceptionUTF8):
    """Error signalling that the check of a record failed.

    NOTE(review): presumably raised by the ``CheckAndFix`` tools -- confirm
    against the callers.
    """
class ToolException(ExceptionUTF8):
    """Error reported by the harvester tools.

    ``search_synonym`` raises it when more than one synonym matches
    the requested value.
    """
......@@ -5,12 +5,14 @@
import json
from base import MSG_NO_ENTRY, MSG_TOOMANY_SYNONYM
from gluon import current
from gluon.storage import Storage
from invenio_tools import (MSG_NO_COUNTRY,
MSG_NO_PUBLISHER,
MSG_WELL_FORMED_COLLABORATION,
OAI_URL)
from invenio_tools import OAI_URL
MSGS = (MSG_NO_ENTRY, MSG_TOOMANY_SYNONYM)
TABLES = ("collaborations", "countries", "publishers")
class Msg(Storage):
......@@ -117,14 +119,15 @@ class Msg(Storage):
self.action = 'reject'
if record is not None:
if str(txt) == MSG_NO_COUNTRY:
self.synonym = record.country()
elif str(txt) == MSG_WELL_FORMED_COLLABORATION:
self.synonym = record.collaboration()
elif str(txt) == MSG_NO_PUBLISHER:
self.synonym = record.publisher()
for msg in MSGS:
for tablename in TABLES:
if str(txt) == msg % tablename:
if tablename == "collaborations":
self.synonym = record.collaboration()
elif tablename == "countries":
self.synonym = record.country()
elif tablename == "publishers":
self.synonym = record.publisher()
if year is None and record is not None:
year = record.year()
......
......@@ -7,7 +7,7 @@ import traceback
from automaton import Automaton
from base import family_name_fr, format_author_fr, MSG_CRASH, MSG_LOAD
from invenio_tools import CheckException
from checkandfix import CheckException
from plugin_dbui import UNDEF_ID
......
......@@ -7,7 +7,8 @@ import traceback
from automaton import Automaton
from base import family_name_fr, format_author_fr, MSG_CRASH, MSG_LOAD
from invenio_tools import CheckException, RecordConf, RecordThesis
from checkandfix import CheckException
from invenio_tools import RecordConf, RecordThesis
from plugin_dbui import UNDEF_ID
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment