Commit a406fb2d authored by LE GAC Renaud
Browse files

Replace the method Automaton._search by the function search_synonym.

parent 2bbc6201
harvest_tools.automaton.search_synonym
======================================
.. currentmodule:: harvest_tools.automaton
.. autofunction:: search_synonym
\ No newline at end of file
......@@ -48,6 +48,7 @@ Helper functions
~base.family_name_fr
~base.format_author_fr
~base.learn_my_authors
~automaton.search_synonym
Logger
^^^^^^
......
......@@ -8,7 +8,7 @@ from base import (DRY_RUN,
format_author_fr,
ToolException)
from automaton import Automaton
from automaton import Automaton, search_synonym
from articles import Articles
from factory import build_harvester_tool, get_harvester_tool
from msg import Msg
......
......@@ -27,6 +27,61 @@ MSG_NO_OAI = "Reject no OAI identifier"
MSG_WELL_FORM_OAI = "Reject OAI is not well formed"
def search_synonym(table, fieldname, value, create=False):
    """Return the id of the record matching *value*, either directly
    through the field *fieldname* or through the *synonyms* field.

    Note:
        The database table must have a field named *synonyms*.
        It is a string containing values separated by a comma.

    Args:
        table (gluon.DAL.Table): database table.
        fieldname (unicode): name of the field of the database table
            which is compared to the value.
        value (unicode): value to be matched.
        create (bool): when ``True``, insert a new record in the table
            if neither the field nor the synonyms match the value.

    Returns:
        int:
            * the id of the matching, or newly inserted, record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException: when more than one record matches through the
            synonyms field. It is also raised, with the same message,
            when no synonym is found and *create* is ``False``.

    """
    # an empty / undefined value can not be searched
    if not value:
        return UNDEF_ID

    database = table._db

    # first attempt: the field itself holds the value
    criteria = {fieldname: value}
    found = get_id(table, **criteria)
    if found is not None:
        return found

    # second attempt: the records having the value in their synonyms
    matches = database(table.synonyms.contains(value))
    nmatch = matches.count()

    # exactly one synonym: unambiguous answer
    if nmatch == 1:
        return matches.select(table.id).first().id

    # nothing at all: create the entry when it is allowed
    if nmatch == 0 and create:
        return table.insert(**criteria)

    # any other case (several synonyms, or none without create)
    # can not be resolved
    raise ToolException(MSG_TOOMANY_SYNONYM % table._tablename)
class Automaton(object):
"""Base class to search and process publications:
......@@ -213,62 +268,6 @@ class Automaton(object):
return True
def _search(self, tablename, fieldname, value, create=False):
    """Return the id of the record of the table *tablename* for which
    the field *fieldname* matches *value*.

    Note:
        The search is also performed on the *synonyms* field
        when nothing is found with the field itself.

    Args:
        tablename (unicode): name of the database table.
        fieldname (unicode): name of the database field.
        value (unicode): the search value.
        create (bool): when ``True``, insert a new record in the table
            if neither the field nor the synonyms match the value.

    Returns:
        int:
            * the id of the matching, or newly inserted, record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException: when more than one record matches through the
            synonyms field. It is also raised, with the same message,
            when no synonym is found and *create* is ``False``.

    """
    # an empty / undefined value can not be searched
    if not value:
        return UNDEF_ID

    database = self.db
    table = database[tablename]

    # first attempt: the field itself holds the value
    criteria = {fieldname: value}
    found = get_id(table, **criteria)
    if found is not None:
        return found

    # second attempt: the records having the value in their synonyms
    matches = database(table.synonyms.contains(value))
    nmatch = matches.count()

    # exactly one synonym: unambiguous answer
    if nmatch == 1:
        return matches.select(table.id).first().id

    # nothing at all: create the entry when it is allowed
    if nmatch == 0 and create:
        return table.insert(**criteria)

    # any other case (several synonyms, or none without create)
    # can not be resolved
    raise ToolException(MSG_TOOMANY_SYNONYM % tablename)
def _search_parameters(self, collection):
"""Build the keywords to steer the URL search in invenio store.
The main parameter is the collection and the date range defined
......@@ -618,7 +617,7 @@ class Automaton(object):
def search_collaboration(self, value):
"""Get the database collaboration identifier using synonyms.
Create the collaboration, if it does not exist.
Create the collaboration, if it is well formed and does not exist.
Args:
value (unicode): the name of the collaboration.
......@@ -628,11 +627,15 @@ class Automaton(object):
* the id of the collaboration record.
* UNDEF_ID if value is not defined.
Raise:
Raises:
ToolException: when more than one synonym is found.
"""
return self._search("collaborations", "collaboration", value, True)
return search_synonym(self.db.collaborations,
"collaboration",
value,
True)
def search_country(self, value):
"""Get the database country identifier using synonyms.
......@@ -645,11 +648,11 @@ class Automaton(object):
* the id of the country record.
* UNDEF_ID if value is not defined.
Raise:
Raises:
ToolException: when more than one synonym is found.
"""
return self._search("countries", "country", value)
return search_synonym(self.db.countries, "country", value)
def search_publisher(self, value):
"""Get the database publisher identifier using synonyms.
......@@ -662,8 +665,8 @@ class Automaton(object):
* the id of the publisher record.
* UNDEF_ID if value is not defined.
Raise:
Raises:
ToolException: when more than one synonym is found.
"""
return self._search("publishers", "abbreviation", value)
return search_synonym(self.db.publishers, "abbreviation", value)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment