Commit a406fb2d authored by LE GAC Renaud
Browse files

Replace the method Automaton._search by the function search_synonym.

parent 2bbc6201
harvest_tools.automaton.search_synonym
======================================
.. currentmodule:: harvest_tools.automaton
.. autofunction:: search_synonym
\ No newline at end of file
...@@ -48,6 +48,7 @@ Helper functions ...@@ -48,6 +48,7 @@ Helper functions
~base.family_name_fr ~base.family_name_fr
~base.format_author_fr ~base.format_author_fr
~base.learn_my_authors ~base.learn_my_authors
~automaton.search_synonym
Logger Logger
^^^^^^ ^^^^^^
......
...@@ -8,7 +8,7 @@ from base import (DRY_RUN, ...@@ -8,7 +8,7 @@ from base import (DRY_RUN,
format_author_fr, format_author_fr,
ToolException) ToolException)
from automaton import Automaton from automaton import Automaton, search_synonym
from articles import Articles from articles import Articles
from factory import build_harvester_tool, get_harvester_tool from factory import build_harvester_tool, get_harvester_tool
from msg import Msg from msg import Msg
......
...@@ -27,6 +27,61 @@ MSG_NO_OAI = "Reject no OAI identifier" ...@@ -27,6 +27,61 @@ MSG_NO_OAI = "Reject no OAI identifier"
MSG_WELL_FORM_OAI = "Reject OAI is not well formed" MSG_WELL_FORM_OAI = "Reject OAI is not well formed"
def search_synonym(table, fieldname, value, create=False):
    """Get the database identifier for the record having the database field
    or the synonyms field matching the value.

    The field *fieldname* is checked first. The *synonyms* field is only
    searched when that first lookup finds nothing.

    Note:
        The database table must have a field named *synonyms*.
        It is a string containing values separated by a comma.

    Args:
        table (gluon.DAL.Table): database table.
        fieldname (unicode): field of the database table
            identified by its name.
        value (unicode): value to be matched.
        create (bool): create a new entry in the database table when
            it is ``True`` and neither the field nor a synonym matches.

    Returns:
        int:
            * the id of the database record (existing or newly created).
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException: when more than one synonym is found, or when
            nothing is found and *create* is ``False``.

    """
    if not value:
        return UNDEF_ID

    kwargs = {fieldname: value}

    id_rec = get_id(table, **kwargs)
    if id_rec is not None:
        return id_rec

    # nothing found, have a look at the synonyms field
    # NOTE(review): contains() on a string field is presumably a substring
    # match, so a short value may match inside a longer synonym -- confirm.
    setrows = table._db(table.synonyms.contains(value))
    ncount = setrows.count()

    # exactly one synonym found
    if ncount == 1:
        return setrows.select(table.id).first().id

    # more than one synonym - don't know how to choose
    if ncount > 1:
        raise ToolException(MSG_TOOMANY_SYNONYM % table._tablename)

    # no synonym found, create the entry when the caller allows it
    if create:
        return table.insert(**kwargs)

    # Nothing matched and creation is disabled. The exception type is the
    # one callers already handle for this case; only the message changes,
    # since reporting "too many synonyms" here was misleading.
    raise ToolException(
        "Reject no entry or synonym found in %s" % table._tablename)
class Automaton(object): class Automaton(object):
"""Base class to search and process publications: """Base class to search and process publications:
...@@ -213,62 +268,6 @@ class Automaton(object): ...@@ -213,62 +268,6 @@ class Automaton(object):
return True return True
def _search(self, tablename, fieldname, value, create=False):
    """Get the database identifier for the record having the database field
    matching the value. The database field is defined by the arguments
    *tablename* and *fieldname*.

    Thin wrapper around :func:`search_synonym`, which holds the lookup
    logic, so that the two implementations cannot drift apart.

    Note:
        The search is also performed using the synonyms field
        when nothing is found.

    Args:
        tablename (unicode): name of the database table.
        fieldname (unicode): name of the database field.
        value (unicode): the search value.
        create (bool): create a new entry in the database table when
            it is ``True``

    Returns:
        int:
            * the id of the database record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException: raised by :func:`search_synonym`, e.g. when more
            than one synonym is found.

    """
    # Keep the early exit so that the table is not resolved
    # when there is nothing to search for.
    if not value:
        return UNDEF_ID

    return search_synonym(self.db[tablename], fieldname, value, create)
def _search_parameters(self, collection): def _search_parameters(self, collection):
"""Build the keywords to steer the URL search in invenio store. """Build the keywords to steer the URL search in invenio store.
The main parameter is the collection and the date range defined The main parameter is the collection and the date range defined
...@@ -618,7 +617,7 @@ class Automaton(object): ...@@ -618,7 +617,7 @@ class Automaton(object):
def search_collaboration(self, value):
    """Get the database collaboration identifier using synonyms.
    Create the collaboration, if it does not exist.

    Args:
        value (unicode): the name of the collaboration.

    Returns:
        int:
            * the id of the collaboration record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException: when more than one synonym is found.

    """
    # NOTE(review): an earlier docstring said the collaboration is created
    # only "if it is well formed"; no such check is visible in this method.
    # The last argument enables the creation of a missing entry.
    return search_synonym(self.db.collaborations,
                          "collaboration",
                          value,
                          True)
def search_country(self, value):
    """Get the database country identifier using synonyms.

    The value is matched against the *country* field of the *countries*
    table, then against its synonyms. A missing country is never created.

    Args:
        value (unicode): the value to be matched.

    Returns:
        int:
            * the id of the country record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException: when more than one synonym is found. Since the
            creation is not enabled, it is also raised when neither
            the field nor a synonym matches.

    """
    return search_synonym(self.db.countries, "country", value)
def search_publisher(self, value):
    """Get the database publisher identifier using synonyms.

    The value is matched against the *abbreviation* field of the
    *publishers* table, then against its synonyms. A missing publisher
    is never created.

    Args:
        value (unicode): the value to be matched.

    Returns:
        int:
            * the id of the publisher record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException: when more than one synonym is found. Since the
            creation is not enabled, it is also raised when neither
            the field nor a synonym matches.

    """
    return search_synonym(self.db.publishers, "abbreviation", value)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment