Commit 945dcead authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Modify the logic to deal with synonyms.

parent 28b2ec83
......@@ -318,8 +318,10 @@ class Articles(Automaton):
year = record.paper_year()
# get the collaboration / publisher identifiers
id_collaboration = self.search_collaboration(record.collaboration())
id_publisher = self.search_publisher(editor)
id_collaboration = \
get_id(db.collaborations, collaboration=record.collaboration())
id_publisher = get_id(db.publishers, abbreviation=editor)
# get already published articles or preprint
# A preprint is transformed into an article.
......
......@@ -672,64 +672,3 @@ class Automaton(object):
return dict(collection_logs=self.collection_logs,
controller=self.controller,
logs=self.logs)
def search_collaboration(self, value):
    """Resolve a collaboration name to its database identifier.

    The lookup is delegated to ``search_synonym`` on the
    ``collaborations`` table, matching on the ``collaboration`` field.

    Args:
        value (unicode):
            the name of the collaboration.

    Returns:
        int:
            * the id of the collaboration record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException:
            when more than one synonym is found or when the
            collaboration is not defined.

    """
    table = self.db.collaborations
    return search_synonym(table, "collaboration", value)
def search_country(self, value):
    """Resolve a country name to its database identifier.

    The lookup is delegated to ``search_synonym`` on the
    ``countries`` table, matching on the ``country`` field.

    Args:
        value (unicode):
            the name of the country.

    Returns:
        int:
            * the id of the country record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException:
            when more than one synonym is found or when
            the country is not defined.

    """
    table = self.db.countries
    return search_synonym(table, "country", value)
def search_publisher(self, value):
    """Resolve a publisher abbreviation to its database identifier.

    The lookup is delegated to ``search_synonym`` on the
    ``publishers`` table, matching on the ``abbreviation`` field.

    Args:
        value (unicode):
            the abbreviation of the publisher.

    Returns:
        int:
            * the id of the publisher record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException:
            when more than one synonym is found or when
            the publisher is not defined.

    """
    table = self.db.publishers
    return search_synonym(table, "abbreviation", value)
......@@ -127,7 +127,8 @@ def search_synonym(table, fieldname, value, create=False):
Raises:
ToolException:
more than one synonym is found.
* no synonym found and not allowed to create a new one.
* more than one synonym is found.
"""
if not value:
......
......@@ -20,7 +20,7 @@ from invenio_tools import (MSG_NO_CONF,
from invenio_tools.recordpubli import PAPER_REFERENCE_KEYS
from itertools import imap
from plugin_dbui import CLEAN_SPACES, get_id
from plugin_dbui import CLEAN_SPACES, get_id, UNDEF_ID
DECODE_ARXIV = re.compile(r"arXiv:(\d{2})(\d{2})\.")
......@@ -53,6 +53,9 @@ MSG_NO_OAI = "Reject no OAI identifier"
MSG_NO_REF = "Reject incomplete paper reference. Check "
MSG_TEMPORARY_RECORD = "Temporary record"
MSG_UNKNOWN_COLLABORATION = "Reject collaboration is unknown."
MSG_UNKNOWN_COUNTRY = "Reject country is unknown."
MSG_UNKNOWN_PUBLISHER = "Reject publisher is unknown."
MSG_WELL_FORMED_DATE = "Reject submission date is not well formed"
REG_CONF_DATES_1 = re.compile("(\d+) *-? *(\d+) *([A-Z][a-z]{2}) *(\d{4})")
......@@ -342,8 +345,7 @@ class CheckAndFix(object):
raise CheckException(MSG_NO_AUTHOR)
def collaboration(self, record):
"""Check the collaboration.
Have a look to the synonyms when the collaboration is not well formed.
"""Check synonyms for collaboration by using by the proper value.
Args:
record (RecordPubli):
......@@ -351,8 +353,9 @@ class CheckAndFix(object):
Raises:
CheckException:
when the collaboration value is neither defined
nor entered as a synonym.
* the collaboration is unknown
(neither collaboration nor synonym)
* more than one synonym found.
"""
if self.dbg:
......@@ -363,12 +366,41 @@ class CheckAndFix(object):
return
try:
search_synonym(self.db.collaborations, "collaboration", val)
db = self.db
dbid = search_synonym(db.collaborations, "collaboration", val)
if dbid == UNDEF_ID:
raise ToolException(MSG_UNKNOWN_COLLABORATION)
collaboration = db.collaborations[dbid].collaboration
if collaboration != val:
# one collaboration
if isinstance(record[u"corporate_name"], dict):
record[u"corporate_name"][u"collaboration"] = collaboration
# several collaborations
# replace the list of dictionaries by a single one
else:
record[u"corporate_name"] = \
{u"collaboration": collaboration}
except ToolException as e:
raise CheckException(*e.args)
def country(self, record):
"""Check synonyms for conference country by using by the proper value.
Args:
record (RecordPubli):
record describing a publication.
Raises:
CheckException:
* the country is unknown (neither country nor synonym)
* more than one synonym found.
"""
"""Check conference country.
Have a look to the synonyms when the country does not exist.
......@@ -390,7 +422,28 @@ class CheckAndFix(object):
val = record.conference_country()
try:
search_synonym(self.db.countries, "country", val)
db = self.db
dbid = search_synonym(db.countries, "country", val)
if dbid == UNDEF_ID:
raise ToolException(MSG_UNKNOWN_COUNTRY)
country = db.countries[dbid].country
if country != val:
obj = record[u"meeting_name"]
if isinstance(obj, dict):
location = obj[u"location"].replace(val, country)
record[u"meeting_name"][u"location"] = location
else:
for di in obj:
if u"location" in di:
di[u"location"] = \
di[u"location"].replace(val, country)
record[u"meeting_name"] = obj
except ToolException as e:
raise CheckException(*e.args)
......@@ -822,8 +875,7 @@ class CheckAndFix(object):
raise ToolException(MSG_NO_REF + "[year]")
def publisher(self, record):
"""Check publisher.
Have a look to the synonyms when the publisher does not exist.
"""Check synonyms for publisher by replacing by the abbreviation value.
Args:
record (RecordPubli):
......@@ -831,7 +883,8 @@ class CheckAndFix(object):
Raises:
CheckException:
the publisher is not defined nor entered as a synonym.
* the publisher is unknown (neither abbreviation nor synonym)
* more than one synonym found.
"""
if self.dbg:
......@@ -841,11 +894,18 @@ class CheckAndFix(object):
if len(val) == 0:
return
# convert ToolException to CheckException
try:
db = self.db
search_synonym(db.publishers, "abbreviation", val)
dbid = search_synonym(db.publishers, "abbreviation", val)
if dbid == UNDEF_ID:
raise ToolException(MSG_UNKNOWN_PUBLISHER)
abbreviation = db.publishers[dbid].abbreviation
if abbreviation != val:
record[u"publication_info"].loc[0, "title"] = abbreviation
# convert ToolException to CheckException
except ToolException as e:
raise CheckException(*e.args)
......
......@@ -8,7 +8,7 @@ from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .checkandfix import CheckException
from invenio_tools import RecordConf, RecordThesis
from plugin_dbui import UNDEF_ID
from plugin_dbui import get_id, UNDEF_ID
MSG_PREPRINT_IS_PAPER = "Reject preprint is a published paper"
......@@ -86,6 +86,8 @@ class Preprints(Automaton):
zero otherwise.
"""
db = self.db
# alias
first_author = record.first_author()
oai_url = record.oai_url()
......@@ -95,7 +97,8 @@ class Preprints(Automaton):
year = submitted[0:4]
# get the collaboration identifier
id_collaboration = self.search_collaboration(record.collaboration())
id_collaboration = \
get_id(db.collaborations, collaboration=record.collaboration())
# get existing preprint or article
fields = dict(first_author=first_author,
......
......@@ -7,7 +7,7 @@ import traceback
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .checkandfix import CheckException
from plugin_dbui import UNDEF_ID
from plugin_dbui import get_id, UNDEF_ID
class Proceedings(Automaton):
......@@ -71,6 +71,8 @@ class Proceedings(Automaton):
zero otherwise.
"""
db = self.db
# alias
oai_url = record.oai_url()
year = record.paper_year()
......@@ -94,11 +96,13 @@ class Proceedings(Automaton):
conference_dates = record.conference_dates()
conference_title = record.conference_title()
first_author = record.first_author()
id_country = self.search_country(record.conference_country())
id_country = get_id(db.countries, country=record.conference_country())
# get the collaboration/publisher identifiers
id_collaboration = self.search_collaboration(record.collaboration())
id_publisher = self.search_publisher(editor)
id_collaboration = \
get_id(db.collaborations, collaboration=record.collaboration())
id_publisher = get_id(db.publishers, abbreviation=editor)
# get an already published proceeding
fields = dict(authors=authors,
......
......@@ -90,7 +90,8 @@ class Reports(Automaton):
id_status = get_id(db.status, code=UNKNOWN)
# get the collaboration identifier
id_collaboration = self.search_collaboration(record.collaboration())
id_collaboration = \
get_id(db.collaborations, collaboration=record.collaboration())
# get an already published reports
fields = dict(id_categories=self.id_category,
......
......@@ -4,10 +4,17 @@
import traceback
<<<<<<< HEAD
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .checkandfix import CheckException
from plugin_dbui import UNDEF_ID
=======
from automaton import Automaton
from base import MSG_CRASH, MSG_LOAD
from checkandfix import CheckException
from plugin_dbui import get_id, UNDEF_ID
>>>>>>> Modify the logic to deal with synonyms.
class Talks(Automaton):
......@@ -67,6 +74,8 @@ class Talks(Automaton):
zero otherwise.
"""
db = self.db
# alias
oai_url = record.oai_url()
year = record.conference_year()
......@@ -75,12 +84,13 @@ class Talks(Automaton):
conference_dates = record.conference_dates()
conference_title = record.conference_title()
first_author = record.first_author()
id_country = self.search_country(record.conference_country())
id_country = get_id(db.countries, country=record.conference_country())
submitted = record.submitted()
title = record.title()
# get the collaboration identifier
id_collaboration = self.search_collaboration(record.collaboration())
id_collaboration = \
get_id(db.collaborations, collaboration=record.collaboration())
# get an already published talk
fields = dict(conference_title=conference_title,
......
......@@ -28,6 +28,9 @@ from harvest_tools.checkandfix import (
MSG_NO_OAI,
MSG_NO_REF,
MSG_TEMPORARY_RECORD,
MSG_UNKNOWN_COLLABORATION,
MSG_UNKNOWN_COUNTRY,
MSG_UNKNOWN_PUBLISHER,
MSG_WELL_FORMED_DATE)
from harvest_tools.preprints import (
......@@ -82,6 +85,9 @@ def messages():
T(MSG_PREPRINT_NO_NUMBER),
T(MSG_REPORT_NO_NUMBER),
T(MSG_TEMPORARY_RECORD),
T(MSG_UNKNOWN_COLLABORATION),
T(MSG_UNKNOWN_COUNTRY),
T(MSG_UNKNOWN_PUBLISHER),
T(MSG_WELL_FORMED_COLLABORATION),
T(MSG_WELL_FORMED_DATE)}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment