Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 945dcead authored by LE GAC Renaud
Browse files

Modify the logic to deal with synonyms.

parent 28b2ec83
......@@ -318,8 +318,10 @@ class Articles(Automaton):
year = record.paper_year()
# get the collaboration / publisher identifiers
id_collaboration = self.search_collaboration(record.collaboration())
id_publisher = self.search_publisher(editor)
id_collaboration = \
get_id(db.collaborations, collaboration=record.collaboration())
id_publisher = get_id(db.publishers, abbreviation=editor)
# get already published articles or preprint
# A preprint is transformed into an article.
......
......@@ -672,64 +672,3 @@ class Automaton(object):
return dict(collection_logs=self.collection_logs,
controller=self.controller,
logs=self.logs)
def search_collaboration(self, value):
    """Resolve a collaboration name to its database identifier.

    The lookup is delegated to ``search_synonym`` on the
    ``collaborations`` table, matching on the ``collaboration`` field.

    Args:
        value (unicode):
            the name of the collaboration.

    Returns:
        int:
            * the id of the collaboration record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException:
            when more than one synonym is found or when the
            collaboration is not defined.

    """
    table = self.db.collaborations
    return search_synonym(table, "collaboration", value)
def search_country(self, value):
    """Resolve a country name to its database identifier.

    The lookup is delegated to ``search_synonym`` on the
    ``countries`` table, matching on the ``country`` field.

    Args:
        value (unicode):
            the name of the country.

    Returns:
        int:
            * the id of the country record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException:
            when more than one synonym is found or when
            the country is not defined.

    """
    table = self.db.countries
    return search_synonym(table, "country", value)
def search_publisher(self, value):
    """Resolve a publisher abbreviation to its database identifier.

    The lookup is delegated to ``search_synonym`` on the
    ``publishers`` table, matching on the ``abbreviation`` field.

    Args:
        value (unicode):
            the abbreviation of the publisher.

    Returns:
        int:
            * the id of the publisher record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException:
            when more than one synonym is found or when
            the publisher is not defined.

    """
    table = self.db.publishers
    return search_synonym(table, "abbreviation", value)
......@@ -127,7 +127,8 @@ def search_synonym(table, fieldname, value, create=False):
Raises:
ToolException:
more than one synonym is found.
* no synonym found and not allowed to create a new one.
* more than one synonym is found.
"""
if not value:
......
......@@ -20,7 +20,7 @@ from invenio_tools import (MSG_NO_CONF,
from invenio_tools.recordpubli import PAPER_REFERENCE_KEYS
from itertools import imap
from plugin_dbui import CLEAN_SPACES, get_id
from plugin_dbui import CLEAN_SPACES, get_id, UNDEF_ID
DECODE_ARXIV = re.compile(r"arXiv:(\d{2})(\d{2})\.")
......@@ -53,6 +53,9 @@ MSG_NO_OAI = "Reject no OAI identifier"
MSG_NO_REF = "Reject incomplete paper reference. Check "
MSG_TEMPORARY_RECORD = "Temporary record"
MSG_UNKNOWN_COLLABORATION = "Reject collaboration is unknown."
MSG_UNKNOWN_COUNTRY = "Reject country is unknown."
MSG_UNKNOWN_PUBLISHER = "Reject publisher is unknown."
MSG_WELL_FORMED_DATE = "Reject submission date is not well formed"
REG_CONF_DATES_1 = re.compile("(\d+) *-? *(\d+) *([A-Z][a-z]{2}) *(\d{4})")
......@@ -342,8 +345,7 @@ class CheckAndFix(object):
raise CheckException(MSG_NO_AUTHOR)
def collaboration(self, record):
"""Check the collaboration.
Have a look to the synonyms when the collaboration is not well formed.
"""Check the collaboration, replacing a synonym by the proper value.
Args:
record (RecordPubli):
......@@ -351,8 +353,9 @@ class CheckAndFix(object):
Raises:
CheckException:
when the collaboration value is defined
nor entered as a synonym.
* the collaboration is unknown
(neither collaboration nor synonym)
* more than one synonym found.
"""
if self.dbg:
......@@ -363,12 +366,41 @@ class CheckAndFix(object):
return
try:
search_synonym(self.db.collaborations, "collaboration", val)
db = self.db
dbid = search_synonym(db.collaborations, "collaboration", val)
if dbid == UNDEF_ID:
raise ToolException(MSG_UNKNOWN_COLLABORATION)
collaboration = db.collaborations[dbid].collaboration
if collaboration != val:
# one collaboration
if isinstance(record[u"corporate_name"], dict):
record[u"corporate_name"][u"collaboration"] = collaboration
# several collaboration
# replace the list of dictionary by a single one
else:
record[u"corporate_name"] = \
{u"collaboration": collaboration}
except ToolException as e:
raise CheckException(*e.args)
def country(self, record):
"""Check the conference country, replacing a synonym by the proper value.
Args:
record (RecordPubli):
record describing a publication.
Raises:
CheckException:
* the country is unknown (neither country nor synonym)
* more than one synonym found.
"""
"""Check conference country.
Have a look to the synonyms when the country does not exist.
......@@ -390,7 +422,28 @@ class CheckAndFix(object):
val = record.conference_country()
try:
search_synonym(self.db.countries, "country", val)
db = self.db
dbid = search_synonym(db.countries, "country", val)
if dbid == UNDEF_ID:
raise ToolException(MSG_UNKNOWN_COUNTRY)
country = db.countries[dbid].country
if country != val:
obj = record[u"meeting_name"]
if isinstance(obj, dict):
location = obj[u"location"].replace(val, country)
record[u"meeting_name"][u"location"] = location
else:
for di in obj:
if u"location" in di:
di[u"location"] = \
di[u"location"].replace(val, country)
record[u"meeting_name"] = obj
except ToolException as e:
raise CheckException(*e.args)
......@@ -822,8 +875,7 @@ class CheckAndFix(object):
raise ToolException(MSG_NO_REF + "[year]")
def publisher(self, record):
"""Check publisher.
Have a look to the synonyms when the publisher does not exist.
"""Check the publisher, replacing a synonym by the abbreviation value.
Args:
record (RecordPubli):
......@@ -831,7 +883,8 @@ class CheckAndFix(object):
Raises:
CheckException:
the publisher is not defined nor entered as a synonym.
* the publisher is unknown (neither abbreviation nor synonym)
* more than one synonym found.
"""
if self.dbg:
......@@ -841,11 +894,18 @@ class CheckAndFix(object):
if len(val) == 0:
return
# convert ToolException to CheckException
try:
db = self.db
search_synonym(db.publishers, "abbreviation", val)
dbid = search_synonym(db.publishers, "abbreviation", val)
if dbid == UNDEF_ID:
raise ToolException(MSG_UNKNOWN_PUBLISHER)
abbreviation = db.publishers[dbid].abbreviation
if abbreviation != val:
record[u"publication_info"].loc[0, "title"] = abbreviation
# convert ToolException to CheckException
except ToolException as e:
raise CheckException(*e.args)
......
......@@ -8,7 +8,7 @@ from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .checkandfix import CheckException
from invenio_tools import RecordConf, RecordThesis
from plugin_dbui import UNDEF_ID
from plugin_dbui import get_id, UNDEF_ID
MSG_PREPRINT_IS_PAPER = "Reject preprint is a published paper"
......@@ -86,6 +86,8 @@ class Preprints(Automaton):
zero otherwise.
"""
db = self.db
# alias
first_author = record.first_author()
oai_url = record.oai_url()
......@@ -95,7 +97,8 @@ class Preprints(Automaton):
year = submitted[0:4]
# get the collaboration identifier
id_collaboration = self.search_collaboration(record.collaboration())
id_collaboration = \
get_id(db.collaborations, collaboration=record.collaboration())
# get existing preprint or article
fields = dict(first_author=first_author,
......
......@@ -7,7 +7,7 @@ import traceback
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .checkandfix import CheckException
from plugin_dbui import UNDEF_ID
from plugin_dbui import get_id, UNDEF_ID
class Proceedings(Automaton):
......@@ -71,6 +71,8 @@ class Proceedings(Automaton):
zero otherwise.
"""
db = self.db
# alias
oai_url = record.oai_url()
year = record.paper_year()
......@@ -94,11 +96,13 @@ class Proceedings(Automaton):
conference_dates = record.conference_dates()
conference_title = record.conference_title()
first_author = record.first_author()
id_country = self.search_country(record.conference_country())
id_country = get_id(db.countries, country=record.conference_country())
# get the collaboration/publisher identifiers
id_collaboration = self.search_collaboration(record.collaboration())
id_publisher = self.search_publisher(editor)
id_collaboration = \
get_id(db.collaborations, collaboration=record.collaboration())
id_publisher = get_id(db.publishers, abbreviation=editor)
# get an already published proceeding
fields = dict(authors=authors,
......
......@@ -90,7 +90,8 @@ class Reports(Automaton):
id_status = get_id(db.status, code=UNKNOWN)
# get the collaboration identifier
id_collaboration = self.search_collaboration(record.collaboration())
id_collaboration = \
get_id(db.collaborations, collaboration=record.collaboration())
# get an already published reports
fields = dict(id_categories=self.id_category,
......
......@@ -4,10 +4,17 @@
import traceback
<<<<<<< HEAD
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .checkandfix import CheckException
from plugin_dbui import UNDEF_ID
=======
from automaton import Automaton
from base import MSG_CRASH, MSG_LOAD
from checkandfix import CheckException
from plugin_dbui import get_id, UNDEF_ID
>>>>>>> Modify the logic to deal with synonyms.
class Talks(Automaton):
......@@ -67,6 +74,8 @@ class Talks(Automaton):
zero otherwise.
"""
db = self.db
# alias
oai_url = record.oai_url()
year = record.conference_year()
......@@ -75,12 +84,13 @@ class Talks(Automaton):
conference_dates = record.conference_dates()
conference_title = record.conference_title()
first_author = record.first_author()
id_country = self.search_country(record.conference_country())
id_country = get_id(db.countries, country=record.conference_country())
submitted = record.submitted()
title = record.title()
# get the collaboration identifier
id_collaboration = self.search_collaboration(record.collaboration())
id_collaboration = \
get_id(db.collaborations, collaboration=record.collaboration())
# get an already published talk
fields = dict(conference_title=conference_title,
......
......@@ -28,6 +28,9 @@ from harvest_tools.checkandfix import (
MSG_NO_OAI,
MSG_NO_REF,
MSG_TEMPORARY_RECORD,
MSG_UNKNOWN_COLLABORATION,
MSG_UNKNOWN_COUNTRY,
MSG_UNKNOWN_PUBLISHER,
MSG_WELL_FORMED_DATE)
from harvest_tools.preprints import (
......@@ -82,6 +85,9 @@ def messages():
T(MSG_PREPRINT_NO_NUMBER),
T(MSG_REPORT_NO_NUMBER),
T(MSG_TEMPORARY_RECORD),
T(MSG_UNKNOWN_COLLABORATION),
T(MSG_UNKNOWN_COUNTRY),
T(MSG_UNKNOWN_PUBLISHER),
T(MSG_WELL_FORMED_COLLABORATION),
T(MSG_WELL_FORMED_DATE)}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment