Commit e98db28b authored by LE GAC Renaud
Browse files

Migrate check and fix method to RecordCdsConf and RecordHepConf

parent a0dd169b
...@@ -3,10 +3,12 @@ ...@@ -3,10 +3,12 @@
""" """
from .automaton import Automaton from .automaton import Automaton
from .base import (learn_my_authors, from .base import (learn_my_authors,
MSG_CRASH,
MSG_FIX_ORIGIN, MSG_FIX_ORIGIN,
MSG_IN_DB, MSG_IN_DB,
MSG_LOAD) MSG_LOAD)
from plugin_dbui import get_id, UNDEF_ID from plugin_dbui import get_id, UNDEF_ID
from store_tools import CheckException
MSG_NO_EDITOR = "Reject article is not published" MSG_NO_EDITOR = "Reject article is not published"
MSG_NOT_ARTICLE = "Reject publication is not and article" MSG_NOT_ARTICLE = "Reject publication is not and article"
...@@ -52,7 +54,7 @@ class Articles(Automaton): ...@@ -52,7 +54,7 @@ class Articles(Automaton):
can not be corrected. can not be corrected.
""" """
self.logger.debug(f"{T4}check record (article)") self.logger.debug(f"{T4}check and fix record (article)")
if record.subtype() == "article": if record.subtype() == "article":
self.logs[-1].reject(MSG_NOT_ARTICLE, record) self.logs[-1].reject(MSG_NOT_ARTICLE, record)
...@@ -74,10 +76,14 @@ class Articles(Automaton): ...@@ -74,10 +76,14 @@ class Articles(Automaton):
record.check_publisher(self.db) record.check_publisher(self.db)
record.check_paper_reference() record.check_paper_reference()
except Exception as e: except CheckException as e:
self.logs[-1].reject(e, record=record) self.logs[-1].reject(e, record=record)
return False return False
except Exception as e:
self.logs[-1].reject(MSG_CRASH % e, record=record, translate=False)
return False
return True return True
def get_record_by_fields(self, def get_record_by_fields(self,
......
...@@ -6,6 +6,8 @@ from .base import MSG_CRASH, MSG_LOAD ...@@ -6,6 +6,8 @@ from .base import MSG_CRASH, MSG_LOAD
from .checkandfix import CheckException from .checkandfix import CheckException
from plugin_dbui import get_id, UNDEF_ID from plugin_dbui import get_id, UNDEF_ID
MSG_NOT_PROCEEDING = "Reject publication is not a proceeding"
T4 = " "*4 T4 = " "*4
...@@ -13,6 +15,7 @@ class Proceedings(Automaton): ...@@ -13,6 +15,7 @@ class Proceedings(Automaton):
"""Automaton for conference proceedings. """Automaton for conference proceedings.
""" """
def check_record(self, record): def check_record(self, record):
"""Check the content of the proceeding in order to fix non conformities. """Check the content of the proceeding in order to fix non conformities.
...@@ -26,24 +29,29 @@ class Proceedings(Automaton): ...@@ -26,24 +29,29 @@ class Proceedings(Automaton):
corrected. corrected.
""" """
if not Automaton.check_record(self, record): self.logger.debug(f"{T4}check nd fix record (proceeding)")
return False
self.logger.debug(f"{T4}check record (proceeding)") if record.subtype() == "proceeding":
self.logs[-1].reject(MSG_NOT_PROCEEDING, record)
return False
try: try:
self.check.is_conference(record)
self.check.country(record)
self.check.conference_date(record)
self.check.submitted(record)
self.check.format_editor(record)
self.check.publisher(record)
self.check.paper_reference(record)
self.check.format_authors(record, fmt="F. Last") # is with authors form my institute
self.check.get_my_authors(record, sort=True) # standardise name of collaboration
# format authors according to my format
# extract authors form my institute signing the publication
# is submitted date well formed
record.check_and_fix(self.rex_institute,
fmt_author="F. Last",
sep_author=", ",
sort_author=True)
record.check_country()
record.check_conference_date()
record.format_editor()
record.check_publisher(self.db)
record.check_paper_reference()
except CheckException as e: except CheckException as e:
self.logs[-1].reject(e, record=record) self.logs[-1].reject(e, record=record)
......
...@@ -15,6 +15,7 @@ INS = ("inspirehep", "inspirehep.net") ...@@ -15,6 +15,7 @@ INS = ("inspirehep", "inspirehep.net")
MSG_INV_CONF = "Reject invalid conference information" MSG_INV_CONF = "Reject invalid conference information"
MSG_INV_CONF_KEY = "Reject invalid conference key" MSG_INV_CONF_KEY = "Reject invalid conference key"
MSG_NO_CONF = "Reject no conference information" MSG_NO_CONF = "Reject no conference information"
MSG_NO_CONF_DATE = "Reject no conference date"
MSG_NO_CONF_ID_KEY = "Reject no conference identifier and key" MSG_NO_CONF_ID_KEY = "Reject no conference identifier and key"
MSG_NO_COUNTRY = "Reject invalid country" MSG_NO_COUNTRY = "Reject invalid country"
MSG_NO_ENTRY = "Reject %s is not defined" MSG_NO_ENTRY = "Reject %s is not defined"
...@@ -25,6 +26,7 @@ MSG_NO_SHELF = "No shelf %s for store %s" ...@@ -25,6 +26,7 @@ MSG_NO_SHELF = "No shelf %s for store %s"
MSG_NO_THESIS = "Reject no thesis information" MSG_NO_THESIS = "Reject no thesis information"
MSG_TOOMANY_SYNONYM = "Reject too many %s synonyms" MSG_TOOMANY_SYNONYM = "Reject too many %s synonyms"
MSG_UNKNOWN_COLLABORATION = "Reject collaboration is unknown." MSG_UNKNOWN_COLLABORATION = "Reject collaboration is unknown."
MSG_UNKNOWN_COUNTRY = "Reject country is unknown."
MSG_WELL_FORMED_COLLABORATION = "Reject collaboration is not well formed" MSG_WELL_FORMED_COLLABORATION = "Reject collaboration is not well formed"
MSG_WELL_FORMED_DATE = "Reject submission date is not well formed" MSG_WELL_FORMED_DATE = "Reject submission date is not well formed"
......
"""confmixin.py """confmixin.py
""" """
from .base import (MSG_NO_CONF_DATE,
MSG_UNKNOWN_COUNTRY,
REG_DATE,
search_synonym,
T6)
from .exception import CheckException
from datetime import datetime from datetime import datetime
from plugin_dbui import CLEAN_SPACES from plugin_dbui import CLEAN_SPACES, UNDEF_ID
class ConfMixin(object): class ConfMixin(object):
"""Mixin to handle conference data. """Mixin to handle conference data.
The parent class must have the attribute ``conference``. The parent class must have the attribute ``conference``:
It is a dictionary with at least the following keys:
* addresses: [{cities: [], country: str, ...}, ...] +----------------+----------------------------------------+
* cnum: str | key | value |
* control_number: int +----------------+----------------------------------------+
* closing_date: str | addresses | [{cities: [], country: str, ...}, ...] |
* opening_date: str | cnum | str |
* titles: [{value: str}, ...] | control_number | int |
* urls: [{value: str}, ...] | closing_date | str |
| opening_date | str |
| titles | [{title: str}, ...] |
| urls | [{value: str}, ...] |
+----------------+----------------------------------------+
""" """
def check_conference_date(self):
    """Verify that the conference dates are available.

    Note:
        No format validation is done here: according to the original
        author the value returned by ``conference_dates`` is well
        formed by construction.

    Raises:
        CheckException:
            ``conference_dates`` returned an empty value.

    """
    self.logger.debug(f"{T6}check conference date")

    dates = self.conference_dates()
    if len(dates) > 0:
        return

    # nothing to work with -- the record has to be rejected
    raise CheckException(MSG_NO_CONF_DATE)
def check_country(self, db=None):
    """Check the conference country and replace a synonym by the
    reference value stored in the database.

    The check is skipped when no database connection is given.

    Args:
        db (pydal.DAL):
            database connection

    Raises:
        CheckException:
            * the country is empty or unknown
              (neither a country nor one of its synonyms)
            * more than one synonym found
              (presumably raised by ``search_synonym`` -- to be confirmed)

    """
    if db is None:
        self.logger.debug(f"{T6}skip check country -- db is None")
        return

    self.logger.debug(f"{T6}check country")

    name = self.conference_country()
    if len(name) == 0:
        raise CheckException(MSG_UNKNOWN_COUNTRY)

    # identifier of the country, resolved directly or via its synonyms
    country_id = search_synonym(db.countries, "country", name)
    if country_id == UNDEF_ID:
        raise CheckException(MSG_UNKNOWN_COUNTRY)

    reference = db.countries[country_id].country
    if reference == name:
        return

    # the record uses a synonym: store the reference value instead
    self.conference["addresses"][0]["country"] = reference
def conference_country(self): def conference_country(self):
"""The country where the conference took place. """The country where the conference took place.
...@@ -57,7 +114,10 @@ class ConfMixin(object): ...@@ -57,7 +114,10 @@ class ConfMixin(object):
opening = conference.get("opening_date", None) opening = conference.get("opening_date", None)
closing = conference.get("closing_date", None) closing = conference.get("closing_date", None)
if opening is None or closing is None: if opening is None or REG_DATE.match(opening) is None:
return ""
if closing is None or REG_DATE.match(closing) is None:
return "" return ""
ds = datetime.strptime(opening, "%Y-%m-%d") ds = datetime.strptime(opening, "%Y-%m-%d")
......
...@@ -3,9 +3,13 @@ ...@@ -3,9 +3,13 @@
""" """
import re import re
from .base import T4, T6 from .base import (MSG_WELL_FORMED_DATE,
REG_DATE_YYYYMM,
T4,
T6)
from .cdsstore import CdsStore from .cdsstore import CdsStore
from .confmixin import ConfMixin from .confmixin import ConfMixin
from .exception import CheckException
from .recordcdspubli import RecordCdsPubli from .recordcdspubli import RecordCdsPubli
REX_DATE8 = re.compile(r"(\d{4})(\d{2})(\d{2})") REX_DATE8 = re.compile(r"(\d{4})(\d{2})(\d{2})")
...@@ -17,14 +21,19 @@ class RecordCdsConfPaper(RecordCdsPubli, ConfMixin): ...@@ -17,14 +21,19 @@ class RecordCdsConfPaper(RecordCdsPubli, ConfMixin):
Attributes: Attributes:
conference (dict or None): conference (dict or None):
the conference metadata: the conference metadata:
* addresses: [{cities: [], country: str, ...}, ...]
* cnum: str +----------------+----------------------------------------+
* control_number: int | key | value |
* closing_date: str +----------------+----------------------------------------+
* opening_date: str | addresses | [{cities: [], country: str, ...}, ...] |
* titles: [{value: str}, ...] | cnum | str |
* urls: [{value: str}, ...] | control_number | int |
* year | closing_date | str |
| opening_date | str |
| titles | [{title: str}, ...] |
| urls | [{value: str}, ...] |
| year | str |
+----------------+----------------------------------------+
""" """
...@@ -61,7 +70,7 @@ class RecordCdsConfPaper(RecordCdsPubli, ConfMixin): ...@@ -61,7 +70,7 @@ class RecordCdsConfPaper(RecordCdsPubli, ConfMixin):
store = CdsStore("cds.cern.ch") store = CdsStore("cds.cern.ch")
if conf_id is not None: if conf_id is not None:
logger.debug(f"{T6}search by conference by id {conf_id}") logger.debug(f"{T6}search conference by id {conf_id}")
recjson = store.get_record(conf_id) recjson = store.get_record(conf_id)
if recjson["recid"] != int(conf_id): if recjson["recid"] != int(conf_id):
...@@ -72,7 +81,7 @@ class RecordCdsConfPaper(RecordCdsPubli, ConfMixin): ...@@ -72,7 +81,7 @@ class RecordCdsConfPaper(RecordCdsPubli, ConfMixin):
return return
elif conf_key is not None: elif conf_key is not None:
logger.debug(f"{T6}search by conference by key {conf_key}") logger.debug(f"{T6}search conference by key {conf_key}")
ids = store.get_ids(p=conf_key) ids = store.get_ids(p=conf_key)
mtch = False mtch = False
...@@ -132,3 +141,34 @@ class RecordCdsConfPaper(RecordCdsPubli, ConfMixin): ...@@ -132,3 +141,34 @@ class RecordCdsConfPaper(RecordCdsPubli, ConfMixin):
# #
# Append conference data # Append conference data
self.conference = dct self.conference = dct
def check_submitted_date(self):
    """Check that submitted date is either ``YYYY-MM`` or ``YYYY-MM-DD``.

    When the submitted date is not well formed, it is replaced by the
    opening date of the conference, written in the ``prepublication``
    field of the record.

    Raises:
        CheckException:
            the date is not well formed and can not be recovered since
            the conference metadata or its opening date are missing.

    """
    self.logger.debug(f"{T6}check submitted date")

    if REG_DATE_YYYYMM.match(self.submitted()):
        return

    # recover by using the opening date of the conference
    # the conference attribute can be None (metadata not found), in that
    # case the record is rejected instead of crashing on None.get
    conference = self.conference or {}
    opening = conference.get("opening_date", None)

    if opening is None:
        raise CheckException(MSG_WELL_FORMED_DATE)

    if "prepublication" not in self:
        self["prepublication"] = {"date": opening}
        return

    prepublication = self["prepublication"]
    if isinstance(prepublication, list):
        # several entries: only the first one is updated
        prepublication[0]["date"] = opening
    else:
        prepublication["date"] = opening
...@@ -833,7 +833,7 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin): ...@@ -833,7 +833,7 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
Returns: Returns:
str: str:
* "articles", "preprint", "note" or "report" * "articles", "preprint", "proceeding", "note" or "report"
* empty string when it is not defined * empty string when it is not defined
""" """
...@@ -843,7 +843,10 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin): ...@@ -843,7 +843,10 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
lst = [dct.get("primary", "").lower() for dct in collection] lst = [dct.get("primary", "").lower() for dct in collection]
# order matter since note can have preprint+note if "conferencepaper" in lst:
return "proceeding"
# order matter since we have (preprint+note)
for val in ("article", "note", "report", "preprint"): for val in ("article", "note", "report", "preprint"):
if val in lst: if val in lst:
return val return val
......
...@@ -3,8 +3,12 @@ ...@@ -3,8 +3,12 @@
""" """
import requests import requests
from .base import T4, T6 from .base import (MSG_WELL_FORMED_DATE,
REG_DATE_YYYYMM,
T4,
T6)
from .confmixin import ConfMixin from .confmixin import ConfMixin
from .exception import CheckException
from .recordheppubli import RecordHepPubli from .recordheppubli import RecordHepPubli
...@@ -21,13 +25,17 @@ class RecordHepConfPaper(RecordHepPubli, ConfMixin): ...@@ -21,13 +25,17 @@ class RecordHepConfPaper(RecordHepPubli, ConfMixin):
https://inspire-schemas.readthedocs.io/en/latest/schemas/ https://inspire-schemas.readthedocs.io/en/latest/schemas/
Main information are: Main information are:
* addresses: [{cities: [], country: str, ...}, ...] +----------------+----------------------------------------+
* cnum: str | key | value |
* control_number: int +----------------+----------------------------------------+
* closing_date: str | addresses | [{cities: [], country: str, ...}, ...] |
* opening_date: str | cnum | str |
* titles: [{value: str}, ...] | control_number | int |
* urls: [{value: str}, ...] | closing_date | str |
| opening_date | str |
| titles | [{title: str}, ...] |
| urls | [{value: str}, ...] |
+----------------+----------------------------------------+
""" """
...@@ -72,3 +80,25 @@ class RecordHepConfPaper(RecordHepPubli, ConfMixin): ...@@ -72,3 +80,25 @@ class RecordHepConfPaper(RecordHepPubli, ConfMixin):
# append conference data # append conference data
self.conference = obj.get("metadata", None) self.conference = obj.get("metadata", None)
def check_submitted_date(self):
    """Check that submitted date is either ``YYYY-MM`` or ``YYYY-MM-DD``.

    When the submitted date is not well formed, it is replaced by the
    opening date of the conference, written in the ``preprint_date``
    field of the record.

    Raises:
        CheckException:
            the date is not well formed and can not be recovered since
            the conference metadata or its opening date are missing.

    """
    self.logger.debug(f"{T6}check submitted date")

    if REG_DATE_YYYYMM.match(self.submitted()):
        return

    # recover by using the opening date of the conference
    # the conference attribute can be None (no metadata in the response),
    # in that case the record is rejected instead of crashing on None.get
    conference = self.conference or {}
    opening = conference.get("opening_date", None)

    if opening is None:
        raise CheckException(MSG_WELL_FORMED_DATE)

    self["preprint_date"] = opening
...@@ -275,7 +275,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin): ...@@ -275,7 +275,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
""" """
def check_submitted_date(self): def check_submitted_date(self):
"""Check that submitted date as ``YYYY-MM`` or ``YYYY-MM-DD``. """Check that submitted date is either ``YYYY-MM`` or ``YYYY-MM-DD``.
Raises: Raises:
CheckException:: CheckException::
...@@ -530,7 +530,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin): ...@@ -530,7 +530,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
Returns: Returns:
str: str:
* "articles", "preprint", "note" or "report" * "articles", "preprint", "proceeding", "note" or "report"
* empty string when it is not defined * empty string when it is not defined
""" """
...@@ -540,13 +540,14 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin): ...@@ -540,13 +540,14 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
doctype = (doctype[0] if isinstance(doctype, list) else doctype) doctype = (doctype[0] if isinstance(doctype, list) else doctype)
if doctype != "article": if doctype == "article":
return doctype pubinfo = self.get("publication_info", None)
return ("preprint" if pubinfo is None else "article")
# separate article from preprint elif doctype == "conference paper":
# in the latter case publication_info is missing return ("proceeding" if self.is_published() else "")
pubinfo = self.get("publication_info", None)
return ("preprint" if pubinfo is None else "article") return doctype
def title(self): def title(self):
"""The title of the publication. """The title of the publication.
......
...@@ -29,7 +29,7 @@ def record(): ...@@ -29,7 +29,7 @@ def record():
return load_record("cds.cern.ch", 2242641) return load_record("cds.cern.ch", 2242641)
def test_subtype_ins_13001(record): def test_subtype_13001(record):
assert record.subtype() == "article" assert record.subtype() == "article"
...@@ -120,91 +120,3 @@ def test_check_paper_reference_13022(record): ...@@ -120,91 +120,3 @@ def test_check_paper_reference_13022(record):
# check_paper_reference is a dummy method to preserve interface # check_paper_reference is a dummy method to preserve interface
assert record.paper_reference() == "Phys. Rev. D 95 2017 052005" assert record.paper_reference() == "Phys. Rev. D 95 2017 052005"
assert record.check_paper_reference() is None assert record.check_paper_reference() is None
# def test_format_editor_cds_13001(svc, reccds):
#
# # cds
# assert reccds.paper_editor() == "Phys. Rev. D"
# assert reccds.paper_volume() == "95"
#
# svc.format_editor(reccds)
#
# assert reccds.paper_editor() == "Phys. Rev. D"
# assert reccds.paper_volume() == "95"
#
#
# def test_format_editor_ins_13002(svc, recins):
# # inspire
# assert recins.paper_editor() == "Phys. Rev. D"
# assert recins.paper_volume() == "95"
#
# svc.format_editor(recins)
#
# assert recins.paper_editor() == "Phys. Rev. D"
# assert recins.paper_volume() == "95"
#
#
# def test_publisher_cds_13003(svc, reccds):
# assert svc.publisher(reccds) is None
#
#
# def test_paper_reference_cds_13004(svc, reccds):
#
# # check recovery procedure using DOI
# # remove the publisher and volume information
# paper_ref = reccds.paper_reference()
#
# reccds.df_info.loc[0, ["title", "volume"]] = ["", ""]
# svc.paper_reference(reccds)
#
# assert reccds.paper_reference() == paper_ref
#
#
# def test_submitted_cds_13005(svc, reccds):