Commit 21971181 authored by LE GAC Renaud
Browse files

Migrate check and fix method to all automatons

parent 75ff496f
......@@ -6,7 +6,9 @@ from .base import (learn_my_authors,
MSG_CRASH,
MSG_FIX_ORIGIN,
MSG_IN_DB,
MSG_LOAD)
MSG_LOAD,
T4,
T6)
from plugin_dbui import get_id, UNDEF_ID
from store_tools import CheckException
......@@ -14,9 +16,6 @@ MSG_NO_EDITOR = "Reject article is not published"
# Message texts of the articles automaton.
# NOTE(review): presumably written to the harvest log -- the call sites are
# outside this view, confirm before relying on the exact wording elsewhere.
MSG_NOT_ARTICLE = "Reject publication is not an article"
MSG_TRANSFORM_PREPRINT = "Transform the preprint into an article"
T4 = " "*4
T6 = " "*6
class Articles(Automaton):
"""Automaton for articles.
......@@ -61,7 +60,6 @@ class Articles(Automaton):
return False
try:
# is with authors from my institute
# standardise name of collaboration
# format authors according to my format
......
......@@ -8,6 +8,9 @@ MSG_FIX_ORIGIN = "Fixed the origin field"
MSG_IN_DB = "Already in the database"
MSG_LOAD = "Load in the database"
T4 = " "*4
T6 = " "*6
def family_name_fr(full_name):
"""Extract the family name when the full name is encoded as ``J. Doe``.
......
......@@ -2,11 +2,11 @@
"""
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .base import MSG_CRASH, MSG_LOAD, T4
from .checkandfix import CheckException
from plugin_dbui import UNDEF_ID
T4 = " "*4
MSG_NOT_NOTE = "Reject publication is not a note"
class Notes(Automaton):
......@@ -17,6 +17,12 @@ class Notes(Automaton):
def check_record(self, record):
"""Check the content of the note in order to fix non conformities.
* is with authors from my institute
* standardise name of collaboration
* format authors according to my format
* extract authors from my institute signing the publication
* is submitted date well formed
Args:
record (RecordPubli):
record describing a note
......@@ -27,16 +33,22 @@ class Notes(Automaton):
corrected.
"""
if not Automaton.check_record(self, record):
return False
self.logger.debug(f"{T4}check and fix record (note)")
self.logger.debug(f"{T4}check record (note)")
if record.subtype() == "note":
self.logs[-1].reject(MSG_NOT_NOTE, record)
return False
try:
self.check.submitted(record)
self.check.format_authors(record, fmt="F. Last")
self.check.get_my_authors(record, sort=True)
# is with authors from my institute
# standardise name of collaboration
# format authors according to my format
# extract authors from my institute signing the publication
# is submitted date well formed
record.check_and_fix(self.rex_institute,
fmt_author="F. Last",
sep_author=", ",
sort_author=True)
except CheckException as e:
self.logs[-1].reject(e, record=record)
......
......@@ -5,12 +5,12 @@ from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD, T4
from .checkandfix import CheckException
from plugin_dbui import get_id, UNDEF_ID
from .recordcdsconfpaper import RecordCdsConfPaper
from .recordhepconfpaper import RecordHepConfPaper
from .recordcdsthesis import RecordCdsThesis
from .recordhepthesis import RecordHepThesis
from store_tools import (RecordCdsConfPaper,
RecordHepConfPaper,
RecordCdsThesis,
RecordHepThesis)
MSG_NOT_ARTICLE = "Reject publication is not a preprint"
MSG_NOT_PREPRINT = "Reject publication is not a preprint"
MSG_PREPRINT_IS_PAPER = "Reject preprint is a published paper"
MSG_PREPRINT_IS_CONFERENCE = "Reject preprint is a conference"
MSG_PREPRINT_IS_THESIS = "Reject preprint is a thesis"
......@@ -44,7 +44,7 @@ class Preprints(Automaton):
self.logger.debug(f"{T4}check and fix record (preprint)")
if record.subtype() == "preprint":
self.logs[-1].reject(MSG_NOT_ARTICLE, record)
self.logs[-1].reject(MSG_NOT_PREPRINT, record)
return False
if record.is_published():
......
......@@ -2,14 +2,12 @@
"""
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .base import MSG_CRASH, MSG_LOAD, T4
from .checkandfix import CheckException
from plugin_dbui import get_id, UNDEF_ID
MSG_NOT_PROCEEDING = "Reject publication is not a proceeding"
T4 = " "*4
class Proceedings(Automaton):
"""Automaton for conference proceedings.
......@@ -19,6 +17,18 @@ class Proceedings(Automaton):
def check_record(self, record):
"""Check the content of the proceeding in order to fix non conformities.
* is with authors from my institute
* standardise name of collaboration
* format authors according to my format
* extract authors from my institute signing the publication
* is submitted date well formed
* check conference country
* check conference dates
* format editor according to my criteria
* resolve published synonym
* check reference paper
Args:
record (RecordConf):
record describing a proceeding.
......@@ -29,14 +39,13 @@ class Proceedings(Automaton):
corrected.
"""
self.logger.debug(f"{T4}check nd fix record (proceeding)")
self.logger.debug(f"{T4}check and fix record (proceeding)")
if record.subtype() == "proceeding":
self.logs[-1].reject(MSG_NOT_PROCEEDING, record)
return False
try:
# is with authors from my institute
# standardise name of collaboration
# format authors according to my format
......
......@@ -2,12 +2,11 @@
"""
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .base import MSG_CRASH, MSG_LOAD, T4
from .checkandfix import CheckException
from plugin_dbui import get_id, UNDEF_ID, UNKNOWN
MSG_REPORT_NO_NUMBER = "Reject no report number"
T4 = " "*4
class Reports(Automaton):
......@@ -18,6 +17,12 @@ class Reports(Automaton):
def check_record(self, record):
"""Check the content of the report in order to fix non conformities.
* is with authors from my institute
* standardise name of collaboration
* format authors according to my format
* extract authors from my institute signing the publication
* is submitted date well formed
Args:
record (RecordPubli):
record describing a report.
......@@ -28,20 +33,22 @@ class Reports(Automaton):
corrected.
"""
if not Automaton.check_record(self, record):
return False
self.logger.debug(f"{T4}check record (report)")
self.logger.debug(f"{T4}check and fix record (report)")
if not record.report_number():
self.logs[-1].reject(MSG_REPORT_NO_NUMBER, record=record)
return False
try:
self.check.submitted(record)
self.check.format_authors(record, fmt="F. Last")
self.check.get_my_authors(record, sort=True)
# is with authors from my institute
# standardise name of collaboration
# format authors according to my format
# extract authors from my institute signing the publication
# is submitted date well formed
record.check_and_fix(self.rex_institute,
fmt_author="F. Last",
sep_author=", ",
sort_author=True)
except CheckException as e:
self.logs[-1].reject(e, record=record)
......
......@@ -2,21 +2,32 @@
"""
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .base import MSG_CRASH, MSG_LOAD, T4
from .checkandfix import CheckException
from plugin_dbui import get_id, UNDEF_ID
from store_tools import RecordCdsConfPaper, RecordHepConfPaper
T4 = " "*4
MSG_NOT_TALK = "Reject publication is not a talk"
class Talks(Automaton):
"""Automaton for conference talks.
"""
def check_record(self, record):
"""Check the content of the talk in order to fix non conformities.
* is conference
* is with authors from my institute
* standardise name of collaboration
* format authors according to my format
* extract authors from my institute signing the publication
* is submitted date well formed
* check conference country
* check conference dates
Args:
record (RecordConf):
record describing a conference.
......@@ -27,20 +38,25 @@ class Talks(Automaton):
corrected.
"""
if not Automaton.check_record(self, record):
return False
self.logger.debug(f"{T4}check and fix record (talk)")
self.logger.debug(f"{T4}check record (talk)")
if not isinstance(record, (RecordCdsConfPaper, RecordHepConfPaper)):
self.logs[-1].reject(MSG_NOT_TALK, record)
return False
try:
self.check.is_conference(record)
self.check.country(record)
self.check.conference_date(record)
self.check.submitted(record)
self.check.format_authors(record, fmt="F. Last")
self.check.get_my_authors(record, sort=True)
# is with authors from my institute
# standardise name of collaboration
# format authors according to my format
# extract authors from my institute signing the publication
# is submitted date well formed
record.check_and_fix(self.rex_institute,
fmt_author="F. Last",
sep_author=", ",
sort_author=True)
record.check_country()
record.check_conference_date()
except CheckException as e:
self.logs[-1].reject(e, record=record)
......
......@@ -4,11 +4,12 @@
import re
from .automaton import Automaton
from .base import MSG_CRASH, MSG_LOAD
from .base import MSG_CRASH, MSG_LOAD, T4
from .checkandfix import CheckException
from plugin_dbui import get_id, UNDEF_ID
from store_tools import RecordCdsThesis, RecordHepThesis
T4 = " "*4
MSG_NOT_THESIS = "Reject publication is not a thesis"
class Thesis(Automaton):
......@@ -29,19 +30,24 @@ class Thesis(Automaton):
corrected.
"""
if not Automaton.check_record(self, record):
return False
self.logger.debug(f"{T4}check and fix record (thesis)")
self.logger.debug(f"{T4}check record (thesis)")
if not isinstance(record, (RecordCdsThesis, RecordHepThesis)):
self.logs[-1].reject(MSG_NOT_THESIS, record)
return False
try:
self.check.is_thesis(record)
self.check.submitted(record)
self.check.format_universities(record)
self.check.format_authors(record, fmt="F. Last")
self.check.get_my_authors(record, sort=True)
# is with authors from my institute
# standardise name of collaboration
# format authors according to my format
# extract authors from my institute signing the publication
# is submitted date well formed
record.check_and_fix(self.rex_institute,
fmt_author="F. Last",
sep_author=", ",
sort_author=True)
record.format_universities()
except CheckException as e:
self.logs[-1].reject(e, record=record)
......
......@@ -6,6 +6,7 @@ import re
from .exception import ToolException
from plugin_dbui import get_id, UNDEF_ID
AFF_CPPM = "Marseille, CPPM"
ARXIV = "arXiv"
ARXIV_PDF = "http://arxiv.org/pdf/"
......@@ -53,6 +54,10 @@ T2, T4, T6 = " "*2, " "*4, " "*6
THESIS_DIR = "dir."

# University names for Marseille theses.
# NOTE(review): these values end up in stored records; check whether existing
# data already contains the former misspelling "Méditerrannée".
UNIV_AIX_MARSEILLE = "Aix Marseille Université"
UNIV_MARSEILLE = "Université de la Méditerranée Aix-Marseille II"

# Long form of "University" (presumably the expansion of the "U." abbreviation
# -- usage is outside this block, confirm).
UNIVERSITY = "University"
def is_conference(recjson):
"""True when the record describes a publication related to a conference.
......
""" store_tools.recordcdsthesis
"""
from .base import THESIS_DIR
from .base import (AFF_CPPM,
REG_YEAR,
THESIS_DIR,
UNIVERSITY,
UNIV_AIX_MARSEILLE,
UNIV_MARSEILLE,
T6)
from filters import CLEAN_THESIS_DEFENSE
from gluon import current
from store_tools.recordcdspubli import RecordCdsPubli
......@@ -56,6 +63,35 @@ class RecordCdsThesis(RecordCdsPubli):
return li
def format_universities(self):
    """Rewrite the university names attached to the thesis.

    The string returned by ``these_universities`` is processed in two steps:

    * when it contains ``AFF_CPPM`` and a year can be extracted from
      ``these_defense`` with ``REG_YEAR``, ``AFF_CPPM`` is replaced by
      ``UNIV_MARSEILLE`` for a year before 2012 and by
      ``UNIV_AIX_MARSEILLE`` otherwise;
    * every ``U.`` is replaced by the translation of ``UNIVERSITY``.

    The result is stored in ``self["dissertation_note"]["university"]``.
    Nothing is modified when ``these_universities`` returns an empty value.

    """
    self.logger.debug(f"{T6}format university")

    names = self.these_universities()
    if len(names) == 0:
        return

    # the name of the Marseille university depends on the defence year
    if AFF_CPPM in names:
        found = REG_YEAR.search(self.these_defense())
        if found:
            if int(found.group(1)) < 2012:
                marseille = UNIV_MARSEILLE
            else:
                marseille = UNIV_AIX_MARSEILLE
            names = names.replace(AFF_CPPM, marseille)

    # expand the abbreviation "U." using the translated word
    translated = current.T(UNIVERSITY).decode("utf8")
    names = names.replace('U.', translated)

    self["dissertation_note"]["university"] = names
def these_defense(self):
"""The defence date for a master/phd thesis.
......
""" store_tools.recordhepthesis
"""
from .base import (AFF_CPPM,
REG_YEAR,
UNIVERSITY,
UNIV_AIX_MARSEILLE,
UNIV_MARSEILLE,
T6)
from gluon import current
from filters import CLEAN_THESIS_DEFENSE
from .recordheppubli import RecordHepPubli
......@@ -51,6 +58,35 @@ class RecordHepThesis(RecordHepPubli):
return li
def format_universities(self):
    """Rewrite the university names attached to the thesis.

    The string returned by ``these_universities`` is processed in two steps:

    * when it contains ``AFF_CPPM`` and a year can be extracted from
      ``these_defense`` with ``REG_YEAR``, ``AFF_CPPM`` is replaced by
      ``UNIV_MARSEILLE`` for a year before 2012 and by
      ``UNIV_AIX_MARSEILLE`` otherwise;
    * every ``U.`` is replaced by the translation of ``UNIVERSITY``.

    The result is stored as a single entry ``{"name": ...}`` in
    ``self["thesis_info"]["institutions"]``. Nothing is modified when
    ``these_universities`` returns an empty value.

    """
    self.logger.debug(f"{T6}format university")

    names = self.these_universities()
    if len(names) == 0:
        return

    # the name of the Marseille university depends on the defence year
    if AFF_CPPM in names:
        found = REG_YEAR.search(self.these_defense())
        if found:
            if int(found.group(1)) < 2012:
                marseille = UNIV_MARSEILLE
            else:
                marseille = UNIV_AIX_MARSEILLE
            names = names.replace(AFF_CPPM, marseille)

    # expand the abbreviation "U." using the translated word
    translated = current.T(UNIVERSITY).decode("utf8")
    names = names.replace('U.', translated)

    self["thesis_info"]["institutions"] = [{"name": names}]
def these_defense(self):
"""The defence date for a master/phd thesis.
......
......@@ -25,25 +25,31 @@ from store_tools import load_record
@pytest.fixture(scope="module")
def reccds():
def record():
return load_record("cds.cern.ch", 1632177)
def test_authors_as_list_cds_08001(reccds):
assert reccds.authors_as_list() == [u'Chen, Liming']
def test_authors_as_list_cds_08001(record):
assert record.authors_as_list() == [u'Chen, Liming']
def test_these_defense_cds_08002(reccds):
assert reccds.these_defense() == "2013-12-10"
def test_these_defense_cds_08002(record):
assert record.these_defense() == "2013-12-10"
def test_these_level_cds_08003(reccds):
assert reccds.these_level() == "PhD"
def test_these_level_cds_08003(record):
assert record.these_level() == "PhD"
def test_these_directors_cds_08004(reccds):
assert reccds.these_directors(sep=u"|") == ""
def test_these_directors_cds_08004(record):
assert record.these_directors(sep=u"|") == ""
def test_these_universities_cds_08005(reccds):
assert reccds.these_universities() == "Shandong U. & Marseille, CPPM"
def test_these_universities_cds_08005(record):
assert record.these_universities() == "Shandong U. & Marseille, CPPM"
def test_format_universities_cds_08006(record):
    """After ``format_universities`` the record reports the expanded names."""
    expected = "Shandong Université & Aix Marseille Université"

    record.format_universities()

    assert record.these_universities() == expected
......@@ -48,3 +48,9 @@ def test_these_directors_ins_09004(record):
def test_these_universities_ins_09005(record):
    """Check the value reported by ``these_universities`` for the fixture."""
    expected = "Shandong U. & Marseille, CPPM"
    assert record.these_universities() == expected
def test_format_universities_cds_09006(record):
    """After ``format_universities`` the record reports the expanded names.

    NOTE(review): the name carries the ``cds`` prefix although the
    neighbouring test is ``test_these_universities_ins_09005`` -- consider
    renaming to ``..._ins_09006`` for consistency.

    """
    expected = "Shandong Université & Aix Marseille Université"

    record.format_universities()

    assert record.these_universities() == expected
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment