Commit 7799693c authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Polish and improve check and fix for RecordCdsThesis

parent 21971181
......@@ -32,7 +32,7 @@ class Thesis(Automaton):
"""
self.logger.debug(f"{T4}check and fix record (thesis)")
if not isinstance(record, (RecordCdsThesis, RecordHepThesis)):
if record.subtype() == "thesis":
self.logs[-1].reject(MSG_NOT_THESIS, record)
return False
......
......@@ -833,7 +833,7 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
Returns:
str:
* "articles", "preprint", "proceeding", "note" or "report"
* articles, preprint, proceeding, note, report or thesis
* empty string when it is not defined
"""
......@@ -841,13 +841,16 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
if collection is None:
return ""
if not isinstance(collection, list):
collection = [collection]
lst = [dct.get("primary", "").lower() for dct in collection]
if "conferencepaper" in lst:
return "proceeding"
# order matters since we have (preprint+note)
for val in ("article", "note", "report", "preprint"):
for val in ("article", "note", "report", "thesis", "preprint"):
if val in lst:
return val
......
......@@ -2,12 +2,15 @@
"""
from .base import (AFF_CPPM,
MSG_WELL_FORMED_DATE,
REG_YEAR,
REG_DATE_YYYYMM,
THESIS_DIR,
UNIVERSITY,
UNIV_AIX_MARSEILLE,
UNIV_MARSEILLE,
T6)
from .exception import CheckException
from filters import CLEAN_THESIS_DEFENSE
from gluon import current
from store_tools.recordcdspubli import RecordCdsPubli
......@@ -63,6 +66,37 @@ class RecordCdsThesis(RecordCdsPubli):
return li
def check_submitted_date(self):
    """Check that submitted date is either ``YYYY-MM`` or ``YYYY-MM-DD``.

    When the submitted date does not match ``REG_DATE_YYYYMM``, the
    defense date is tried as a replacement.  If the defense date matches,
    it is stored as the ``date`` of the ``prepublication`` entry (the
    first one when ``prepublication`` is a list, a freshly created one
    when the key is missing).

    Raises:
        CheckException::

            * neither the submitted date nor the defense date
              is well formed

    """
    self.logger.debug(f"{T6}check submitted date")

    # nothing to fix when the submitted date is already well formed
    if REG_DATE_YYYYMM.match(self.submitted()):
        return

    # recover by using the defense date
    defense = self.these_defense()
    if not REG_DATE_YYYYMM.match(defense):
        raise CheckException(MSG_WELL_FORMED_DATE)

    if "prepublication" not in self:
        self["prepublication"] = {"date": defense}
        return

    entry = self["prepublication"]
    if isinstance(entry, list):
        # several prepublication entries: only the first one is updated
        entry = entry[0]
    entry["date"] = defense
def format_universities(self):
"""Format the name of the university for PhD:
......@@ -86,6 +120,9 @@ class RecordCdsThesis(RecordCdsPubli):
val = (UNIV_MARSEILLE if year < 2012 else UNIV_AIX_MARSEILLE)
values = values.replace(AFF_CPPM, val)
if "Marseille U., Luminy" in values:
values = values.replace("Marseille U., Luminy", UNIV_MARSEILLE)
# Other -- replace U. by University
university = current.T(UNIVERSITY).decode("utf8")
values = values.replace('U.', university)
......
......@@ -530,7 +530,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
Returns:
str:
* "articles", "preprint", "proceeding", "note" or "report"
* articles, preprint, proceeding, note, report or thesis
* empty string when it is not defined
"""
......
......@@ -2,11 +2,14 @@
"""
from .base import (AFF_CPPM,
MSG_WELL_FORMED_DATE,
REG_DATE_YYYYMM,
REG_YEAR,
UNIVERSITY,
UNIV_AIX_MARSEILLE,
UNIV_MARSEILLE,
T6)
from .exception import CheckException
from gluon import current
from filters import CLEAN_THESIS_DEFENSE
from .recordheppubli import RecordHepPubli
......@@ -58,6 +61,28 @@ class RecordHepThesis(RecordHepPubli):
return li
def check_submitted_date(self):
    """Check that submitted date is either ``YYYY-MM`` or ``YYYY-MM-DD``.

    When the submitted date does not match ``REG_DATE_YYYYMM``, the
    defense date is tried as a replacement.  If the defense date matches,
    it is written to ``thesis_info.defense_date``.

    Raises:
        CheckException::

            * neither the submitted date nor the defense date
              is well formed

    """
    self.logger.debug(f"{T6}check submitted date")

    submitted = self.submitted()
    if REG_DATE_YYYYMM.match(submitted):
        return

    # recover by using the defense date
    defense = self.these_defense()
    if not REG_DATE_YYYYMM.match(defense):
        raise CheckException(MSG_WELL_FORMED_DATE)

    # NOTE(review): the value returned by these_defense() is stored back
    # as the defense date; presumably submitted() reads it afterwards --
    # TODO confirm against the implementation of submitted().
    self["thesis_info"]["defense_date"] = defense
def format_universities(self):
"""Format the name of the university for PhD:
......@@ -81,6 +106,9 @@ class RecordHepThesis(RecordHepPubli):
val = (UNIV_MARSEILLE if year < 2012 else UNIV_AIX_MARSEILLE)
values = values.replace(AFF_CPPM, val)
if "Marseille U., Luminy" in values:
values = values.replace("Marseille U., Luminy", UNIV_MARSEILLE)
# Other -- replace U. by University
university = current.T(UNIVERSITY).decode("utf8")
values = values.replace('U.', university)
......
......@@ -53,3 +53,10 @@ def test_format_universities_cds_08006(record):
record.format_universities()
assert record.these_universities() == \
"Shandong Université & Aix Marseille Université"
def test_format_universities_cds_08007():
    """Format the university of the cds.cern.ch record 1394605."""
    expected = "Université de la Méditerrannée Aix-Marseille II"

    thesis = load_record("cds.cern.ch", 1394605)
    thesis.format_universities()

    assert thesis.these_universities() == expected
......@@ -50,7 +50,15 @@ def test_these_universities_ins_09005(record):
assert record.these_universities() == "Shandong U. & Marseille, CPPM"
def test_format_universities_cds_09006(record):
def test_format_universities_ins_09006(record):
    """Format the universities of the record provided by the fixture."""
    expected = "Shandong Université & Aix Marseille Université"

    record.format_universities()
    universities = record.these_universities()

    assert universities == expected
def test_format_universities_ins_08007():
    """Format the university of the inspirehep.net record 1088032."""
    expected = "Université de la Méditerrannée Aix-Marseille II"

    thesis = load_record("inspirehep.net", 1088032, shelf="literature")
    thesis.format_universities()

    assert thesis.these_universities() == expected
"""test_13_check_and_fix_article_cds
cds.cern.ch 2242641: Phys. Rev. D 95 (2017) 052005
(same as inspirehep.net article 15009922)
https://cds.cern.ch/record/2242641
(same as https://inspirehep.net/api/literature/15009922)
Test individual method of check and fix process for article:
......
"""test_14_check_and_fix_article_ins
inspirehep.net article 15009922: Phys. Rev. D 95 (2017) 052005
(same as cds.cern.ch 2242641)
https://inspirehep.net/api/literature/15009922
(same as https://cds.cern.ch/record/2242641)
Test individual method of check and fix process for article:
......
"""test_15_check_and_fix_proceeding_cds
cds.cern.ch 1411352
(same as inspirehep.net 1089237)
https://cds.cern.ch/record/1411352
(same as https://inspirehep.net/api/literature/1089237)
Test individual method of check and fix process for article:
Test individual method of check and fix process for proceeding:
* publication is a published proceeding
* is with authors from my institute
......
"""test_16_check_and_fix_proceeding_ins
inspirehep.net 1089237
(same as cds.cern.ch 1411352)
https://inspirehep.net/api/literature/1089237
(same as https://cds.cern.ch/record/1411352)
Test individual method of check and fix process for proceeding:
......
"""test_16_CheckAndFix_thesis
* Test CheckAndFix methods for thesis.
Use the one talk in cds.cern.ch
- is_thesis
- submitted
- format_universities
- format_authors (already test with article)
- get_my_authors (already test with article)
"""
import pytest
from harvest_tools.checkandfix import CheckAndFix
from store_tools import load_record
@pytest.fixture(scope="module")
def reccds():
    """Provide the cds.cern.ch record 1394605 (module-scoped fixture)."""
    thesis = load_record("cds.cern.ch", 1394605)
    return thesis
@pytest.fixture(scope="module")
def svc():
    """Provide a ``CheckAndFix`` instance (module-scoped fixture)."""
    service = CheckAndFix()
    return service
def test_is_thesis_cds_17001(svc, reccds):
    """``is_thesis`` returns ``None`` for the thesis record."""
    outcome = svc.is_thesis(reccds)
    assert outcome is None
def test_submitted_cds_17002(svc, reccds):
    """The empty submitted date is fixed by ``svc.submitted``."""
    # state of the record before the fix
    defense = reccds.these_defense()
    assert defense == "2011"

    before = reccds.submitted()
    assert before == ""

    svc.submitted(reccds)

    # state of the record after the fix
    after = reccds.submitted()
    assert after == "2011-11"
def test_format_universities_cds_17003(svc, reccds):
    """The university name is formatted for two cds.cern.ch theses."""

    def university(rec):
        # value stored in the dissertation note of the record
        return rec["dissertation_note"]["university"]

    # Khanji in 2011 (Université de la Méditerrannée)
    assert university(reccds) == "Marseille U., Luminy"
    svc.format_universities(reccds)
    assert university(reccds) == \
        "Université de la Méditerrannée Aix-Marseille II"

    # Chen in 2013 (Aix Marseille Université)
    reccds2 = load_record("cds.cern.ch", 1632177)
    assert university(reccds2) == "Shandong U. & Marseille, CPPM"
    svc.format_universities(reccds2)
    assert university(reccds2) == \
        "Shandong Université & Aix Marseille Université"
"""test_17_check_and_fix_thesis_cds
https://cds.cern.ch/record/1394605
(same as https://inspirehep.net/api/literature/1088032)
Test individual method of check and fix process for thesis:
* is with authors from my institute
* standardise name of collaboration
* format authors according to my format
* extract authors from my institute signing the publication
* is submitted date well formed
"""
import pytest
from gluon import current
from harvest_tools import get_rex_institute
from store_tools import CheckException, load_record
@pytest.fixture(scope="module")
def record():
    """Provide the cds.cern.ch record 1394605 (module-scoped fixture)."""
    thesis = load_record("cds.cern.ch", 1394605)
    return thesis
def test_subtype_17001(record):
    """The record subtype is ``thesis``."""
    subtype = record.subtype()
    assert subtype == "thesis"
# ............................................................................
#
# Check and fix implemented in the RecordCdsConf
#
def test_check_authors_17010(record):
    """``check_authors`` returns ``None`` for this record.

    The checked method raises CheckException in case of problem.
    """
    outcome = record.check_authors()
    assert outcome is None
def test_check_my_affiliation_17011(record):
    """``check_my_affiliation`` returns ``None`` for this record.

    The checked method raises CheckException in case of problem.
    """
    rex_institute = get_rex_institute(current.db, current.app)
    outcome = record.check_my_affiliation(rex_institute)
    assert outcome is None
def test_check_collaboration_17012(record):
    """The collaboration stays empty after ``check_collaboration``."""
    before = record.collaboration()
    assert before == ""

    record.check_collaboration(current.db)

    after = record.collaboration()
    assert after == ""
def test_check_format_authors_17013(record):
    """The single author is reformatted with the ``F. Last`` format."""
    # authors as found in the record
    original = record.authors_as_list()
    assert len(original) == 1
    assert original[0] == "Khanji, Basem"

    record.check_format_authors(fmt="F. Last")

    # authors after formatting
    formatted = record.authors_as_list()
    assert formatted[0] == "B. Khanji"
def test_extract_my_authors_17014(record):
    """``extract_my_authors`` fills ``my_authors`` with the formatted name.

    The checked method raises CheckException in case of problem.
    """
    rex_institute = get_rex_institute(current.db, current.app)

    # nothing extracted yet
    assert record.my_authors is None

    record.check_format_authors(fmt="F. Last")
    outcome = record.extract_my_authors(rex_institute, sep="|", sort=True)
    assert outcome is None

    assert record.my_authors == "B. Khanji"
def test_check_submitted_date_17015(record):
    """``check_submitted_date`` raises CheckException for this record."""
    submitted = record.submitted()
    assert submitted == ""

    # raise CheckException in case of problem
    with pytest.raises(CheckException):
        record.check_submitted_date()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment