Commit 1a93c0f4 authored by LE GAC Renaud
Browse files

Update CheckAndFix and related tests

parent b47b5b7e
......@@ -12,7 +12,12 @@ from gluon import current
from store_tools import (MSG_NO_CONF,
MSG_NO_THESIS,
OAI_URL,
Record,
RecordConf,
RecordHep,
RecordHepConf,
RecordHepThesis,
RecordPubli,
RecordThesis,
REG_OAI,
REG_YEAR)
......@@ -223,8 +228,8 @@ class CheckAndFix(object):
# try to recover year when not defined
if not year:
# published article, proceeding
if record["publication_info"].year.iloc[0] != "":
year = record["publication_info"].year.iloc[0]
if record.df_info.year.iloc[0] != "":
year = record.df_info.year.iloc[0]
# start date of a conference
elif record._get("meeting_name", "opening_date") != "":
......@@ -307,10 +312,17 @@ class CheckAndFix(object):
opening = self._get_conference_dates(record)[0]
val = opening.strftime("%Y-%m-%d")
elif isinstance(record, RecordThesis):
elif isinstance(record, RecordHepConf):
opening = record.get("opening_data", None)
if opening is not None:
val = opening.strftime("%Y-%m-%d")
elif isinstance(record, (RecordHepThesis, RecordThesis)):
val = record.these_defense()
else:
# try with a preprint number
if val == "" or len(val) < 7:
report = record.preprint_number()
if report:
m_arxiv = DECODE_ARXIV.match(report)
......@@ -319,7 +331,11 @@ class CheckAndFix(object):
# last chance: use the creation date for the record
if val == "" or len(val) < 7:
val = record["creation_date"][0:7]
if isinstance(record, (RecordConf, RecordPubli, RecordThesis)):
val = record["creation_date"][0:7]
else:
val = record.get("legacy_creation_date", "")[0:7]
return val
......@@ -579,7 +595,7 @@ class CheckAndFix(object):
if not record.is_published():
return
df = record["publication_info"].iloc[0]
df = record.df_info.iloc[0]
editor = df.title
volume = df.volume
......@@ -706,11 +722,15 @@ class CheckAndFix(object):
"""
self.logger.debug(f"{T6}is conference")
if not isinstance(record, RecordConf):
raise CheckException(MSG_NO_CONF)
if isinstance(record, RecordConf):
if "meeting_name" in record:
return
if "meeting_name" not in record:
raise CheckException(MSG_NO_CONF)
if isinstance(record, RecordHepConf):
if getattr(record, "conference", None) is not None:
return
raise CheckException(MSG_NO_CONF)
def is_thesis(self, record):
"""Check that the record described a thesis.
......@@ -840,7 +860,7 @@ class CheckAndFix(object):
# * the row contains empty strings when the record is not published.
# * iloc[0] returns a Series whose index holds the column names
#
columns = (record["publication_info"].iloc[0]
columns = (record.df_info.iloc[0]
.replace("", np.nan)
.dropna()
.index)
......@@ -859,13 +879,13 @@ class CheckAndFix(object):
# transform PhysRevD in Phys. Rev. D
li = re.split(r"([A-Z][a-z]+)", m.group(1))
title = ". ".join([el for el in li if len(el) > 0])
record["publication_info"].loc[0, "title"] = title
record.df_info.loc[0, "title"] = title
elif subfield == "volume":
record["publication_info"].loc[0, "volume"] = m.group(2)
record.df_info.loc[0, "volume"] = m.group(2)
elif subfield == "pagination":
record["publication_info"].loc[0, "pagination"] = m.group(3)
record.df_info.loc[0, "pagination"] = m.group(3)
elif subfield == "year":
raise ToolException(MSG_NO_REF + "[year]")
......@@ -899,7 +919,7 @@ class CheckAndFix(object):
abbreviation = db.publishers[dbid].abbreviation
if abbreviation != val:
record["publication_info"].loc[0, "title"] = abbreviation
record.df_info.loc[0, "title"] = abbreviation
# convert ToolException to CheckException
except ToolException as e:
......@@ -948,18 +968,23 @@ class CheckAndFix(object):
data = (m.group(3), int(m.group(2)), int(m.group(1)))
date = '%s-%02i-%02i' % data
# in some cases we have to deal with a list (see cds 2234042)
# in some cases it is not defined (e.g. phd thesis)
if "prepublication" in record:
# update
if isinstance(record, Record):
# in some cases we have to deal with a list (see cds 2234042)
# in some cases it is not defined (e.g. phd thesis)
if "prepublication" in record:
prepublication = record["prepublication"]
if isinstance(prepublication, list):
prepublication[0]["date"] = date
else:
prepublication["date"] = date
prepublication = record["prepublication"]
if isinstance(prepublication, list):
prepublication[0]["date"] = date
else:
prepublication["date"] = date
record["prepublication"] = {"date": date}
else:
record["prepublication"] = {"date": date}
elif isinstance(record, RecordHep):
record["preprint_date"] = date
def temporary_record(self, record):
"""Some records are marked temporary.
......
......@@ -29,6 +29,7 @@ from .inspirehepstore import InspirehepStore
from .inveniostore import InvenioStore
from .record import Record
from .recordconf import RecordConf
from .recordhep import RecordHep
from .recordhepconf import RecordHepConf
from .recordhepinst import RecordHepInst
from .recordheppubli import RecordHepPubli
......
......@@ -47,8 +47,8 @@ def test_format_editor_cds_13001(svc, reccds):
def test_format_editor_ins_13002(svc, recins):
# inspire
assert recins.paper_editor() == "Phys.Rev."
assert recins.paper_volume() == "D95"
assert recins.paper_editor() == "Phys. Rev. D"
assert recins.paper_volume() == "95"
svc.format_editor(recins)
......
......@@ -94,4 +94,5 @@ def test_submitted_cds_14008(svc, reccds):
def test_submitted_ins_14009(svc, recins):
assert recins.submitted() == "2011"
svc.submitted(recins)
assert recins.submitted() == "2010-12-06"
# Note: the legacy_creation_date is wrong in inspirehep.net version 2
assert recins.submitted() == "2012-02"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment