Commit 10b9b585 authored by LE GAC Renaud
Browse files

Review protection in Record(s) and CheckAndFix classes.

parent 6dbde3d3
......@@ -24,7 +24,7 @@ MSG_NO_PROJECT = 'Select a "project" !!!'
MSG_NO_TEAM = 'Select a "team" !!!'
MSG_NO_OAI = "Reject no OAI identifier"
MSG_WELL_FORMED_OAI = "Reject OAI is not well formed"
MSG_WELL_FORM_OAI = "Reject OAI is not well formed"
class PublicationsTool(object):
......
......@@ -47,6 +47,7 @@ MONTHS = {u'Jan':'01',
MSG_NO_AUTHOR = "Reject no author(s)"
MSG_NO_COUNTRY = "Reject invalid country"
MSG_NO_CONF_DATE = "Reject no conference date"
MSG_NO_DATE = "Reject no submission date"
MSG_NO_MY_AUTHOR = "Reject no authors of my institute"
MSG_NO_REF = "Reject incomplete paper reference"
......@@ -119,12 +120,12 @@ class CheckAndFix(object):
if isinstance(record, RecordConf):
# INSPIREHEP start date encoded as 2014-12-31
if "x" in record["111"]:
val = record["111"]["x"]
if "x" in record[u"111"]:
val = record[u"111"]["x"]
# CDS end date encoded as 20141231
elif "z" in record["111"]:
val = record["111"]["z"]
elif "z" in record[u"111"]:
val = record[u"111"]["z"]
val = "%s-%s-%s" % (val[0:4], val[4:6], val[6:8])
elif isinstance(record, RecordThesis):
......@@ -155,34 +156,34 @@ class CheckAndFix(object):
"""
# standard case
if isinstance(record["773"], dict):
if isinstance(record[u"773"], dict):
if "o" in record["773"]:
if "o" in record[u"773"]:
for reg in DECODE_REF:
m = reg.match(record["773"]["o"])
m = reg.match(record[u"773"]["o"])
if m:
record["773"]["p"] = m.group("p")
record["773"]["v"] = m.group("v")
record["773"]["y"] = m.group("y")
record["773"]["c"] = m.group("c")
record[u"773"]["p"] = m.group("p")
record[u"773"]["v"] = m.group("v")
record[u"773"]["y"] = m.group("y")
record[u"773"]["c"] = m.group("c")
return
raise CheckException(MSG_NO_REF)
# list case -- paper with erratum
elif isinstance(record["773"], list):
elif isinstance(record[u"773"], list):
for i in range(len(record["773"])):
for i in range(len(record[u"773"])):
if "o" in record["773"][i]:
if "o" in record[u"773"][i]:
fixed = False
for reg in DECODE_REF:
m = reg.match(record["773"][i]["o"])
m = reg.match(record[u"773"][i]["o"])
if m:
record["773"][i]["p"] = m.group("p")
record["773"][i]["v"] = m.group("v")
record["773"][i]["y"] = m.group("y")
record["773"][i]["c"] = m.group("c")
record[u"773"][i]["p"] = m.group("p")
record[u"773"][i]["v"] = m.group("v")
record[u"773"][i]["y"] = m.group("y")
record[u"773"][i]["c"] = m.group("c")
fixed = True
break
......@@ -205,50 +206,50 @@ class CheckAndFix(object):
"""
if "100" not in record and "700" not in record:
if u"100" not in record and u"700" not in record:
raise CheckException(MSG_NO_AUTHOR)
if "100" in record and isinstance(record["100"], list):
if u"100" in record and isinstance(record[u"100"], list):
# from time to time the first author is duplicated
li = []
for di in record["100"]:
for di in record[u"100"]:
if di not in li:
li.append(di)
if len(li) == 1:
record["100"] = li[0]
record[u"100"] = li[0]
else:
raise CheckException(MSG_TO_MANY_FAUTHOR)
# alias
authors, first_author = None, None
if "700" in record:
authors = record["700"]
if u"700" in record:
authors = record[u"700"]
if "100" in record:
first_author = record["100"]
if u"100" in record:
first_author = record[u"100"]
# first author not defined
if not first_author and authors:
if isinstance(record["700"], list):
record["100"] = record["700"][0]
if isinstance(record[u"700"], list):
record[u"100"] = record[u"700"][0]
else:
record["100"] = record["700"]
record[u"100"] = record[u"700"]
# first author not in the authors list
elif first_author and authors:
if isinstance(record["700"], list):
if record["100"]["a"] != record["700"][0]["a"]:
record["700"].insert(0, record["100"])
if isinstance(record[u"700"], list):
if record[u"100"]["a"] != record[u"700"][0]["a"]:
record[u"700"].insert(0, record[u"100"])
elif record["700"]["a"] != record["100"]["a"]:
record["700"] = [record["100"], record["700"]]
elif record[u"700"]["a"] != record[u"100"]["a"]:
record[u"700"] = [record[u"100"], record[u"700"]]
# only the first author is defined
elif first_author and not authors:
record["700"] = record["100"]
record[u"700"] = record[u"100"]
def clean_erratum(self, record):
"""Clean record with erratum by removing them.
......@@ -265,10 +266,10 @@ class CheckAndFix(object):
# use the simplest algorithm by selecting the first entry in the list
# fair to assume that the article is published first.
record["773"] = record["773"][0]
record[u"773"] = record[u"773"][0]
# treat year and submitted date
for k in ("260", "269"):
for k in (u"260", u"269"):
if k in record and isinstance(record[k], list):
record[k] = record[k][0]
......@@ -305,6 +306,9 @@ class CheckAndFix(object):
raise CheckException(MSG_NO_COUNTRY)
# check and fix conference date
if not (u"111" in record and "d" in record[u"111"]):
raise CheckException(MSG_NO_CONF_DATE)
value = record[u"111"]["d"]
m = REG_CONF_DATES.match(value)
if not m:
......@@ -364,11 +368,11 @@ class CheckAndFix(object):
return
# standard case
if isinstance(record["773"], dict):
if "p" in record["773"] and "v" in record["773"]:
if isinstance(record[u"773"], dict):
if "p" in record[u"773"] and "v" in record[u"773"]:
editor = record["773"]["p"]
volume = record["773"]["v"]
editor = record[u"773"]["p"]
volume = record[u"773"]["v"]
# add space after the dot Phys.Rev -> Phys. Rev
editor = re.sub(r'\.([A-Z])', r'. \1', editor)
......@@ -382,14 +386,14 @@ class CheckAndFix(object):
# remove stupid mistake
editor = CLEAN_REVIEW(editor)
record["773"]["p"] = editor
record["773"]["v"] = volume
record[u"773"]["p"] = editor
record[u"773"]["v"] = volume
# list case -- publication with erratum
elif isinstance(record["773"], list):
elif isinstance(record[u"773"], list):
editors = record._get("773", 'p', force_list=True)
volumes = record._get("773", 'v', force_list=True)
editors = record._get(u"773", 'p', force_list=True)
volumes = record._get(u"773", 'v', force_list=True)
if len(editors) != len(volumes):
raise CheckException(MSG_WELL_FORMED_EDITOR)
......@@ -406,8 +410,8 @@ class CheckAndFix(object):
editor = CLEAN_REVIEW(editor)
record["773"][i]["p"] = editor
record["773"][i]["v"] = volume
record[u"773"][i]["p"] = editor
record[u"773"][i]["v"] = volume
def format_universities(self, record):
"""Format the name of the university for PhD:
......@@ -564,8 +568,8 @@ class CheckAndFix(object):
return
# list of reference (paper with erratum)
refs = record["773"]
if not isinstance(record["773"], list):
refs = record[u"773"]
if not isinstance(record[u"773"], list):
refs = [refs]
# INSPIREHEP
......@@ -647,7 +651,7 @@ class CheckAndFix(object):
if len(dates) != 1:
raise CheckException(MSG_TO_MANY_DATE)
if "269" not in record or isinstance(record["269"], list):
if u"269" not in record or isinstance(record[u"269"], list):
record[u"269"] = dict()
record[u"269"]["c"] = dates[0]
......@@ -662,8 +666,8 @@ class CheckAndFix(object):
"""
# found on INSPIREHEP (see record 1317573)
if "500" in record and "a" in record["500"]:
if record["500"]["a"] == u"*Temporary record*":
if u"500" in record and "a" in record[u"500"]:
if record[u"500"]["a"] == u"*Temporary record*":
raise CheckException(MSG_TEMPORARY_RECORD)
def year(self, record):
......@@ -687,7 +691,7 @@ class CheckAndFix(object):
val = ''
# protection against "publication date YYYY-MM-DD"
li = record._get("260", 'c', force_list=True)
li = record._get(u"260", 'c', force_list=True)
li = [el for el in li if REG_YEAR.match(el)]
if len(li) == 1:
......@@ -714,10 +718,10 @@ class CheckAndFix(object):
if val:
m = REG_YEAR.search(val)
if m:
if "260" in record and isinstance(record["260"], dict):
record["260"]["c"] = m.group(1)
if u"260" in record and isinstance(record[u"260"], dict):
record[u"260"]["c"] = m.group(1)
else:
record["260"] = {"c": m.group(1)}
record[u"260"] = {"c": m.group(1)}
return
raise CheckException(MSG_NO_YEAR)
......
......@@ -46,26 +46,29 @@ class RecordInst(Record):
def future_identifier(self):
"""
Returns:
unicode: the future inspirehep identifier.
unicode: the future inspirehep identifier or an empty string
if not defined.
"""
return self[u"110"]["t"]
return self._get(u"110", "t")
def identifier(self):
"""
Returns:
unicode: the current inspirehep identifier (2015).
unicode: the current inspirehep identifier (2015) or an empty
string if not defined.
"""
return self[u"110"]["u"]
return self._get(u"110", "u")
def name(self):
"""
Returns:
unicode: the name of the institute.
unicode: the name of the institute or an empty string if
not defined.
"""
return self[u"110"]["b"]
return self._get(u"110", "b")
def rex(self):
"""
......@@ -74,4 +77,4 @@ class RecordInst(Record):
in cds.cern.ch or inspirehep.net store
"""
return r"%s|%s" % (self[u"110"]["u"], self[u"110"]["t"])
return r"%s|%s" % (self.identifier(), self.future_identifier())
......@@ -152,7 +152,8 @@ class RecordPubli(Record):
s = ', '.join(s)
if regex.search(s):
return self[u"700"]["a"]
if "a" in self[u"700"]:
return self[u"700"]["a"]
return None
......@@ -168,7 +169,8 @@ class RecordPubli(Record):
s = ', '.join(di['u'])
if regex.search(s):
authors.append(di['a'])
if "a" in di:
authors.append(di['a'])
if cmpFct:
authors.sort(key=cmpFct)
......
......@@ -62,12 +62,14 @@ class RecordThesis(RecordPubli):
li = []
if u"700" in self and isinstance(self[u"700"], dict):
if "e" in self[u"700"] and self[u"700"]["e"] == THESIS_DIR:
li.append(self[u"700"]["a"])
if "a" in self[u"700"]:
li.append(self[u"700"]["a"])
elif u"700" in self and isinstance(self[u"700"], list):
for di in self[u"700"]:
if "e" in di and di["e"] == THESIS_DIR:
li.append(di["a"])
if "a" in di:
li.append(di["a"])
return ', '.join(li)
......
Markdown is supported
Attaching a file: 0%. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment