Commit cdfcf28a authored by LE GAC Renaud
Browse files

More robust algorithm to determine paper editor, volume, page and year fields.

parent 26dd1d7b
......@@ -60,6 +60,14 @@ REG_BODY = re.compile("<body>(.*)</body>")
# NOTE: every pattern is a raw string so that regex escapes such as "\d" or
# "\(" reach the re module untouched and do not trigger Python's
# "invalid escape sequence" warning.

# A bracketed list made only of digits, commas and spaces, e.g. "[1, 2, 3]"
# (the empty list "[]" is accepted too).
REG_IDS_OK = re.compile(r"^\[[\d, ]*\]$")

# A string made of digits only.
REG_INT = re.compile(r"^\d+$")

# Identifier of the form "oai:<lowercase letters and dots>:<digits>".
# Group 1 is the middle part, group 2 the trailing number.
REG_OAI = re.compile(r'oai:([a-z\.]+):([\d]+)')

# Decode publication reference:
#     Phys. Rev. Lett. 113, 032001 (2014)
#     Eur. Phys. J. C (2014) 74:2883
#
# Named groups shared by both patterns:
#     p: editor (review abbreviation)
#     v: volume
#     c: page(s)
#     y: year
reg_1 = re.compile(
    r"(?P<p>[A-Za-z\. ]+) +(?P<v>\d+),? +(?P<c>[\d-]+) +\((?P<y>[\d]+)\)")
reg_2 = re.compile(
    r"(?P<p>[A-Za-z\. ]+) +\((?P<y>\d+)\) +(?P<v>[\d]+):(?P<c>[\d-]+)")

# Patterns are tried in this order when decoding a reference.
REG_REF = [reg_1, reg_2]

# Volume prefixed by upper case letters, e.g. "B673" or "B 673".
# Group 1 is the part starting at the first digit ("673").
REG_VOLUME = re.compile(r'[A-Z]+ *(\d.*)')

# Four consecutive digits, captured as group 1.
REG_YEAR = re.compile(r"(\d{4})")
......@@ -1413,7 +1421,41 @@ class Record(dict):
- Empty string when not defined
"""
return self._get("773", 'c')
# editor and year are mandatory
if "773" not in self:
return ''
for k in ("p", "y"):
if k not in self["773"]:
return ''
# Case in which page and volume are defined
if "c" in self["773"] and "v" in self["773"]:
return self._get("773", 'c')
# Case in which page or volume is not defined
# Recovery using the "o" field (invenio):
# Eur. Phys. J. C (2014) 74:2883
# Phys. Rev. Lett. 113, 032001 (2014)
elif "c" not in self["773"] or "v" not in self["773"]:
li = []
for reference in self._get("773", "o", force_list=True):
for reg in REG_REF:
m = reg.match(reference)
if m:
li.append(m.group('c'))
if not li:
return ''
if len(li) == 1:
return li[0]
return li
return ''
def paper_reference(self):
......@@ -1426,13 +1468,37 @@ class Record(dict):
published in a review.
"""
li, all_fields = [], True
if "773" not in self: return ''
li = []
for k in ('p', 'v', 'y', 'c'):
if k in self["773"]:
li.append(self["773"][k])
else:
all_fields = False
# recovery procedure using the "o" field (invenio):
# Eur. Phys. J. C (2014) 74:2883
# Phys. Rev. Lett. 113, 032001 (2014)
#
if not all_fields and "o" in self["773"]:
li = []
for reg in REG_REF:
m = reg.match(self["773"]["o"])
if m:
li = [m.group('p'),
m.group('v'),
m.group('y'),
m.group('c')]
if len(li) != 4:
return ''
# remove dot in the editor abbreviation
li[0] = CLEAN_REVIEW(li[0])
return ' '.join(li)
......@@ -1472,24 +1538,51 @@ class Record(dict):
- Empty list when nothing is found.
"""
li = []
# editor and year are mandatory
if "773" not in self:
return li
for k in ("p", "y"):
if k not in self["773"]:
return li
# Case in which page and volume are defined
#
# The encoding of the volume depends on the store !
# INVENIO: Phys. Lett. B + volume 673
# INSPIREHEP: Phys.Lett + volume B673
# Standardise the answer as 673
if "c" in self["773"] and "v" in self["773"]:
for volume in self._get("773", 'v', force_list=True):
m = REG_VOLUME.match(volume)
if m:
volume = m.group(1)
li.append(volume)
# Case in which page or volume is not defined
#
# Recovery using the "o" field (invenio):
# Eur. Phys. J. C (2014) 74:2883
# Phys. Rev. Lett. 113, 032001 (2014)
li = []
volumes = self._get("773", 'v', force_list=True)
for volume in volumes:
m = REG_VOLUME.match(volume)
if m:
volume = m.group(1)
li.append(volume)
elif "c" not in self["773"] or "v" not in self["773"]:
for reference in self._get("773", "o", force_list=True):
for reg in REG_REF:
m = reg.match(reference)
if m:
li.append(m.group('v'))
# return a single string
if len(li) == 1:
return li[0]
return li
def paper_year(self):
"""The year of the publication.
......@@ -1501,7 +1594,33 @@ class Record(dict):
- Empty string if the year is not defined.
"""
return self._get("773", 'y')
if "773" not in self:
return ''
years = self._get("773", 'y')
if years:
return years
# recovery using the "o" field (invenio):
# Eur. Phys. J. C (2014) 74:2883
# Phys. Rev. Lett. 113, 032001 (2014)
li = []
for reference in self._get("773", "o", force_list=True):
for reg in REG_REF:
m = reg.match(reference)
if m:
li.append(m.group('y'))
# empty string
if not li:
return ''
# return a single string
if len(li) == 1:
return li[0]
# return a list
return li
def preprint_number(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment