Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 2c987eb6 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Redesign the algorithm to decide if a record is published or not.

parent 7a55378e
......@@ -7,7 +7,8 @@ import regex
from base import search_synonym, ToolException
from exception import CheckException
from invenio_tools import (load_record,
from invenio_tools import (DECODE_REF,
load_record,
MSG_NO_CONF,
MSG_NO_THESIS,
OAI_URL,
......@@ -27,13 +28,6 @@ DECODE_DD_MMM_YYYY = re.compile(r"(\d{1,2}) *([A-Za-z]{3}) *(\d{4})")
# Three space-separated numeric groups: 1-2 digits, 1-2 digits, 4 digits
# (day, month and year according to the constant name).
DECODE_DD_MM_YYYY = re.compile(r"(\d{1,2}) +(\d{1,2}) +(\d{4})")
# Four digits anchored at both ends of the string.
DECODE_YYYY = re.compile(r"^(\d{4})$")
# Decode publication reference:
# Phys. Rev. Lett. 113, 032001 (2014)
# Eur. Phys. J. C (2014) 74:2883
# _ref1 targets the first layout and _ref2 the second one; both patterns
# expose the named groups "p", "v", "c" and "y".
_ref1 = r"(?P<p>[A-Za-z\. ]+) +(?P<v>\d+),? +(?P<c>[\d-]+) +\((?P<y>[\d]+)\)"
_ref2 = r"(?P<p>[A-Za-z\. ]+) +\((?P<y>\d+)\) +(?P<v>[\d]+):(?P<c>[\d-]+)"
# Compiled patterns, kept in a list so callers can try them in order.
DECODE_REF = [re.compile(_ref1), re.compile(_ref2)]
MONTHS = {u'Jan': '01',
u'Feb': '02',
u'Fev': '02',
......
......@@ -32,7 +32,7 @@ from marc12 import Marc12
from record import Record
from recordconf import RecordConf
from recordinst import RecordInst
from recordpubli import RecordPubli
from recordpubli import DECODE_REF, RecordPubli
from recordthesis import RecordThesis
......
......@@ -11,9 +11,19 @@ from base import (ARXIV,
REG_AUTHOR,
REG_YEAR)
from filters import CLEAN_COLLABORATION
from plugin_dbui import CLEAN_SPACES
from plugin_dbui import as_list, CLEAN_SPACES
from record import Record
# Regular expressions decoding a paper reference string.
# Two layouts are recognised:
#
#   Phys. Rev. Lett. 113, 032001 (2014)    -> _ref1
#   Eur. Phys. J. C (2014) 74:2883         -> _ref2
#
# Both patterns expose the named groups "p", "v", "c" and "y"; judging
# from the examples above these are presumably the journal, the volume,
# the page / article number and the year.
_ref1 = r"(?P<p>[A-Za-z\. ]+) +(?P<v>\d+),? +(?P<c>[\d-]+) +\((?P<y>[\d]+)\)"
_ref2 = r"(?P<p>[A-Za-z\. ]+) +\((?P<y>\d+)\) +(?P<v>[\d]+):(?P<c>[\d-]+)"

# Compiled once at import time, in the order in which they are tried.
DECODE_REF = list(map(re.compile, (_ref1, _ref2)))

# The MARC12 keys containing paper reference
PAPER_REFERENCE_KEYS = set(("c", "p", "v", "y"))
class RecordPubli(Record):
"""The MARC record describing a publication.
......@@ -402,14 +412,25 @@ class RecordPubli(Record):
if u"773" not in self:
return False
# Should have the field "p", "v", "y" and "c"
# It is in the state inprint when the dict contains only the o field.
di = self[u"773"]
if isinstance(di, dict):
s1 = set(di.iterkeys()).intersection(set(("c", "p", "v", "y")))
if len(s1) == 4:
# record can contain an erratum
for di in as_list(self[u"773"]):
keys = di.keys()
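# reference is complete and contains the keys "p", "v", "y" and "c"
# NOTE(review): the issubset() test below only checks that no key lies
# outside PAPER_REFERENCE_KEYS (an empty or partial dict passes, a dict
# with an extra "o" key fails); requiring all four keys would be
# PAPER_REFERENCE_KEYS.issubset(keys) -- confirm the intent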
if set(keys).issubset(PAPER_REFERENCE_KEYS):
return True
# paper reference may be incomplete or even wrong
# the recovery procedure will use the 773o
# check that 773o contains the paper reference:
# Eur. Phys. J. C (2014) 74:2883
# Phys. Rev. Lett. 113, 032001 (2014)
if "o" in di:
value = di["o"]
for reg in DECODE_REF:
if reg.match(value):
return True
return False
def is_with_erratum(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment