Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 076d20c6 authored by LE GAC Renaud
Browse files

Move the function recover_oai in the CheckAndFix class.

parent df4943f4
......@@ -6,7 +6,7 @@ import re
import traceback
from base import MSG_FIX_ORIGIN, MSG_IN_DB, recover_oai, ToolException
from base import MSG_FIX_ORIGIN, MSG_IN_DB, ToolException
from gluon.storage import Storage
from invenio_tools import (CheckAndFix,
InvenioStore,
......@@ -280,6 +280,8 @@ class Automaton(object):
print "check record"
try:
self.check.recover_oai(record, self.harvester.host)
if self.check.is_bad_oai_used(record):
self.logs[-1].idle(MSG_IN_DB, record.year())
return False
......@@ -528,22 +530,13 @@ class Automaton(object):
record_id=record.id(),
title=record.title()))
# the OAI is not defined -- recover it
oai = record.oai()
if oai is None:
recover_oai(record, self.harvester.host)
# the OAI is not well --recover it
if not REG_OAI.match(oai):
recover_oai(record, self.harvester.host)
# check that the record is well formed
# repair non-conformity as far as possible
if not self.check_record(record):
continue
if self.dbg:
print "start loading in the database"
print "insert record in the database"
# insert the record in the database
self.insert_record(record)
......
......@@ -14,10 +14,6 @@ MSG_FIX_ORIGIN = "Fixed the origin field"
MSG_IN_DB = "Already in the database"
MSG_LOAD = "Load in the database"
MSG_INVALID_HOST = "Invalid host"
OAI_INVENIO = "oai:%s:%s"
def family_name_fr(full_name):
"""Extract the family name when the full name is encoded as C{J. Doe}.
......@@ -145,28 +141,5 @@ def learn_my_authors(db,
db.my_authors[row.id] = dict(authors=', '.join(database_authors))
def recover_oai(record, host):
    """Write a rebuilt OAI identifier into the record.

    The identifier is produced from the C{OAI_INVENIO} template using the
    host name and the record identifier. Where it is stored depends on
    the host:

        - C{cds.cern.ch}: field C{0248}, subfield C{a}
        - C{inspirehep.net}: field C{909CO}, subfield C{o}

    The function does not look at the current OAI value; deciding whether
    a recovery is needed is left to the caller.

    @type record: Record
    @param record: the record to repair, modified in place

    @type host: unicode
    @param host: either C{cds.cern.ch} or C{inspirehep.net}

    @raise ValueError: when the host is not one of the two supported ones

    """
    # select the (field, subfield) pair holding the OAI for this host
    if host == "cds.cern.ch":
        location = (u"0248", "a")
    elif host == "inspirehep.net":
        location = (u"909CO", "o")
    else:
        raise ValueError(MSG_INVALID_HOST)

    tag, code = location

    # create the field container when the record does not have it yet
    if tag not in record:
        record[tag] = {}

    identifier = OAI_INVENIO % (host, record.id())
    record[tag][code] = identifier
class ToolException(Exception):
    """Exception type defined by this module.

    It adds nothing to the standard C{Exception}; it only provides a
    distinct class name.

    """
......@@ -51,6 +51,8 @@ MONTHS = {u'Jan':'01',
u'Nov':'11',
u'Dec':'12'}
MSG_INVALID_HOST = "Invalid host"
MSG_NO_AUTHOR = "Reject no author(s)"
MSG_NO_COUNTRY = "Reject invalid country"
MSG_NO_CONF_DATE = "Reject no conference date"
......@@ -71,6 +73,8 @@ MSG_WELL_FORMED_DATE = "Reject submission date is not well formed"
MSG_WELL_FORMED_EDITOR = "Reject editor is not well formed"
OAI_INVENIO = "oai:%s:%s"
REG_COLLABORATION = re.compile(regex.REG_COLLABORATION)
# Date range written as "<number> - <number> <Mon> <year>", e.g. "12 - 15 Jun 2012".
# Captured groups: first number, second number, three-letter month
# abbreviation (one capital followed by two lowercase letters), four-digit year.
# The dash and the spaces between the parts are optional.
# NOTE(review): presumably the two numbers are the first and last day of a
# conference -- confirm against the code using this pattern.
# A raw string is used so that "\d" reaches the regex engine untouched instead
# of relying on Python keeping an unknown string escape as is.
REG_CONF_DATES_1 = re.compile(r"(\d+) *-? *(\d+) *([A-Z][a-z]{2}) *(\d{4})")
......@@ -740,6 +744,35 @@ class CheckAndFix(object):
self._repair_paper_reference(record)
return
def recover_oai(self, record, host):
    """Rebuild the OAI identifier of a record when it is missing or
    not well formed.

    Nothing is done when C{record.oai()} returns a value matching
    C{REG_OAI}. Otherwise a new identifier is produced from the
    C{OAI_INVENIO} template using the host name and the record
    identifier, and stored in the record:

        - C{cds.cern.ch}: field C{0248}, subfield C{a}
        - C{inspirehep.net}: field C{909CO}, subfield C{o}

    @type record: Record
    @param record: the record to check, repaired in place when needed

    @type host: unicode
    @param host: either cds.cern.ch or inspirehep.net

    @raise ValueError: when a repair is needed and the host is not one
    of the two supported ones

    """
    current = record.oai()
    must_repair = current is None or not REG_OAI.match(current)
    if not must_repair:
        return

    # select the (field, subfield) pair holding the OAI for this host
    if host == "cds.cern.ch":
        tag, code = u"0248", "a"
    elif host == "inspirehep.net":
        tag, code = u"909CO", "o"
    else:
        raise ValueError(MSG_INVALID_HOST)

    # create the field container when the record does not have it yet
    if tag not in record:
        record[tag] = {}

    rebuilt = OAI_INVENIO % (host, record.id())
    record[tag][code] = rebuilt
def submitted(self, record):
"""Standardize the submitted date as YYYY-MM or YYYY-MM-DD.
Look for alternative when it is not defined.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment