Commit 615dcd15 authored by LE GAC Renaud
Browse files

Apply 2to3 converter to invenio_tools.

parent c2623351
......@@ -4,37 +4,37 @@ Note:
details on the invenio API at http://invenio-software.org/
"""
from base import (ARXIV,
ARXIV_PDF,
is_conference,
is_institute,
is_thesis,
MSG_NO_CONF,
MSG_NO_COUNTRY,
MSG_NO_PUBLISHER,
MSG_NO_THESIS,
MSG_WELL_FORMED_COLLABORATION,
OAI_URL,
REG_ARXIV_NUMBER,
REG_AUTHOR,
REG_OAI,
REG_YEAR,
THESIS_DIR)
from .base import (ARXIV,
ARXIV_PDF,
is_conference,
is_institute,
is_thesis,
MSG_NO_CONF,
MSG_NO_COUNTRY,
MSG_NO_PUBLISHER,
MSG_NO_THESIS,
MSG_WELL_FORMED_COLLABORATION,
OAI_URL,
REG_ARXIV_NUMBER,
REG_AUTHOR,
REG_OAI,
REG_YEAR,
THESIS_DIR)
from exception import (CdsException,
ExceptionUTF8,
Marc12Exception,
RecordException,
XmlException)
from .exception import (CdsException,
ExceptionUTF8,
Marc12Exception,
RecordException,
XmlException)
from inveniostore import InvenioStore
from iterrecord import IterRecord, REG_INT
from marc12 import Marc12
from record import Record
from recordconf import RecordConf
from recordinst import RecordInst
from recordpubli import DECODE_REF, RecordPubli
from recordthesis import RecordThesis
from .inveniostore import InvenioStore
from .iterrecord import IterRecord, REG_INT
from .marc12 import Marc12
from .record import Record
from .recordconf import RecordConf
from .recordinst import RecordInst
from .recordpubli import DECODE_REF, RecordPubli
from .recordthesis import RecordThesis
def load_record(host, record_id):
......
# -*- coding: utf-8 -*-
""" invenio_tools.base
"""
import re
ARXIV = u"arXiv"
ARXIV_PDF = u"http://arxiv.org/pdf/"
ARXIV = "arXiv"
ARXIV_PDF = "http://arxiv.org/pdf/"
MSG_NO_CONF = "Reject no conference information"
MSG_NO_COUNTRY = "Reject invalid country"
......@@ -13,8 +12,8 @@ MSG_NO_PUBLISHER = "Reject invalid publisher"
MSG_NO_THESIS = "Reject no thesis information"
MSG_WELL_FORMED_COLLABORATION = "Reject collaboration is not well formed"
OAI = u"oai:%s:%s"
OAI_URL = u"http://%s/record/%s"
OAI = "oai:%s:%s"
OAI_URL = "http://%s/record/%s"
# numeric part of an arXiv identifier: digits, a dot, then digits
# (raw string so that ``\d`` and ``\.`` reach the regex engine untouched)
REG_ARXIV_NUMBER = re.compile(r"\d+\.\d+")
......@@ -28,7 +27,7 @@ REG_AUTHOR = re.compile(r"^([\w\- ]+), (\w+)\.?[\- ]*(\w+)*\.?$", re.UNICODE)
REG_OAI = re.compile(r"oai:([a-z\.]+):([\d]+)")
REG_YEAR = re.compile(r"(\d{4})")
THESIS_DIR = u"dir."
THESIS_DIR = "dir."
def is_conference(record):
......@@ -43,16 +42,16 @@ def is_conference(record):
to a conference.
"""
if u"111" in record:
if "111" in record:
return True
# try with the conference key
# the location of this value depends on the store
# cds.cern.ch (962, n) and inspirehep.net (773,w).
if record.host().startswith("cds"):
field, subfield = u"962", "n"
field, subfield = "962", "n"
else:
field, subfield = u"773", "w"
field, subfield = "773", "w"
return len(record._get(field, subfield)) > 0
......@@ -68,21 +67,21 @@ def is_institute(record):
bool: ``True`` when the MARC record describes an institute.
"""
# u"980": [
# {"b": [u"CK90", u"HEP200", u"PDGLIST", u"PPF", u"TOP500", u"WEB"]},
# {"a": u"INSTITUTION"},
# {"a": u"CORE"}
# "980": [
# {"b": ["CK90", "HEP200", "PDGLIST", "PPF", "TOP500", "WEB"]},
# {"a": "INSTITUTION"},
# {"a": "CORE"}
# ]
if u"980" in record:
if "980" in record:
if isinstance(record[u"980"], list):
for di in record[u"980"]:
for k, v in di.iteritems():
if k == "a" and v == u"INSTITUTION":
if isinstance(record["980"], list):
for di in record["980"]:
for k, v in list(di.items()):
if k == "a" and v == "INSTITUTION":
return True
elif isinstance(record[u"980"], dict) and "a" in record[u"980"] and \
record[u"980"]["a"] == u"INSTITUTION":
elif isinstance(record["980"], dict) and "a" in record["980"] and \
record["980"]["a"] == "INSTITUTION":
return True
return False
......@@ -99,6 +98,6 @@ def is_thesis(record):
bool: ``True`` when the MARC record describes a thesis.
"""
li = record._get(u"980", "a", force_list=True)
li = record._get("980", "a", force_list=True)
val = ", ".join(li)
return "THESIS" in val
# -*- coding: utf-8 -*-
""" invenio_tools.exception
"""
class ExceptionUTF8(Exception):
    """Exception in which unicode arguments are encoded as a string.

    With a Python 2 interpreter, every ``unicode`` argument is encoded as
    an UTF-8 byte string before being handed to ``Exception``.
    With a Python 3 interpreter, the ``unicode`` builtin does not exist and
    the arguments are forwarded unchanged.

    Args:
        *args: the arguments of the exception.

    """
    def __init__(self, *args):
        try:
            # only defined by Python 2 interpreters
            text_type = unicode  # noqa: F821
        except NameError:
            text_type = None

        if text_type is not None:
            args = [arg.encode("utf-8") if isinstance(arg, text_type) else arg
                    for arg in args]

        Exception.__init__(self, *args)
class CdsException(ExceptionUTF8):
class CdsException(Exception):
pass
class Marc12Exception(ExceptionUTF8):
class Marc12Exception(Exception):
pass
class RecordException(ExceptionUTF8):
class RecordException(Exception):
pass
class XmlException(ExceptionUTF8):
class XmlException(Exception):
pass
# -*- coding: utf-8 -*-
""" invenio_tools.inveniostore
"""
import httplib
import http.client
import json
import re
import requests
import time
from exception import CdsException
from .exception import CdsException
CDS_SEARCH_KEYS = ("req", "cc", "c", "ec", "p", "f", "rg", "sf", "so", "sp",
......@@ -81,7 +80,7 @@ class InvenioStore(object):
# the server is busy or returned an error: wait one minute and retry.
# the number of trials is limited to 5
if code == httplib.SERVICE_UNAVAILABLE or code != httplib.OK:
if code == http.client.SERVICE_UNAVAILABLE or code != http.client.OK:
if self._try == 5:
raise CdsException("%s %s" % (MSG_HTTP_ERROR, code))
......
# -*- coding: utf-8 -*-
""" invenio_tools.iterrecord
"""
import re
from exception import Marc12Exception
from record import Record
from .exception import Marc12Exception
from .record import Record
from xml.dom.minidom import parseString
......@@ -109,14 +108,14 @@ class IterRecord(object):
keys = []
for di in record[field]:
keys.extend(di.iterkeys())
keys.extend(di.keys())
# in a set duplicate entries are removed
# the next statement is true when all keys are different
if len(keys) == len(set(keys)):
di = record[field][0]
for i in range(1, len(record[field])):
for (k, v) in record[field][i].iteritems():
for k, v in list(record[field][i].items()):
di[k] = v
record[field] = di
......@@ -135,12 +134,12 @@ class IterRecord(object):
di, ko = record[field][index], False
# check that the keys do not exist in the big one
keys = di.keys()
keys = list(di.keys())
for i in range(len(record[field])):
if i == index:
continue
for k in record[field][i].iterkeys():
for k in record[field][i].keys():
if k in di:
ko = True
break
......@@ -155,7 +154,7 @@ class IterRecord(object):
if i == index:
continue
for (k, v) in record[field][i].iteritems():
for k, v in list(record[field][i].items()):
di[k] = v
record[field] = di
......@@ -272,7 +271,7 @@ class IterRecord(object):
def __iter__(self):
    """Return the object itself as the iterator.

    The items are delivered by the ``next`` / ``__next__`` method
    defined right after this one.

    """
    return self
def next(self):
def __next__(self):
"""
Returns:
Record: the next decoded record.
......
# -*- coding: utf-8 -*-
""" invenio_tools.marc12
"""
import re
from base import (is_conference,
is_institute,
is_thesis,
MSG_NO_CONF,
REG_OAI)
from .base import (is_conference,
is_institute,
is_thesis,
MSG_NO_CONF,
REG_OAI)
from exception import Marc12Exception
from inveniostore import InvenioStore
from iterrecord import IterRecord
from recordconf import RecordConf
from recordinst import RecordInst
from recordpubli import RecordPubli
from recordthesis import RecordThesis
from .exception import Marc12Exception
from .inveniostore import InvenioStore
from .iterrecord import IterRecord
from .recordconf import RecordConf
from .recordinst import RecordInst
from .recordpubli import RecordPubli
from .recordthesis import RecordThesis
MSG_DECODING_FAILED = "Record decoding failed."
......@@ -66,14 +65,14 @@ class Marc12(object):
conference = self._get_conference(host, id_conf, key)
# protection: the id can be a reference to another object, like a book
if u"111" not in conference:
if "111" not in conference:
return
# copy conference information in the current record
# the conference URL is in 8564u
record[u"111"] = conference[u"111"]
record["111"] = conference["111"]
if "8564" in conference:
record[u"8564"] = conference[u"8564"]
record["8564"] = conference["8564"]
def __call__(self, xml, **kwargs):
"""
......@@ -142,7 +141,7 @@ class Marc12(object):
for conf_id in ids:
xml = cds.get_record(conf_id)
for conference in IterRecord(xml):
if conference._get(u"111", "g") == key:
if conference._get("111", "g") == key:
return conference
raise Marc12Exception(MSG_NO_CONF)
......@@ -154,10 +153,10 @@ class Marc12(object):
In that case the record looks like::
{
u"0248_": {"a": u"oai:cds.cern.ch:1366561"},
u"001": u"1366561",
u"980": {"c": u"DELETED"},
u"970": {"d": u"1366710"}
"0248_": {"a": "oai:cds.cern.ch:1366561"},
"001": "1366561",
"980": {"c": "DELETED"},
"970": {"d": "1366710"}
}
The method replaces the old record with the new one, using the OAI URL.
......@@ -178,24 +177,24 @@ class Marc12(object):
not replaced by a new one.
"""
is_deleted = u"980" in record \
and "c" in record[u"980"] and record[u"980"]["c"] == "DELETED"
is_deleted = "980" in record \
and "c" in record["980"] and record["980"]["c"] == "DELETED"
if not is_deleted:
return record
is_replaced = \
u"970" in record and "d" in record[u"970"] \
and u"0248_" in record and "a" in record[u"0248_"]
"970" in record and "d" in record["970"] \
and "0248_" in record and "a" in record["0248_"]
if not is_replaced:
raise Marc12Exception(MSG_DECODING_FAILED)
match = REG_OAI.match(record[u"0248_"]["a"])
match = REG_OAI.match(record["0248_"]["a"])
if match:
cds = InvenioStore(match.group(1))
xml = cds.get_record(record[u"970"]["d"])
new_record = IterRecord(xml).next()
xml = cds.get_record(record["970"]["d"])
new_record = next(IterRecord(xml))
return new_record
raise Marc12Exception(MSG_DECODING_FAILED)
......
# -*- coding: utf-8 -*-
""" invenio_tools.record
"""
import pprint
from base import OAI, OAI_URL, REG_OAI
from .base import OAI, OAI_URL, REG_OAI
class Record(dict):
......@@ -47,14 +46,14 @@ class Record(dict):
# meta data
# the authors of my institutes signing the record
# string containing a list of names separated by commas
self.my_authors = u""
self.my_authors = ""
def _get(self, field, subfield, force_list=False):
"""Get the value associated to the key C{field} and C{subfield}.
Args:
field (unicode): typical values are ``u"001"``, ``u"700"``,
``u"909CO"``, ....
field (unicode): typical values are ``"001"``, ``"700"``,
``"909CO"``, ....
subfield (str): typical values are ``"a"``, ``"b"``, ....
......@@ -64,7 +63,7 @@ class Record(dict):
unicode or list: empty string / list when not defined.
"""
val = u""
val = ""
if field in self and subfield in self[field]:
val = self[field][subfield]
......@@ -102,7 +101,7 @@ class Record(dict):
if match:
return OAI_URL % (match.group(1), match.group(2))
return u""
return ""
def debug(self):
"""Print the record structure on the standard output.
......@@ -125,14 +124,14 @@ class Record(dict):
val = self.primary_oai()
if not val:
self.__host = None
return u""
return ""
match = REG_OAI.match(val)
if match:
self.__host = match.group(1)
return self.__host
return u""
return ""
def id(self):
"""The id of the record in the store.
......@@ -141,7 +140,7 @@ class Record(dict):
unicode: the unique id of the record in the store
"""
return self[u"001"]
return self["001"]
def oai(self):
"""The Open Archive Initiative identifier(s).
......@@ -154,13 +153,13 @@ class Record(dict):
"""
oai_1 = self.primary_oai()
if not oai_1:
return u""
return ""
oai_2 = self.secondary_oai()
if not oai_2:
return oai_1
return u"%s, %s" % (oai_1, oai_2)
return "%s, %s" % (oai_1, oai_2)
def oai_url(self):
"""The Open Archive Initiative identifier URL(s).
......@@ -174,13 +173,13 @@ class Record(dict):
"""
oai_url_1 = self.primary_oai_url()
if not oai_url_1:
return u""
return ""
oai_url_2 = self.secondary_oai_url()
if not oai_url_2:
return oai_url_1
return u"%s, %s" % (oai_url_1, oai_url_2)
return "%s, %s" % (oai_url_1, oai_url_2)
def primary_oai(self):
"""The primary Open Archive Initiative identifier.
......@@ -195,14 +194,14 @@ class Record(dict):
"""
# the location of the OAI information depends on the store
# cds: (0248, a), inspirehep: (909CO, o)
if u"0248" in self:
field, subfield = u"0248", "a"
if "0248" in self:
field, subfield = "0248", "a"
elif u"909CO" in self:
field, subfield = u"909CO", "o"
elif "909CO" in self:
field, subfield = "909CO", "o"
else:
return u""
return ""
# standard case
value = self._get(field, subfield)
......@@ -215,7 +214,7 @@ class Record(dict):
if el.endswith(myid):
return el
return u""
return ""
return value
......@@ -254,21 +253,21 @@ class Record(dict):
"""
host = self.host()
if host == u"cds.cern.ch":
invenio_key = u"inspire"
secondary_host = u"inspirehep.net"
if host == "cds.cern.ch":
invenio_key = "inspire"
secondary_host = "inspirehep.net"
elif host == u"inspirehep.net":
invenio_key = u"cds"
secondary_host = u"cds.cern.ch"
elif host == "inspirehep.net":
invenio_key = "cds"
secondary_host = "cds.cern.ch"
else:
return u""
return ""
if u"035" not in self:
return u""
if "035" not in self:
return ""
values = self[u"035"]
values = self["035"]
if isinstance(values, list):
for di in values:
if "9" in di and "a" in di and di["9"].lower() == invenio_key:
......@@ -278,7 +277,7 @@ class Record(dict):
if values["9"].lower() == invenio_key:
return OAI % (secondary_host, values["a"])
return u""
return ""
def secondary_oai_url(self):
"""The Open Archive Initiative URL for the secondary OAI.
......@@ -292,4 +291,4 @@ class Record(dict):
return self._oai_url(self.secondary_oai())
def sysno(self):
return self._get(u"970", "a")
return self._get("970", "a")
# -*- coding: utf-8 -*-
""" invenio_tools.recordconf
"""
import re
from base import REG_YEAR
from .base import REG_YEAR
from datetime import datetime
from plugin_dbui import CLEAN_SPACES
from recordpubli import RecordPubli
from .recordpubli import RecordPubli
REG_DATE = re.compile(r"(\d{4}-\d{2}-\d{2})")
......@@ -47,11 +46,11 @@ class RecordConf(RecordPubli):
"""
if self.host().startswith("inspirehep"):
start = self._get(u"111", "x")
end = self._get(u"111", "y")
start = self._get("111", "x")
end = self._get("111", "y")
if not (REG_DATE.match(start) and REG_DATE.match(end)):
return u""
return ""
ds = datetime.strptime(start, "%Y-%m-%d")
de = datetime.strptime(end, "%Y-%m-%d")
......@@ -63,7 +62,7 @@ class RecordConf(RecordPubli):
value = "%s - %s" % tpl
else:
value = self._get(u"111", "d")
value = self._get("111", "d")
return value
......@@ -87,7 +86,7 @@ class RecordConf(RecordPubli):
unicode:
"""
return self._get(u"111", "g")
return self._get("111", "g")
def conference_location(self):
"""The conference location.
......@@ -96,10 +95,10 @@ class RecordConf(RecordPubli):
unicode: the pattern is ``town, country``
"""
location = self._get(u"111", "c")