record.py 8.28 KB
Newer Older
1 2 3 4 5 6 7
# -*- coding: utf-8 -*-
""" invenio_tools.record

"""
import pprint


8 9
from base import OAI, OAI_URL, REG_OAI
from __builtin__ import isinstance
10 11 12


class Record(dict):
    """The base class for a MARC record.

    A dictionary with the following structure::

            record[field][subfield] = value

            record[field][subfield] = [val1, val2, ....]

            record[field] = [dict1(subfield1=..., subfield2=...),
                             dict2(subfield1=..., subfield2=...), ...]

    In the `MARC <http://www.loc.gov/marc/>`_ standard, the ``field``
    is a string containing at least three digits while the ``subfield`` is
    a letter. The type of the ``field`` is unicode and ``subfield`` is string.

    The class comes with a collection of methods to extract the record
    information masking the ``field`` and the ``subfield`` codification.

    The relation between methods and MARC fields is the following::

        +---------------+--------+------------+
        |               | CDS    | INSPIREHEP |
        +---------------+--------+------------+
        | id            | 001    |            |
        | primary oai   | 0248 a | 909CO o    |
        | secondary oai | 035  a | 035 a      |
        +---------------+--------+------------+

    """
    def __init__(self, *args):
        """Initialise the dictionary and the record meta data.

        Args:
            *args: positional arguments forwarded to :class:`dict`.

        """
        dict.__init__(self, *args)

        # private cache for the host name, filled lazily by host()
        self.__host = None

        # meta data
        # the authors of my institutes signing the record
        # string containing a list of names separated by a comma
        self.my_authors = u""

    def _get(self, field, subfield, force_list=False):
        """Get the value associated to the keys ``field`` and ``subfield``.

        Args:
            field (unicode): typical values are ``u"001"``, ``u"700"``,
                ``u"909CO"``, ....

            subfield (str): typical values are ``"a"``, ``"b"``, ....

            force_list (bool): always return a *list* when ``True``.

        Returns:
            unicode or list: empty string / list when not defined.
                When the field is repeated (stored as a list of
                dictionaries), a list with the value of every occurrence
                defining the subfield is returned.

        """
        val = u''
        content = self.get(field)

        # Dispatch on the container type. A field holding a plain string
        # (control field) has no subfield: testing ``subfield in string``
        # would be a substring test followed by an invalid indexing.
        if isinstance(content, dict):
            if subfield in content:
                val = content[subfield]

        elif isinstance(content, list):
            val = [el[subfield] for el in content
                   if isinstance(el, dict) and subfield in el]

        if force_list and not isinstance(val, list):
            val = [val] if val else []

        return val

    def _oai_url(self, value):
        """Build the Open Archive Initiative URL.

        Args:
            value (unicode): OAI identifier, *e.g.* ``oai:host:id``

        Returns:
            unicode: the pattern of the string is ``http://host/record/id``.
                The string is empty when it is not defined or when the value
                is not well formed.

        """
        if not value:
            return u""

        match = REG_OAI.match(value)
        if match:
            return OAI_URL % (match.group(1), match.group(2))

        return u""

    def debug(self):
        """Print the record structure on the standard output.

        """
        pprint.pprint(self)

    def host(self):
        """The store housing the record.

        Returns:
            unicode: ``inspirehep.net`` or ``cds.cern.ch`` or an empty string
                when not defined.

        """
        # The value is computed once and cached in self.__host.
        # Only a successful extraction is cached.
        if self.__host is not None:
            return self.__host

        oai = self.primary_oai()
        match = REG_OAI.match(oai) if oai else None
        if match is None:
            return u''

        self.__host = match.group(1)
        return self.__host

    def id(self):
        """The id of the record in the store.

        Returns:
            unicode: the unique id of the record in the store

        Raises:
            KeyError: when the field ``001`` is not defined.

        """
        return self[u"001"]

    def oai(self):
        """The Open Archive Initiative identifier(s).

        Returns:
            unicode: the primary and secondary OAI identifier are separated
                by a comma. The pattern of the identifier is ``oai:host:id`` or
                an empty string when it is not defined.

        """
        oai_1 = self.primary_oai()
        if not oai_1:
            return u""

        oai_2 = self.secondary_oai()
        if not oai_2:
            return oai_1

        return u"%s, %s" % (oai_1, oai_2)

    def oai_url(self):
        """The Open Archive Initiative identifier URL(s).

        Returns:
            unicode: the primary and secondary URLs are separated by a comma.
                The pattern of the URL is ``http://host/record/id`` or
                an empty string when it is not defined or when the OAI is
                not well formed.

        """
        oai_url_1 = self.primary_oai_url()
        if not oai_url_1:
            return u""

        oai_url_2 = self.secondary_oai_url()
        if not oai_url_2:
            return oai_url_1

        return u"%s, %s" % (oai_url_1, oai_url_2)

    def primary_oai(self):
        """The primary Open Archive Initiative identifier.

        The primary OAI identifier corresponds to the record of the store.
        It contains the value returned by the method :meth:`.id`.

        Returns:
            unicode: the pattern of the string is ``oai:host:id``.
                It is an empty string when not defined

        """
        # the location of the OAI information depends on the store
        # cds: (0248, a), inspirehep: (909CO, o)
        if u"0248" in self:
            field, subfield = u"0248", "a"

        elif u"909CO" in self:
            field, subfield = u"909CO", "o"

        else:
            return u""

        # standard case
        value = self._get(field, subfield)

        # in some cases the OAI is a list:
        # select the OAI corresponding to the current ID.
        if isinstance(value, list):
            rec_id = self.id()
            for candidate in value:
                # compare the whole trailing component of ``oai:host:id``;
                # a bare suffix test would match id 123 with ...:9123
                if candidate.rsplit(u":", 1)[-1] == rec_id:
                    return candidate

            return u""

        return value

    def primary_oai_url(self):
        """The Open Archive Initiative URL for the primary OAI.

        Note:
            A record can be deleted and replaced by a new one.
            In that case the OAI is not changed but the record has
            a new *id* and new *URL* which is returned by this method.

        Returns:
            unicode: the pattern of the string is ``http://host/record/id``.
                The string is empty when it is not defined or when the OAI
                is not well formed.

        """
        oai = self.primary_oai()
        if not oai:
            return u""

        rec_id = self.id()

        # the OAI points to the current record
        if oai.rsplit(u":", 1)[-1] == rec_id:
            return self._oai_url(oai)

        # the record has been replaced: keep the host, use the new id
        host = self.host()
        if not host:
            return u""

        return OAI_URL % (host, rec_id)

    def secondary_oai(self):
        """The secondary OAI identifier.

        If the current store is *cds.cern.ch*, the secondary OAI identifier
        corresponds to the record in the other store, *inspirehep.net*,
        and vice versa. The identifier of the record in the other store
        is read from the field ``035``: subfield ``9`` names the store
        and subfield ``a`` contains the identifier.

        Returns:
            unicode: the pattern of the string is ``oai:host:id``.
                It is an empty string when not defined

        """
        host = self.host()
        if host == u"cds.cern.ch":
            invenio_key = u"inspire"
            secondary_host = u"inspirehep.net"

        elif host == u"inspirehep.net":
            invenio_key = u"cds"
            secondary_host = u"cds.cern.ch"

        else:
            return u""

        if u"035" not in self:
            return u""

        # the field is either a single dictionary or a list of dictionaries
        values = self[u"035"]
        if isinstance(values, dict):
            values = [values]

        elif not isinstance(values, list):
            return u""

        for di in values:
            if not isinstance(di, dict):
                continue

            if "9" in di and "a" in di and di["9"].lower() == invenio_key:
                return OAI % (secondary_host, di["a"])

        return u""

    def secondary_oai_url(self):
        """The Open Archive Initiative URL for the secondary OAI.

        Returns:
            unicode: the pattern of the string is ``http://host/record/id``.
                The string is empty when it is not defined or when the OAI
                is not well formed.

        """
        return self._oai_url(self.secondary_oai())

    def sysno(self):
        """The value stored in the field ``970``, subfield ``a``.

        Returns:
            unicode or list: empty string when not defined.

        """
        return self._get(u"970", "a")