# record.py
# -*- coding: utf-8 -*-
""" invenio_tools.record

"""
import pprint


from base import OAI_URL, REG_OAI


class Record(dict):
    """MARC record (see U{http://www.loc.gov/marc/bibliographic/}).

    A dictionary with the following structure::

            record[field][subfield] = value

            record[field][subfield] = [val1, val2, ....]

            record[field] = [dict1(subfield1=..., subfield2=...),
                             dict2(subfield1=..., subfield2=...), ...]

    In the MARC standard, the C{field} is a string containing at least three
    digits while the C{subfield} is a letter. The type of the C{field} is
    unicode and C{subfield} is string.

    The class comes with a collection of methods to extract the record
    information masking the C{field} and the C{subfield} codification.

    The relation between methods and MARC fields is the following::

                              |  CDS    | INSPIREHEP
        ----------------------+---------+------------
        id                    | 001     |
        oai                   | 0248 a  | 909CO o
        ----------------------+---------+------------

    """
    def __init__(self, *args, **kwargs):
        """Build the record exactly like a C{dict}.

        @param args: positional arguments forwarded to C{dict.__init__}.
        @param kwargs: keyword arguments forwarded to C{dict.__init__}.

        """
        dict.__init__(self, *args, **kwargs)

        # private cache (None means "not computed yet")
        self.__host = None

        # meta data
        # the authors of my institutes signing the record
        # string containing a list of name separated by a comma
        self.my_authors = ''

    def _get(self, field, subfield, force_list=False):
        """Get the value associated to the key C{field} and C{subfield}.

        @type field: unicode
        @param field: typical values are u"001", u"700", u"909CO", ....

        @type subfield: str
        @param subfield: typical values are "a", "b", ....

        @type force_list: bool
        @param force_list: always return a C{list} when C{True}.

        @rtype: unicode or list
        @return: empty string / list when not defined.

        """
        val = u''
        entry = self.get(field)

        if isinstance(entry, list):
            # repeated field: collect the subfield of every occurrence
            # defining it
            val = [el[subfield] for el in entry
                   if isinstance(el, dict) and subfield in el]

        elif isinstance(entry, dict) and subfield in entry:
            # NOTE: the dict check protects against control fields holding
            # a plain string (e.g. u"001"), where "subfield in entry" would
            # be a substring test followed by an invalid string indexing.
            val = entry[subfield]

        if force_list and not isinstance(val, list):
            val = [val] if val else []

        return val

    def _oai_match(self):
        """Match the OAI identifier of the record against C{REG_OAI}.

        When the OAI value is a list, only the identifiers ending with the
        record id are considered.

        @return: the match object of the first matching identifier or
        C{None} when the OAI is not defined or does not match.

        """
        val = self.oai()
        if not val:
            return None

        # NOTE: in few case we can have a list
        # see cds.cern.ch/record/1513204
        # [u'oai:cds.cern.ch:1513204', u'oai:cds.cern.ch:1512766']
        if isinstance(val, list):
            rec_id = self.id()
            candidates = [elt for elt in val if elt.endswith(rec_id)]

        # type(u'') is the text type of the running interpreter
        elif isinstance(val, type(u'')):
            candidates = [val]

        else:
            candidates = []

        for candidate in candidates:
            match = REG_OAI.match(candidate)
            if match:
                return match

        return None

    def debug(self):
        """Print the record structure on the standard output.

        """
        pprint.pprint(self)

    def host(self):
        """The host housing the record.

        @rtype: unicode
        @return: inspirehep.net or cds.cern.ch or an empty string
        when not defined.

        """
        # The value is computed once and cached in self.__host
        if self.__host is None:
            match = self._oai_match()
            self.__host = match.group(1) if match else u''

        return self.__host

    def id(self):
        """The id of the record in the store.

        @rtype: unicode
        @return: the unique id of the record in the store

        @raise KeyError: when the field u"001" is not defined.

        """
        return self[u"001"]

    def oai(self):
        """The Open Archive Initiative identifier

        @rtype: unicode or list
        @return: the pattern of the string is "oai:host:record_id".
        An empty string when it is not defined.

        """
        # the location of the oai information depends on the store
        # cds: (0248, a), inspirehep: (909CO, o)
        if u"0248" in self:
            return self._get(u"0248", "a")

        if u"909CO" in self:
            return self._get(u"909CO", "o")

        return u''

    def oai_url(self):
        """The Open Archive Initiative URL

        @rtype: str
        @return: the URL of the record in the store, e.g
        "http://inspirehep.net/record/123456" or an empty string
        when it is not defined

        """
        match = self._oai_match()
        if match is None:
            return u''

        return OAI_URL % (match.group(1), match.group(2))

    def sysno(self):
        """The content of the subfield C{a} of the MARC field C{970}.

        @rtype: unicode or list
        @return: empty string when not defined.

        """
        return self._get(u"970", "a")