recordhep.py 4.34 KB
Newer Older
LE GAC Renaud's avatar
LE GAC Renaud committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
"""recordhep.py

"""
import pprint


class RecordHep(dict):
    """Base class for JSON record coming from inspirehep.net version v2.

    The record behaves like the underlying dictionary and adds helper
    methods returning the record identifier, the OAI identifiers and
    the corresponding URLs.

    Schema documentation is defined here:
    https://inspire-schemas.readthedocs.io/en/latest/schemas/

    Args:
        recjson (dict):
            meta data from the JSON record returned by the store

    """

    def __init__(self, recjson):

        super().__init__(recjson)

        # meta data
        # the authors of my institutes signing the record
        # string containing a list of name separated by a comma
        self.my_authors = ""

    @staticmethod
    def _oai_url(value):
        """Build the Open Archive Initiative URL.

        Args:
            value (str):
                OAI identifier, *e.g.* ``oai:host:id``

        Returns:
            str:
                the pattern of the string is `https://host/api/literature/id`
                The string is empty when it is not defined or when the value
                is not well formed.

        """
        # not defined (None, empty) or not a string at all
        if not value or not isinstance(value, str):
            return ""

        # a well formed identifier has exactly three non-empty fields
        # separated by a colon, the first one being the literal "oai"
        fields = value.strip().split(":")
        if len(fields) != 3:
            return ""

        prefix, host, recid = fields
        if prefix != "oai" or not host or not recid:
            return ""

        return f"https://{host}/api/literature/{recid}"

    def _cds_id(self):
        """Find the identifier of the record in the store *cds.cern.ch*.

        The identifier is read from the first element of the list
        ``external_system_identifiers`` for which ``schema`` is ``CDS``.
        Elements without ``schema`` or without ``value`` are ignored.

        Returns:
            the value of the CDS identifier or an empty string when
            it is not defined.

        """
        # tolerate a missing key as well as a key set to None
        for elt in self.get("external_system_identifiers") or ():
            if elt.get("schema") == "CDS" and elt.get("value"):
                return elt["value"]

        return ""

    def debug(self):
        """Print the record structure on the standard output.

        """
        pprint.pprint(self)

    def host(self):
        """The store housing the record.

        Returns:
            str:
                always ``inspirehep.net``

        """
        return "inspirehep.net"

    def id(self):
        """The id of the record in the store.

        Returns:
            int:
                the value of the field ``control_number``

        Raises:
            KeyError:
                when the field ``control_number`` is missing

        """
        return self["control_number"]

    def oai(self):
        """The Open Archive Initiative identifier(s).

        Returns:
            str:
                the primary and secondary OAI identifier are separated
                by a comma. The pattern of the identifier is ``oai:host:id``.
                The secondary identifier is omitted when it is not defined.

        """
        lst = [self.primary_oai(), self.secondary_oai()]

        # keep only the identifiers which are defined
        return ", ".join(el for el in lst if el)

    def oai_url(self):
        """The Open Archive Initiative identifier URL(s).

        Returns:
            str:
                the primary and secondary URLs are separated by a comma.
                The pattern of the primary URL is
                ``https://host/api/literature/id``.
                The secondary URL is omitted when it is not defined.

        """
        lst = [self.primary_oai_url(), self.secondary_oai_url()]

        # keep only the URLs which are defined
        return ", ".join(el for el in lst if el)

    def primary_oai(self):
        """The primary Open Archive Initiative identifier.

        The primary OAI identifier matches the record identifier.

        Returns:
            str:
                the pattern of the string is ``oai:inspirehep.net:id``.

        Raises:
            KeyError:
                when the field ``control_number`` is missing

        """
        return f"oai:inspirehep.net:{self['control_number']}"

    def primary_oai_url(self):
        """The Open Archive Initiative URL for the primary OAI.

        Returns:
            str:
                the pattern of the string is
                ``https://inspirehep.net/api/literature/id``.

        Raises:
            KeyError:
                when the field ``control_number`` is missing

        """
        recid = self["control_number"]
        return f"https://inspirehep.net/api/literature/{recid}"

    def secondary_oai(self):
        """The secondary OAI identifier.

        the secondary OAI identifier corresponds to the record in the
        store, *cds.cern.ch*.

        Returns:
            str:
                the pattern of the string is ``oai:cds.cern.ch:id``.
                It is an empty string when not defined

        """
        cds_id = self._cds_id()
        return f"oai:cds.cern.ch:{cds_id}" if cds_id else ""

    def secondary_oai_url(self):
        """The Open Archive Initiative URL for the secondary OAI.

        the secondary OAI URL corresponds to the record in the
        store, *cds.cern.ch*.

        Returns:
            str:
                the pattern of the string is
                ``https://cds.cern.ch/record/id``.
                The string is empty when it is not defined.

        """
        cds_id = self._cds_id()
        return f"https://cds.cern.ch/record/{cds_id}" if cds_id else ""