recordcdsconfpaper.py 7.89 KB
Newer Older
1
""" store_tools.recordcdsconfpaper
2 3

"""
4 5 6 7
import re

from .base import REG_CONF, REG_YEAR, T4, T6
from .cdsstore import CdsStore
8
from plugin_dbui import CLEAN_SPACES
9 10 11
from .recordcdspubli import RecordCdsPubli

REX_DATE8 = re.compile(r"(\d{4})(\d{2})(\d{2})")
12 13


14
class RecordCdsConfPaper(RecordCdsPubli):
LE GAC Renaud's avatar
LE GAC Renaud committed
15 16
    """The record describing a conference talk or a proceeding.

    Attributes:
        conference (dict or None):
            the conference metadata:
                * addresses: [{cities: [], country: str, ...}, ...]
                * cnum: str
                * control_number: int
                * closing_date: str
                * opening_date: str
                * titles: [{value: str}, ...]
                * urls: [{value: str}, ...]
                * year

    """
LE GAC Renaud's avatar
LE GAC Renaud committed
30

31
    def __init__(self, recjson):
        """Build the record and attach the conference metadata.

        Args:
            recjson:
                the content of the record, forwarded unchanged to the
                constructor of the parent class.

        """
        super().__init__(recjson)

        # default value, kept when the conference cannot be resolved
        self.conference = None
        self._process_conference_data()

    def _process_conference_data(self):
        """Append the conference data to the record.

        The conference is identified via the field ``aleph_linking_page``
        of the record: by ``sysno`` (conference id) when it is defined,
        by ``up_link`` (conference key) otherwise. The conference record
        is fetched from the store ``cds.cern.ch`` and its ``meeting_name``
        entry is converted into a dictionary stored in ``self.conference``.

        ``self.conference`` is not modified when the conference cannot
        be resolved; the reason is reported at the debug level.

        """
        def as_list(value):
            # NOTE(review): presumably a field holding a single entry can
            # be delivered as a plain dict instead of a list -- confirm
            if value is None:
                return []
            return value if isinstance(value, list) else [value]

        logger = self.logger
        logger.debug(f"{T4}process conference data")

        if "aleph_linking_page" not in self:
            logger.debug(f"{T6}no field 'aleph_linking_page'")
            return

        link = self.get("aleph_linking_page", {})
        conf_id = link.get("sysno", None)
        conf_key = link.get("up_link", None)

        if conf_id is None and conf_key is None:
            logger.debug(f"{T6}no conference id and key")
            return

        # ........................................................................
        #
        # Get conference data by id when it is defined, by key otherwise
        #
        store = CdsStore("cds.cern.ch")

        if conf_id is not None:
            logger.debug(f"{T6}search by conference by id {conf_id}")
            recjson = store.get_record(conf_id)

            # NOTE(review): a mismatch is only reported, the record is
            # still processed -- confirm that this is intended
            if recjson.get("recid") != conf_id:
                logger.debug(f"{T6}failed to retrieve conference by id")

            if recjson.get("meeting_name", None) is None:
                logger.debug(f"{T6}no field 'meeting_name'")
                return

        else:
            # conf_key is necessarily defined at this stage
            logger.debug(f"{T6}search by conference by key {conf_key}")

            recjson = None
            for cid in store.get_ids(p=conf_key):
                candidate = store.get_record(cid)
                meetings = as_list(candidate.get("meeting_name"))

                # "coference_code" is the spelling used by the store
                if any(elt.get("coference_code", "") == conf_key
                       for elt in meetings):
                    recjson = candidate
                    break

            if recjson is None:
                logger.debug(f"{T6}failed to retrieve conference by key")
                return

        # ........................................................................
        #
        # Decode conference data
        # Convert structure to the one provided by the new inspirehep.net
        #
        meetings = as_list(recjson.get("meeting_name"))
        data = next((elt for elt in meetings if "year" in elt), None)

        if data is None:
            logger.debug(f"{T6}conference data not found")
            return

        # the location is expected to be "town, country" but the number
        # of commas is not guaranteed: the town is the first item, the
        # country the last one and it is empty when there is no comma
        parts = (data.get("location") or "").split(",")
        city = parts[0]
        country = parts[-1] if len(parts) > 1 else ""

        # the field url can be a single entry or a list of entries
        # NOTE(review): the class docstring announces [{value: str}, ...]
        # for urls while plain strings are stored here -- confirm
        urls = recjson.get("url") or {}
        if isinstance(urls, dict):
            urls = [urls]

        conference = {
            "addresses": [{
                "cities": [city.strip()],
                "country": country.strip()}],
            "cnum": data.get("coference_code"),
            "closing_date": data.get("closing_date", None),
            "opening_date": data.get("opening_date", None),
            "titles": [{"value": data.get("meeting", None)}],
            "urls": [elt.get("url", None) for elt in urls],
            "year": data.get("year", None)}

        # date format issue YYYYMMDD to YYYY-MM-DD
        # missing dates (None) are left as they are
        for key in ("closing_date", "opening_date"):
            value = conference[key]
            if not isinstance(value, str):
                continue

            mtch = REX_DATE8.match(value)
            if mtch:
                conference[key] = "-".join(mtch.groups())

        # ........................................................................
        #
        # Append conference data
        self.conference = conference
133

LE GAC Renaud's avatar
LE GAC Renaud committed
134 135
    def conference_country(self):
        """The country where the conference took place.

        Returns:
            str:
                the last comma-separated item of the conference location
                on which the filter *CLEAN_SPACES* is applied.
                The string is empty when the location is not defined.

        """
        # NOTE:
        #  * the country is extracted from the location since the latter
        #    is defined for both the cds and inspire stores
        #
        #  * the subfield country contains the country code (IT, FR, ...).
        #    It is only defined for cds
        #
        location = self.conference_location()

        if len(location) == 0:
            return ""

        return CLEAN_SPACES(location.split(",")[-1])
156

LE GAC Renaud's avatar
LE GAC Renaud committed
157 158
    def conference_dates(self):
        """The dates of the conference.

        Returns:
            str:
                the usual pattern is ``6-5 March 2012`` but it can vary
                between records and between stores since it is not
                standardised. The string is empty when the record
                provides an empty list of dates.

        """
        val = self._get("meeting_name", "date")

        # for list assume that the first item is the correct one
        # an empty list means that the dates are not defined
        if isinstance(val, list):
            return val[0] if val else ""

        return val
171

LE GAC Renaud's avatar
LE GAC Renaud committed
172 173
    def conference_id(self):
        """The conference identifier used in the store.

        Returns:
            int or None:
                the value of ``recid`` in the field ``meeting_note``.
                ``None`` when the field or the value is not defined.

        """
        if "meeting_note" not in self:
            return None

        return self["meeting_note"].get("recid")
183 184 185 186

    def conference_key(self):
        """The conference key used in the store.

        Returns:
            str:
                empty string when not defined

        """
        # algorithm depends on the store
        # CDS
        if "aleph_linking_page" in self:
            # the key up_link is optional
            return self["aleph_linking_page"].get("up_link") or ""

        # INSPIRE
        if "publication_info" in self:
            df = self["publication_info"]

            # na=False: rows without a cnum are never selected
            cnums = df[df.cnum.str.match(REG_CONF.pattern, na=False)].cnum

            # the key is only defined when it is unambiguous
            if len(cnums) == 1:
                return cnums.iloc[0]

        return ""
209 210 211 212

    def conference_location(self):
        """The conference location.

        Returns:
            str:
                - the pattern is ``town, country``
                - empty string when more than one location found
                - empty string when not defined

            The filter *CLEAN_SPACES* is applied.

        """
        location = self._get("meeting_name", "location", force_list=True)

        # keep the location only when it is unambiguous
        location = (location[0] if len(location) == 1 else "")

        return CLEAN_SPACES(location)

    def conference_title(self):
        """The title of the conference.

        Returns:
            str:
                the filter *CLEAN_SPACES* is applied.

        """
        value = self._get("meeting_name", "meeting")

        # for list assume that the first item is the correct one
        # an empty list is treated as an undefined title
        if isinstance(value, list):
            value = value[0] if value else ""

        return CLEAN_SPACES(value)
236 237 238 239

    def conference_town(self):
        """The town where the conference took place.

        Returns:
            str:
                the first comma-separated item of the conference location
                on which the filter *CLEAN_SPACES* is applied.
                The string is empty when the location is not defined.

        """
        location = self.conference_location()

        if len(location) == 0:
            return ""

        return CLEAN_SPACES(location.split(",")[0])
251 252 253 254

    def conference_url(self):
        """The URL of the conference home page.

        Returns:
            str:
                select arbitrarily the first URL when several
                are found. The string is empty when the URL
                is not defined.

        """
        if "meeting_note" not in self:
            return ""

        url = self["meeting_note"].get("url", "")

        # several URLs: keep the first one as announced above
        if isinstance(url, list):
            return url[0] if url else ""

        return url
266 267 268 269

    def conference_year(self):
        """The year of the conference.

        Returns:
            str:
                the first group matched by *REG_YEAR* in the conference
                dates. The string is empty when it is not defined.

        """
        # from the conference dates
        dates = self.conference_dates()

        # the dates can be something else than a string
        # (e.g. an empty list) when they are not defined
        if not isinstance(dates, str):
            return ""

        match = REG_YEAR.search(dates)
        if match:
            return match.group(1)

        return ""