Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

recordpubli.py 24.7 KB
Newer Older
1 2 3 4 5 6
""" invenio_tools.recordpubli

"""
import re


7 8 9 10
from .base import (ARXIV,
                   ARXIV_PDF,
                   REG_ARXIV_NUMBER,
                   REG_YEAR)
11

12
from .exception import RecordException
13
from filters import CLEAN_COLLABORATION
14
from numpy import NaN
15
from pandas import concat, DataFrame
16
from plugin_dbui import as_list, CLEAN_SPACES
17
from .record import Record
18

19 20 21 22 23 24 25 26

# Formats accepted by RecordPubli.reformat_authors for the author names.
# Any other value makes that method raise a RecordException.
AUTHOR_FORMATS = [
    "First, Last",
    "F. Last",
    "Last",
    "Last, First",
    "Last F."]

LE GAC Renaud's avatar
LE GAC Renaud committed
27 28 29
# decode publication reference:
# Phys. Rev. Lett. 113, 032001 (2014)
# Eur. Phys. J. C (2014) 74:2883
30 31 32 33
# Both patterns expose the same named groups: p, v, c and y.
#   _ref1 matches the layout "<p> <v>, <c> (<y>)"
#         e.g. Phys. Rev. Lett. 113, 032001 (2014)
#   _ref2 matches the layout "<p> (<y>) <v>:<c>"
#         e.g. Eur. Phys. J. C (2014) 74:2883
# NOTE(review): from these examples p looks like the review abbreviation,
# v the volume, c the page / article number and y the year -- confirm.
_ref1 = r"(?P<p>[A-Za-z\. ]+) +(?P<v>\d+),? +(?P<c>[\d-]+) +\((?P<y>[\d]+)\)"
_ref2 = r"(?P<p>[A-Za-z\. ]+) +\((?P<y>\d+)\) +(?P<v>[\d]+):(?P<c>[\d-]+)"
# compiled once at import time, in the order listed above
DECODE_REF = [re.compile(_ref1), re.compile(_ref2)]

34 35
# Message of the RecordException raised by RecordPubli.reformat_authors
# when the requested format is not listed in AUTHOR_FORMATS.
MSG_INVALID_FMT = "Invalid format for author"

LE GAC Renaud's avatar
LE GAC Renaud committed
36 37
# The keys (subfields of ``publication_info``) which together contain
# the paper reference.
PAPER_REFERENCE_KEYS = {"pagination", "title", "volume", "year"}
38

LE GAC Renaud's avatar
LE GAC Renaud committed
39 40
# Extract the initial(s) of a first name.
# The pattern has three capturing groups, consumed by to_initial(x, y, z):
#   1. the first part of the first name,
#   2. an optional hyphen separator,
#   3. an optional second part.
# A dot is tolerated after each part.
# NOTE: the chained assignment also binds the module-level name ``initial``
# to the same pattern string.
REG_INITIAL = initial = r"^(\w+)\.?(\-)* *(\w+)*\.?$"
41 42


43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
def to_initial(x, y, z):
    """Build the initial(s) of a first name already split in three parts.

    Examples of split and of the resulting initials:

        Albert                (x="Albert", y="", z="")          -> "A."
        Antonio Augusto       (x="Antonio", y="", z="Augusto")  -> "A. A."
        Jean-Pierre           (x="Jean", y="-", z="Pierre")     -> "J.-P."

    Args:
        x (str): first part of the first name
        y (str): separator between the two parts (empty or hyphen)
        z (str): second part of the first name (may be empty)

    Returns:
        str: the initial(s), each one followed by a dot.

    """
    # slicing (instead of indexing) keeps empty parts harmless
    first = x[0:1]
    second = z[0:1]

    # single first name
    if z == "":
        return "%s." % first

    # two first names separated by spaces
    if y == "":
        return "%s. %s." % (first, second)

    # compound first name: keep the separator between the initials
    return "%s.%s%s." % (first, y[0:1], second)

67

68 69 70 71
def to_str(x):
    """Flatten a list of strings into a single ``|`` separated string.

    Args:
        x (list or object): the value to convert.

    Returns:
        the joined string when *x* is a list, otherwise *x* itself unchanged.

    """
    if isinstance(x, list):
        return "|".join(x)

    return x


72
class RecordPubli(Record):
    """The record describes an article, preprint, proceeding, report and talk.
    The main ``field`` and ``subfield`` are::

         +---------------------------------+----------------------------------+
         | field                           | subfield                         |
         +---------------------------------+----------------------------------+
         | FIXME_OAI (inspire)             | id                               |
         | abstract                        |                                  |
         | accelerator_experiment          |                                  |
         | agency_code (cds)               |                                  |
         | authors                         | INSPIRE_number, affiliation,     |
         |                                 | control_number, first_name,      |
         |                                 | full_name, last_name,            |
         |                                 | relator_name (phd director)      |
         | base (cds)                      |                                  |
         | collection                      |                                  |
         | comment                         |                                  |
         | copyright_status (cds)          |                                  |
         | corporate_name                  | collaboration                    |
         | creation_date                   |                                  |
         | doi                             |                                  |
         | email_message (cds)             |                                  |
         | filenames                       |                                  |
         | files                           | comment, description, eformat,   |
         |                                 | full_name, full_path, magic,     |
         |                                 | name, path, size, status,        |
         |                                 | subformat, superformat, type,    |
         |                                 | url, version                     |
         | filetypes                       |                                  |
         | imprint                         |                                  |
         | keywords                        |                                  |
         | language (cds)                  |                                  |
         | license                         |                                  |
         | number_of_authors               |                                  |
         | number_of_citations             |                                  |
         | number_of_comments              |                                  |
         | number_of_reviews               |                                  |
         | oai (cds)                       | value                            |
         | other_report_number (cds)       |                                  |
         | persistent_identifiers_keys     |                                  |
         | physical_description            |                                  |
         | prepublication                  | date, publisher_name, place      |
         | primary_report_number           |                                  |
         | publication_info                | pagination, title, volume, year  |
         | recid                           | none                             |
         | reference (inspire)             |                                  |
         | report_number (cds)             | internal, report_number          |
         | source_of_acquisition (inspire) |                                  |
         | status_week (cds)               |                                  |
         | subject                         |                                  |
         | system_control_number           | institute, value or canceled     |
         | thesaurus_terms                 |                                  |
         | title                           | title                            |
         | title_additional (inspire)      |                                  |
         | url (cds)                       | description, url                 |
         | version_id                      |                                  |
         +---------------------------------+----------------------------------+
130 131

    """
132 133
    def __init__(self, *args):
        """Build the record, then convert the ``authors`` and
        ``publication_info`` fields into DataFrames.

        Args:
            *args: forwarded unchanged to ``Record.__init__``.

        """
        # format currently applied to the author names,
        # used by reformat_authors to skip useless work
        self._last_fmt_author = "Last, First"

        Record.__init__(self, *args)

        # post-processing, authors first
        for process in (self._process_authors,
                        self._process_publication_info):
            process()
139 140 141 142

    def _process_authors(self):
        """Convert authors information into DataFrame:

143 144 145
        Authors and their affiliations are stored in DataFrame with the
        following structure:

LE GAC Renaud's avatar
LE GAC Renaud committed
146 147 148 149 150 151 152 153 154 155
            +---------------+--------------------------------+
            | column        |                                |
            +---------------+--------------------------------+
            | affiliation   | value separated by "|"         |
            | first_name    | first name                     |
            | fmt_name      | formated name                  |
            | full_name     | Last, First                    |
            | last_name     | family name                    |
            | relator_name  | equal to dir. for phd director |
            +---------------+--------------------------------+
156

LE GAC Renaud's avatar
LE GAC Renaud committed
157 158 159 160
        Note:
            After running this method, the field ``authors`` is always defined.
            It contains one entry with empty strings when the field does not
            exist.
161

LE GAC Renaud's avatar
LE GAC Renaud committed
162 163 164 165 166 167 168 169 170
        """
        if u"authors" not in self:
            cols = ["affiliation",
                    "first_name",
                    "fmt_name",
                    "full_name",
                    "last_name"]
            self[u"authors"] = DataFrame([[""]*len(cols)], columns=cols)
            return
171

LE GAC Renaud's avatar
LE GAC Renaud committed
172 173
        data = self[u"authors"]
        data = (data if isinstance(data, list) else [data])
174

LE GAC Renaud's avatar
LE GAC Renaud committed
175
        df = DataFrame(data)
176

LE GAC Renaud's avatar
LE GAC Renaud committed
177 178 179 180 181 182
        # drop useless columns
        refcols = ["affiliation",
                   "first_name",
                   "full_name",
                   "last_name",
                   "relator_name"]
183

LE GAC Renaud's avatar
LE GAC Renaud committed
184 185
        columns = df.columns
        df = df.drop(columns.difference(refcols), axis="columns")
186

LE GAC Renaud's avatar
LE GAC Renaud committed
187 188 189 190
        # protection -- affiliation not defined
        if "affiliation" not in columns:
            dfa = DataFrame([""]*len(df), columns=["affiliation"])
            df = concat([df, dfa], axis="columns")
191

LE GAC Renaud's avatar
LE GAC Renaud committed
192 193 194 195
        # convert list of affiliation to string separated by |
        df.affiliation = (df.affiliation
                          .fillna("")
                          .apply(lambda x: to_str(x)))
196

LE GAC Renaud's avatar
LE GAC Renaud committed
197 198
        # add the column fmt_name
        df["fmt_name"] = df.full_name
199

LE GAC Renaud's avatar
LE GAC Renaud committed
200 201
        # replace
        self[u"authors"] = df
202

LE GAC Renaud's avatar
LE GAC Renaud committed
203 204
    def _process_publication_info(self):
        """Convert publication_info into DataFrame:
205

LE GAC Renaud's avatar
LE GAC Renaud committed
206 207 208
            Note:
                * the field is a list when there are eratum
                * in some case the subfield year is a list (cds 1951625)
209

LE GAC Renaud's avatar
LE GAC Renaud committed
210 211
        publication information are stored in DataFrame with the
        following structure:
212

LE GAC Renaud's avatar
LE GAC Renaud committed
213 214 215 216 217 218 219 220
            +------------+--------------------------------+
            | column     |                                |
            +------------+--------------------------------+
            | title      | abbreviation of the publisher  |
            | volume     | volume                         |
            | year       | year of publication            |
            | pagination | page number or ranges          |
            +------------+--------------------------------+
221

LE GAC Renaud's avatar
LE GAC Renaud committed
222 223 224 225
        Note:
            * After running this method, the field ``publication_info``
              is always defined. It contains one entry with empty strings
              when the field does not exist.
226

LE GAC Renaud's avatar
LE GAC Renaud committed
227 228
            * In order to deal with erratum entry are sorter by year
              and volume.
229

LE GAC Renaud's avatar
LE GAC Renaud committed
230 231 232 233 234 235
        """
        if u"publication_info" not in self:
            cols = ["title",
                    "volume",
                    "year",
                    "pagination"]
236

LE GAC Renaud's avatar
LE GAC Renaud committed
237 238
            self[u"publication_info"] = \
                DataFrame([[""]*len(cols)], columns=cols)
239

LE GAC Renaud's avatar
LE GAC Renaud committed
240
            return
241

LE GAC Renaud's avatar
LE GAC Renaud committed
242 243
        data = self[u"publication_info"]
        data = (data if isinstance(data, list) else [data])
244

LE GAC Renaud's avatar
LE GAC Renaud committed
245
        df = DataFrame(data)
246

LE GAC Renaud's avatar
LE GAC Renaud committed
247 248 249 250
        # protection -- list of year, e.g. [2014, 2014] (cds 1951625)
        df["year"] = \
            df.year.apply(
                lambda x: (", ".join(set(x)) if isinstance(x, list) else x))
251

LE GAC Renaud's avatar
LE GAC Renaud committed
252 253
        # erratum -- sort by year and volume
        df = df.sort_values(["year", "volume"])
254

LE GAC Renaud's avatar
LE GAC Renaud committed
255 256
        # replace
        self[u"publication_info"] = df
257

258
    def authors(self, sep=", ", sort=False):
        """The author(s) signing the publication, as a single string.

        Args:
            sep (str):
                string inserted between author names.
                The default is the comma.

            sort (bool):
                sort authors by family name when true otherwise use the
                order of authors at the creation of the record

        Returns:
            str:
                * Author names joined with the ``sep`` argument.
                * The string is empty when there are no authors.

        """
        names = self.authors_as_list(sort=sort)
        joined = sep.join(names)
        return joined
277

278
    def authors_as_list(self, sort=False):
        """The list of author(s) signing the publication.

        Args:
            sort (bool):
                sort authors by family name when true otherwise use the
                order of authors at the creation of the record

        Returns:
            list:
                formatted names; the list is empty when authors
                are not defined.

        """
        frame = self[u"authors"]

        if sort:
            ordered = (frame[["last_name", "fmt_name"]]
                       .sort_values(by="last_name"))
            names = ordered.fmt_name.tolist()

        else:
            # the index reflects the order at the creation of the record
            names = frame.fmt_name.sort_index().tolist()

        # a single empty name is the placeholder used when there is no author
        if names == [""]:
            return []

        return names
308 309 310 311

    def collaboration(self):
        """The collaboration(s) signing the publication.

        Returns:
            str:
                * names of collaboration are separated by a comma.
                * The filter CLEAN_COLLABORATION is applied.

        """
        names = self._get(u"corporate_name",
                          u"collaboration",
                          force_list=True)

        joined = ", ".join(names)
        return CLEAN_COLLABORATION(joined)
320

321
    def find_affiliation(self, pattern):
        """Find affiliation matching the regular expression *pattern*.

        Args:
            pattern (str):
                regular expression defining the affiliation keys.
                It has to be built for an exact match, namely containing
                start and end of string. This is required to separate
                `Ecole Polytechnique` from `Ecole Polytechnique, Lausanne`.

        Returns:
            str:
                - the affiliation or the first one when several are found.
                - empty string when nothing is found.

        """
        df = self[u"authors"]

        # str.extract needs a capturing group:
        # wrap the whole pattern so that group 1 is the full match
        found = df.affiliation.str.extract("(%s)" % pattern, expand=False)

        # a pattern with its own groups yields one column per group;
        # the first column is the wrapping group, i.e. the full match
        if isinstance(found, DataFrame):
            found = found.iloc[:, 0]

        found = found.dropna()

        # positional access is mandatory: after dropna() the label 0 only
        # exists when the first author matches the pattern
        return (found.iloc[0] if len(found) > 0 else "")
346

347
    def find_authors(self, pattern, sep=", ", sort=False):
        """Find authors containing the regular expression *pattern*.
        The search is performed on the formatted name.

        Args:
            pattern (str):
                regular expression defining the author name(s).

            sep (unicode):
                string separating author names. The default is the comma.

            sort (bool):
                sort authors by family name when true otherwise use the
                order of authors at the creation of the record

        Returns:
            str:
                * Author names are separated by ``sep`` argument.
                * The string is empty when nothing is found.

        """
        frame = self[u"authors"]

        # boolean mask selecting the matching authors
        mask = frame.fmt_name.str.contains(pattern)

        if sort:
            selected = frame.loc[mask, ["last_name", "fmt_name"]]
            names = selected.sort_values(by="last_name").fmt_name

        else:
            selected = frame.loc[mask, ["fmt_name"]]
            names = selected.sort_index().fmt_name

        if len(names) == 0:
            return ""

        return sep.join(names)
383

384
    def find_authors_by_affiliation(self, pattern, sep=", ", sort=False):
        """Find authors belonging to a given institute(s) defined by a regular
        expression.

        Args:
            pattern (str):
                regular expression defining the affiliation keys
                for the institute(s).

            sep (unicode):
                string separating author names. The default is the comma.

            sort (bool):
                sort authors by family name when true otherwise use the
                order of authors at the creation of the record

        Returns:
            str:
                * Author names are separated by the ``sep`` argument.
                * Authors are sorted by family name only when ``sort``
                  is true.
                * Empty string when authors are not found.

        """
        frame = self[u"authors"]

        # boolean mask selecting authors of the requested institute(s)
        mask = frame.affiliation.str.contains(pattern)

        if sort:
            selected = frame.loc[mask, ["last_name", "fmt_name"]]
            names = selected.sort_values(by="last_name").fmt_name

        else:
            selected = frame.loc[mask, ["fmt_name"]]
            names = selected.sort_index().fmt_name

        if len(names) > 0:
            return sep.join(names)

        return ""
422 423 424 425

    def first_author(self):
        """The formatted name of the first author.

        Returns:
            str:
                - Empty string when the first author is not defined.

        """
        names = self[u"authors"].fmt_name

        # positional access: first row whatever the index labels are
        return names.iloc[0]
432 433 434 435

    def first_author_institutes(self):
        """The institute(s) associated to the first author.

        Note:
            The value is read from the ``affiliation`` column of
            the ``authors`` DataFrame.

        Returns:
            str:
                - names are separated by ``|``.
                - The string is empty when institutes are not defined.

        """
        # local import: the module only imports concat and DataFrame
        from pandas import isnull

        val = self[u"authors"].affiliation.iloc[0]

        # NaN never compares equal to anything, itself included,
        # so an equality test cannot detect it
        return ("" if isnull(val) else val)
448 449 450 451

    def institutes(self):
        """The list of institutes signing the publication.

        Note:
            Names of institutes are given by the ``affiliation`` column
            of the ``authors`` DataFrame.

        Returns:
            list:
                each institute appears once and the list is sorted
                in alphabetic order.

        """
        # expand multi-affiliation (one per column)
        parts = self[u"authors"].affiliation.str.split("|", expand=True)

        # stack all the columns into a single one; authors with fewer
        # affiliations than the others leave holes which are dropped
        columns = [parts[name] for name in parts.columns]
        merged = concat(columns, ignore_index=True).dropna()

        # remove duplicate entries and sort
        return sorted(set(merged))
474

475
    def is_affiliations(self):
        """``True`` when affiliations are defined for authors.

        Note:
            This is a fast algorithm which only detects the case of a
            single entry with an empty affiliation. To check that the
            affiliation is defined for all authors, use the method
            :func:`is_affiliation_for_all`.

        Returns:
            bool:

        """
        frame = self[u"authors"]

        # single row with an empty affiliation
        undefined = len(frame) == 1 and frame.affiliation.iloc[0] == ""

        return not undefined

494
    def is_affiliation_for_all(self):
        """``True`` when affiliation are defined for all authors.

        Return:
            bool:

        """
        affiliation = self[u"authors"].affiliation

        # an affiliation is undefined when it is an empty string or NaN
        undefined = affiliation.isin(["", NaN])

        return not undefined.any()
505

506
    def is_authors(self):
        """``True`` when authors are defined.

        Authors are defined when the ``authors`` DataFrame has the columns
        ``first_name``, ``full_name`` and ``last_name`` and when it is not
        reduced to a single entry with an empty full name.

        Returns:
            bool:

        """
        frame = self[u"authors"]

        # the three name columns are mandatory
        required = {"first_name", "full_name", "last_name"}
        if not required.issubset(frame.columns):
            return False

        # single row with an empty full name
        undefined = len(frame) == 1 and frame.full_name.iloc[0] == ""

        return not undefined
523

524
    def is_published(self):
        """``True`` when the record is published and contains a complete set
        of publication information (title, volume, year and pagination).

        Returns:
            bool:

        """
        info = self[u"publication_info"]

        # an entry is complete when the four subfields are not empty
        complete = info["title"].str.len() > 0
        for name in ("volume", "year", "pagination"):
            complete = complete & (info[name].str.len() > 0)

        # one complete entry is enough
        return len(info[complete]) > 0
541 542

    def is_with_erratum(self):
        """``True`` when the record contains erratum data, namely when
        ``publication_info`` has more than one entry.

        Returns:
            bool

        """
        entries = len(self[u"publication_info"])
        return entries > 1
551 552

    def paper_editor(self):
        """The abbreviated version of the review, *e.g* Phys Lett B.

        The value is read from the first entry of ``publication_info``.

        Returns:
            unicode:
                * Empty string when not defined.

        """
        titles = self[u"publication_info"]["title"]
        return titles.iloc[0]
562 563 564 565

    def paper_pages(self):
        """The page number / range when the record is published in a review.

        The value is read from the first entry of ``publication_info``.

        Returns:
            unicode:
                * The format is "45-67" or "234".
                * Empty string when not defined.

        """
        pages = self[u"publication_info"]["pagination"]
        return pages.iloc[0]
574 575 576 577

    def paper_reference(self):
        """The full reference for a publication published in a review.

        The reference is built from the first entry of ``publication_info``
        by chaining the title, volume, year and pagination.

        Returns:
            unicode:
                * The format is "Phys Lett B 456 2010 5-6".
                * Undefined components are skipped.
                * The string is empty when the publication is not
                  published in a review.

        """
        paper = self[u"publication_info"].iloc[0]

        parts = []
        for key in ("title", "volume", "year", "pagination"):
            val = paper[key]

            # skip undefined components: None, NaN (the only value which
            # differs from itself) and the empty string. Joining them would
            # either fail or leave doubled spaces in the reference.
            if val is None or val != val or val == u"":
                continue

            # formatting also accepts a component which is not a string
            parts.append(u"%s" % val)

        return u" ".join(parts).strip()
593 594 595 596

    def paper_url(self):
        """The URL of the preprint.

        Note:
            Many others URL exists mainly those related to open access.

        Returns:
            unicode:
                the string is empty when no URLs are found or when
                the choice is ambiguous (more than one candidate).

        """
        # depends on the store
        # start with CDS looking for the field `url`
        if u"url" in self:
            data = self[u"url"]

            li = (data if isinstance(data, list) else [data])

            # entries without description or without url are skipped
            # instead of raising a KeyError
            li = [di[u"url"] for di in li
                  if di.get(u"description") == u"Preprint" and u"url" in di]

            if len(li) == 1:
                return li[0]

        # scan the list of files
        # work for both stores.
        # NOTE(review): when the preprint number is an empty string, the
        # suffix is ".pdf" and any single pdf file matches -- confirm that
        # this is intended.
        pdf = "%s.pdf" % self.preprint_number()

        li = self._get(u"files", u"url", force_list=True)
        li = [el for el in li if el.endswith(pdf)]

        if len(li) == 1:
            return li[0]

        return u""
627 628 629 630

    def paper_volume(self):
        """The volume number when the record is published in a review.

        The value is read from the first entry of ``publication_info``.

        Returns:
            unicode:
                - Empty string when nothing is found.

        """
        volumes = self[u"publication_info"]["volume"]
        return volumes.iloc[0]
638 639 640 641

    def paper_year(self):
        """The year of the publication.

        The value is read from the first entry of ``publication_info``.

        Returns:
            unicode:
                - Empty string if the year is not defined.

        """
        years = self[u"publication_info"]["year"]
        return years.iloc[0]
649 650 651 652

    def preprint_number(self):
        """The ArXiv preprint number.

        The number is the entry of the field ``primary_report_number``
        starting with the ``ARXIV`` prefix.

        Returns:
            str:
                empty string when it is not defined or when several
                entries start with the prefix.

        """
        # the documented contract is an empty string, never None
        if u"primary_report_number" not in self:
            return u""

        data = self[u"primary_report_number"]
        data = (data if isinstance(data, list) else [data])

        li = [el for el in data if el.startswith(ARXIV)]

        return (li[0] if len(li) == 1 else u"")
668

669 670 671 672 673 674 675 676 677 678 679 680
    def reformat_authors(self, fmt="Last, First"):
        """Reformat names of authors.

        The default formatting for cds/invenio record is ``Last, First``.
        The new names are stored in the column ``fmt_name`` of
        the ``authors`` DataFrame which is modified in place.

        Args:
            fmt (str):
                define the new format for author names.
                Possible values are "First, Last", "F. Last", "Last",
                "Last, First" and "Last F."

        Raises:
            RecordException:
                * the argument ``fmt`` is not valid.

        """
        if fmt not in AUTHOR_FORMATS:
            raise RecordException(MSG_INVALID_FMT)

        # nothing to do, the names already have the requested format
        if fmt == self._last_fmt_author:
            return

        df = self[u"authors"]

        # ....................................................................
        #
        # Compute initial for the first name
        #
        # The initials are kept in a local Series. Storing them in
        # a temporary column would require an in-place removal afterwards.
        #
        initial = None
        if fmt in ("F. Last", "Last F."):

            dfm = (df.first_name.str.extract(REG_INITIAL, expand=True)
                   .fillna(""))

            initial = dfm.apply(
                lambda x: to_initial(x[0], x[1], x[2]), axis="columns")

        # ....................................................................
        #
        # Format
        #
        if fmt == "Last, First":
            df["fmt_name"] = df.last_name + ", " + df.first_name

        elif fmt == "First, Last":
            df["fmt_name"] = df.first_name + ", " + df.last_name

        elif fmt == "F. Last":
            df["fmt_name"] = initial + " " + df.last_name

        elif fmt == "Last":
            df["fmt_name"] = df.last_name

        elif fmt == "Last F.":
            df["fmt_name"] = df.last_name + " " + initial

        # ....................................................................
        #
        # Entries without any name (placeholder used when the record has
        # no author) keep an empty formatted name. Otherwise they become
        # ", " or ". " and are no longer recognised as "no author".
        #
        blank = (df.first_name == "") & (df.last_name == "")
        df.loc[blank, "fmt_name"] = ""

        # remember the format only once the names have been rebuilt
        self._last_fmt_author = fmt
732

733 734 735
    def report_number(self):
        """The report number(s) associated to the publication.

        Returns:
            str:
                - Numbers are separated by a comma
                - Numbers are sorted in alphabetic order.
                - Empty string when not defined.

        """
        # CDS
        if u"report_number" in self:

            data = self[u"report_number"]
            data = (data if isinstance(data, list) else [data])

            # each entry is a dictionary, all its values are report numbers
            # values() exists in Python 2 and 3 while itervalues() does not
            li = []
            for di in data:
                li.extend(di.values())

            return ", ".join(sorted(li))

        # INSPIRE
        if u"primary_report_number" in self:

            data = self[u"primary_report_number"]
            data = (data if isinstance(data, list) else [data])

            # the ArXiv number is the preprint number, not a report number
            li = [el for el in data if not el.startswith(ARXIV)]

            return ", ".join(sorted(li))

        return u""
765 766 767 768

    def submitted(self):
        """The date of submission.

        The value is the subfield ``date`` of the field ``prepublication``.

        Returns:
            unicode:
                * format are "YYYY-MM", "YYYY-MM-DD", "DD MMM YYYY", *etc.*
                * Empty string when not defined.

        """
        date = self._get(u"prepublication", u"date")
        return date
776 777 778 779

    def title(self):
        """The title of the publication.

        The value is the subfield ``title`` of the field ``title``.

        Returns:
            unicode:
                * Empty string when not defined.
                * The filter CLEAN_SPACES is applied.

        """
        raw = self._get(u"title", u"title")
        return CLEAN_SPACES(raw)