""" harvest_tools.automaton

"""
import re
import traceback

7

8 9 10 11 12
from .base import (MSG_FIX_ORIGIN,
                   MSG_IN_DB,
                   search_synonym,
                   ToolException)
from .checkandfix import CheckAndFix
13
from gluon.storage import Storage
LE GAC Renaud's avatar
LE GAC Renaud committed
14 15
from invenio_tools import (CdsException,
                           InvenioStore,
16 17
                           Marc12,
                           OAI_URL)
LE GAC Renaud's avatar
LE GAC Renaud committed
18
from invenio_tools.factory import build_record
19 20
from .msg import Msg
from .msgcollection import MsgCollection
21
from plugin_dbui import CALLBACK_ERRORS, get_id
22

23

24 25 26
# messages carried by the ToolException raised by Automaton.__init__
# when a mandatory selection is missing
MSG_NO_CAT = 'Select a "category" !!!'
MSG_NO_PROJECT = 'Select a "project" !!!'
MSG_NO_TEAM = 'Select a "team" !!!'

# default message logged by Automaton._insert_in_db when the insert
# operation does not return a record identifier
MSG_NSERT_FAIL = "Fail to insert the new record in the database."
MSG_NO_OAI = "Reject no OAI identifier"
MSG_WELL_FORM_OAI = "Reject OAI is not well formed"

# search collection when using inspirehep
# require for "Hal Hidden"
# group(1) is the text following the "cc" keyword, up to "and" or
# the end of the query
REG_COLLECTION = re.compile(r"cc([A-Za-z ]+)(and|$)")

36

37
class Automaton(object):
38
    """Base class to search and process publications:
39

40
        * Decode the selector defining user criteria.
LE GAC Renaud's avatar
LE GAC Renaud committed
41
        * Search in the store publications matching user criteria.
LE GAC Renaud's avatar
LE GAC Renaud committed
42
        * Instantiate the record and check it.
43
        * Insert new records in the database.
44

45 46
    Note:
        The parameters of the search are defined by the current ``request``.
47

48 49 50
    The logic implemented in the ``Automaton`` class is the following:

        #. Ask to the store, all the `record_id` satisfying the user request.
LE GAC Renaud's avatar
LE GAC Renaud committed
51 52
        #. Reject `record_id` contained in the *origin* field of a
           database entry.
LE GAC Renaud's avatar
LE GAC Renaud committed
53
        #. Request to the store, the JSON description of the publications
LE GAC Renaud's avatar
LE GAC Renaud committed
54 55 56 57
           and decode them.
        #. Reject the record for which the *secondary_oai_url* is contained in
           the *origin* field of a database entry. Update the *origin* field
           of the database record.
58
        #. Check that the *oai* of the publication is defined and well formed.
LE GAC Renaud's avatar
LE GAC Renaud committed
59 60
           Recover it, if it is not the case. At this stage the OAI is always
           defined.
61 62
        #. Reject temporary publications.
        #. Check that *authors* are defined.
63
           Reject the publication if it is not the case.
64
        #. Check that *my institute* is in the list of the institutes
65 66 67 68 69 70
           signing the publication. Reject the publication if it is
           not the case. When the affiliation are not defined,
           try to recover this case, by finding the author of my institute
           signing the publication. This recovery procedure uses
           the *author rescue list*. Reject the record when the recovery
           procedure failed.
71
        #. Check that the *collaboration*, if defined, is well formed.
72
           Reject the publication if it is not the case
73 74 75 76 77
        #. Several checks are applied depending on the publication type.
        #. At the end of this process, the publisher, the authors are
           formatted and the list of signatories of my institute extracted.

    Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
        db (gluon.DAL):
            the database connection.

        id_team (int):
            the identifier of the team in the database.

        id_project (int):
            the identifier of the project in the database.

        automaton (unicode):
            the name of the automaton which will be used to process the data.
            Possible values are: ``articles``, ``notes``, ``preprints``,
            ``proceedings``, ``reports``, ``talks`` and ``theses``.

        id_category (int):
            the identifier of the category of publication

        year_start (int):
            starting year for the scan

        year_end (int):
            ending year of the scan

        dry_run (bool):
            new records are not inserted in the database when ``True``.

        debug (bool):
            activate the verbose mode when ``True``.
106 107

    Raises:
LE GAC Renaud's avatar
LE GAC Renaud committed
108 109
        ToolException:
            * team or project or the publication category not defined
110

111 112 113 114 115
    """
    def __init__(self,
                 db,
                 id_team,
                 id_project,
                 automaton,
                 id_category,
                 year_start=None,
                 year_end=None,
                 dry_run=True,
                 debug=False):
        """Check the mandatory selections and initialise the state.

        Raises:
            ToolException:
                when the team, the project or the category is not set
                (tested in that order).

        """
        # the team, the project and the category have to be defined;
        # the first missing one is reported
        mandatory = ((id_team, MSG_NO_TEAM),
                     (id_project, MSG_NO_PROJECT),
                     (id_category, MSG_NO_CAT))

        for selected, message in mandatory:
            if not selected:
                raise ToolException(message)

        self.check = CheckAndFix()

        # user criteria
        self.db = db
        self.id_team = id_team
        self.id_project = id_project
        self.controller = automaton
        self.id_category = id_category
        self.year_start = year_start
        self.year_end = year_end
        self.dry_run = dry_run
        self.dbg = debug

        # processing state, filled while the collections are scanned
        self.collection_logs = []
        self.logs = []
        self.store = None

        # harvester description attached to each log entry
        self.harvester = Storage(id_teams=id_team,
                                 id_projects=id_project,
                                 controller=automaton,
                                 id_categories=id_category)

        # identifiers of the categories "preprint" and "article"
        # which are used by the method _is_record_in_db
        self._id_preprint = get_id(db.categories, code="PRE")
        self._id_article = get_id(db.categories, code="ACL")

158 159 160
    def _insert_in_db(self, log_year="", **fields):
        """Insert the record in the database, handling database exception.

161
        Args:
162
            log_year (str): year of the record for the log
163

164
        Keyword Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
165 166
            **fields:
                keyword arguments defining the record values to be
167
                inserted in the database.
168

169
        Returns:
LE GAC Renaud's avatar
LE GAC Renaud committed
170 171
            int:
                one when the record is inserted / updated in the database,
172
                zero otherwise.
173 174 175 176 177

        """
        db = self.db

        try:
LE GAC Renaud's avatar
LE GAC Renaud committed
178
            rec_id = db.publications.insert(**fields)
LE GAC Renaud's avatar
LE GAC Renaud committed
179 180
            if rec_id:
                return 1
181

LE GAC Renaud's avatar
LE GAC Renaud committed
182
            # operation can be reject by callback table._before_insert
LE GAC Renaud's avatar
LE GAC Renaud committed
183 184 185 186
            else:
                msg = MSG_NSERT_FAIL
                if CALLBACK_ERRORS in db.publications:
                    msg = db.publications._callback_errors
187

LE GAC Renaud's avatar
LE GAC Renaud committed
188 189 190
                # reduce the error message
                if isinstance(msg, list):
                    msg = "%s %s" % (msg[0], msg[-1])
191

LE GAC Renaud's avatar
LE GAC Renaud committed
192 193
                self.logs[-1].reject(msg, log_year)
                return 0
194

LE GAC Renaud's avatar
LE GAC Renaud committed
195 196
        # operation can be rejected by the database
        except Exception as dbe:
197
            self.logs[-1].reject(str(dbe), log_year)
LE GAC Renaud's avatar
LE GAC Renaud committed
198
            return 0
199

LE GAC Renaud's avatar
LE GAC Renaud committed
200 201 202 203 204 205 206
    def _is_record_in_db(self,
                         collection_title,
                         host=None,
                         rec_id=None,
                         oai_url=None):
        """Return the database identifier when the publication is registered.
        The search is based on the ``origin`` field and on the primary OAI.
207

208 209
        Note:
            A new log entry is created when a record is found.
210

211
        Args:
212
            title (str): the title of the publication.
213 214

        Keyword Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
215 216
            host (unicode):
                the store. possible values are ``cds.cern.ch`` or
217 218
                ``inspirehep.net``. To be used with *rec_id*.

LE GAC Renaud's avatar
LE GAC Renaud committed
219 220 221 222 223 224
            rec_id (int):
                the record identifier in the store

            oai_url (unicode):
                the URL of the record in the store.
                Either use *host* and *rec_id* or *oai_url*
225

226
        Returns:
LE GAC Renaud's avatar
LE GAC Renaud committed
227 228
            int:
                the id of the record in the database when a record is found,
229
                0 otherwise.
230

231
        Raises:
LE GAC Renaud's avatar
LE GAC Renaud committed
232 233
            ValueError:
                * keyword arguments are not defined properly.
234

235 236
        """
        db = self.db
237
        harvester = self.harvester
238

239 240 241 242 243 244 245 246
        # build the OAI URL
        if host is not None and rec_id is not None and oai_url is None:
            url = OAI_URL % (host, rec_id)
        elif host is None and rec_id is None and oai_url is not None:
            url = oai_url
        else:
            raise ValueError

LE GAC Renaud's avatar
LE GAC Renaud committed
247
        # protection empty URL
248 249 250
        if len(url) == 0:
            return 0

251 252 253
        # check the OAI
        query = db.publications.origin.contains(url)
        setrows = db(query)
254

255
        if setrows.count() == 0:
256
            return 0
257

258
        # one record found
259 260
        columns = [db.publications.id,
                   db.publications.id_categories,
261 262 263
                   db.publications.title,
                   db.publications.year]
        publication = setrows.select(*columns).first()
264

265 266
        # Note:
        # The category for the publication and the harvester have to be equal.
267 268 269 270 271 272 273
        # However, keep the record if it is a preprint when the harvester
        # looks for articles. This is required to transform a preprint
        # into article
        #
        # Category can disagree when the publication is an article and
        # the harvester look for preprint. In that case, keep the article
        #
274
        if publication.id_categories != harvester.id_categories:
275 276 277 278 279 280 281

            is_preprint_to_article = \
                publication.id_categories == self._id_preprint \
                and harvester.id_categories == self._id_article

            if is_preprint_to_article:
                return 0
282 283

        # log
284
        self.logs.append(Msg(harvester=harvester,
LE GAC Renaud's avatar
LE GAC Renaud committed
285
                             collection=collection_title,
286 287 288 289 290
                             record_id=rec_id,
                             title=publication.title))

        self.logs[-1].idle(MSG_IN_DB, publication.year)

LE GAC Renaud's avatar
LE GAC Renaud committed
291 292 293
        if self.dbg:
            print("\trecord already in db:", rec_id, "->", publication.id)

294
        return publication.id
295

296 297 298 299 300
    def _search_parameters(self, collection):
        """Build the keywords to steer the URL search in invenio store.
        The main parameter is the collection and the date range defined
        in the selector.

301
        Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
302 303 304
            collection (unicode):
                string defining the collection in the store.
                The syntax depends on the invenio store:
305 306 307

                    * ``"find cn d0 and tc p and not tc c"``
                    * ``"LHCb Papers"``.
308

309
        Returns:
LE GAC Renaud's avatar
LE GAC Renaud committed
310 311 312
            dict:
                the key are a sub-set of those defined in
                :meth:`invenio_tools.InvenioStore.get_ids`.
313 314

        """
LE GAC Renaud's avatar
LE GAC Renaud committed
315 316
        year_start = self.year_start
        year_end = self.year_end
317 318

        # INSPIREHEP store
LE GAC Renaud's avatar
LE GAC Renaud committed
319
        if collection.startswith("find"):
320 321 322

            query = collection

LE GAC Renaud's avatar
LE GAC Renaud committed
323 324
            if year_start and not year_end:
                query += " and date %s" % year_start
325

LE GAC Renaud's avatar
LE GAC Renaud committed
326 327
            elif not year_start and year_end:
                query += " and date %s" % year_end
328

LE GAC Renaud's avatar
LE GAC Renaud committed
329
            elif year_start and year_end:
330
                query += " and date > %s and date < %s " \
LE GAC Renaud's avatar
LE GAC Renaud committed
331
                         % (year_start - 1, year_end + 1)
332 333 334

            dic = dict(p=query,  # query à la spires
                       rg=1000,  # maximum number of records returned
LE GAC Renaud's avatar
LE GAC Renaud committed
335 336
                       sf="year",  # sort by date
                       so="d")  # descending order
337

338 339 340 341 342 343 344 345 346
            # handle the cc keyword (true inspirehep collection)
            match = REG_COLLECTION.search(query)
            if match:
                dic["cc"] = match.group(1).strip()
                dic["p"] = REG_COLLECTION.sub("", query).strip()
                dic["p"] = dic["p"].replace("  ", " ")
                if dic["p"] == "find":
                    del dic["p"]

347 348 349
        # CERN INVENIO store
        else:

LE GAC Renaud's avatar
LE GAC Renaud committed
350 351
            if year_start and not year_end:
                rex = year_start
352

LE GAC Renaud's avatar
LE GAC Renaud committed
353 354
            elif not year_start and year_end:
                rex = year_end
355

LE GAC Renaud's avatar
LE GAC Renaud committed
356 357
            elif year_start and year_end:
                li = [str(el) for el in xrange(year_start, year_end + 1)]
LE GAC Renaud's avatar
LE GAC Renaud committed
358
                rex = "|".join(li)
359 360

            dic = dict(cc=collection,  # collection
LE GAC Renaud's avatar
LE GAC Renaud committed
361 362
                       f1="year",  # search on year
                       m1="r",  # use regular expression
363
                       p1=rex,  # regular expression defining year
LE GAC Renaud's avatar
LE GAC Renaud committed
364 365
                       sf="year",  # sort by date
                       so="d")  # descending order
366 367
        return dic

LE GAC Renaud's avatar
LE GAC Renaud committed
368
    def check_record(self, record):
369 370
        """Check the content of the record in order to fix non-conformities.
        Return ``False`` when non-conformities are found and can not be
371 372
        corrected.

373 374 375
        Note:
            Some checks depend on the type of publications and have to be
            implemented in inherited class.
376

377
        Note:
LE GAC Renaud's avatar
LE GAC Renaud committed
378
            The order of the checks matter. It should be OAI,
379 380
            temporary record, authors, my authors and then a series of checks
            specific to the publication type.
381

382
        Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
383 384
            record (Record):
                JSON record describing the publication.
385

386
        Returns:
LE GAC Renaud's avatar
LE GAC Renaud committed
387 388
            bool:
                ``False`` when a non-conformity is found and can not be
389
                corrected.
390 391 392

        """
        if self.dbg:
393
            print("check record")
394 395

        try:
396
            self.check.is_oai(record)
397

398
            if self.check.is_bad_oai_used(record):
LE GAC Renaud's avatar
LE GAC Renaud committed
399
                self.logs[-1].idle(MSG_IN_DB, record.submitted())
400 401
                return False

402 403
            self.check.temporary_record(record)
            self.check.authors(record)
404
            self.check.my_affiliation(record, self.id_project, self.id_team)
405 406 407
            self.check.collaboration(record)

        except Exception as e:
408
            self.logs[-1].reject(e, record=record)
409 410 411 412
            return False

        return True

413
    def get_record_by_fields(self, oai_url, year, **kwargs):
        """Get database record matching fields values defined
        in the keyword arguments.

        Note:
            This method is required to deal with publication entered by hand
            and found later by an harvester.

        Args:
            oai_url (unicode):
                the oai_url, *e.g.* ``http://cds.cern.ch/record/123456``.
                The origin field of the existing database record is updated
                to **oai_url** when a match is found and when its current
                value is different. The database is left untouched in
                dry run mode.

            year (int):
                the year of the publication. It is used
                by the search algorithm and by the logger.

        Keyword Args:
            kwargs (unicode):
                 a series of key, value pair where the key is the name of a
                 publications database field.

        Returns:
            tuple:
                ``(id, status)`` which contains the ``id`` of the record.
                The ``id`` is equal to ``None`` when there is no matching.
                The ``status`` is equal to one when the origin field of
                the existing record has to be fixed (also in dry run mode),
                zero otherwise.

        """
        if self.dbg:
            print("get existing record by fields")

        # alias
        db = self.db
        logs = self.logs

        # add the publication year to search criteria
        if year:
            kwargs["year"] = year

        # look for an existing record
        rec_id = get_id(db.publications, **kwargs)
        if not rec_id:
            return (None, 0)

        publication = db.publications[rec_id]

        # the origin is already the right one
        if publication.origin and publication.origin == oai_url:
            logs[-1].idle(MSG_IN_DB, year)
            return (rec_id, 0)

        # fix origin field
        # the row has to be updated explicitly: binding a new dictionary
        # to the local variable does not change the database
        if not self.dry_run:
            publication.update_record(origin=oai_url)

        logs[-1].modify(MSG_FIX_ORIGIN, year)
        return (rec_id, 1)

473 474
    def insert_record(self, record):
        """Insert the record in the database (hook for inherited classes).

        Note:
            The insertion depends on the type of publications. The base
            implementation does nothing and has to be overridden in each
            inherited class.

        Args:
            record (Record):
                record describing the publication.

        Returns:
            int:
                one when the record is inserted / updated in the database,
                zero otherwise. The base implementation always returns zero.

        """
        # nothing is inserted by the base class
        inserted = 0
        return inserted

LE GAC Renaud's avatar
LE GAC Renaud committed
492 493 494
    def process_collection(self, collection):
        """Retrieve JSON objects from the invenio store and for the given
        collection. Corresponding records are inserted in the database.

        Note:
            The attribute ``store`` has to be set before calling this
            method. It is done by :meth:`process_url`.

        Note:
            A ``CdsException`` raised by the store while searching for
            the record identifiers is not propagated. It is kept in
            the ``error`` attribute of the collection log and
            the processing of the collection stops.

        Args:
            collection (unicode):
                name of the collection to be interrogated.

        """
        if self.dbg:
            print("\nprocess collection", collection)

        # alias
        collection_logs = self.collection_logs
        controller = self.controller
        host = self.harvester.host
        project = self.db.projects[self.id_project].project
        store = self.store

        # log collection information
        # A collection is identified as "Project Controller collection"
        ctitle = "%s / %s / %s" % (project, controller, collection)
        collection_logs.append(MsgCollection(title=ctitle))
        collection_log = collection_logs[-1]

        # get search parameters for the collection including user criteria
        kwargs = self._search_parameters(collection)

        # get the list of record identifier matching the search criteria
        try:
            rec_ids = store.get_ids(**kwargs)

        except CdsException as error:
            collection_log.url = store.last_search_url()
            collection_log.error = error
            return

        # log the number of record found for the collection
        collection_log.url = store.last_search_url()
        collection_log.found = len(rec_ids)

        if not rec_ids:
            if self.dbg:
                print("\tNo records found in %s" % collection)
            return

        if self.dbg:
            print("\t%i records found in %s" % (len(rec_ids), collection))

        # remove from the list identifier already registered in the data base
        # and log them
        func = self._is_record_in_db
        rec_ids = [el for el in rec_ids if func(ctitle, host, el) == 0]

        # process the remaining identifiers
        for rec_id in rec_ids:
            self.process_recid(rec_id)

    def process_recid(self, rec_id):
        """Process the publication:

            * get the publication data from the store using its identifier
            * instantiate the record (RecordPubli, RecordConf, RecordThesis)
            * process OAI data
            * check the record
            * insert new record in the database

        Note:
            Any exception raised while the publication data are retrieved
            or while the record is instantiated is caught. The traceback
            is printed, the publication is logged as rejected and
            the processing of the publication stops.

        Args:
            rec_id (int):
                identifier of the publication in the store.

        """
        if self.dbg:
            print("\nprocessing record", rec_id)

        collection_logs = self.collection_logs
        harvester = self.harvester
        logs = self.logs

        # instantiate the record
        try:
            recjson = self.store.get_record(rec_id)
            record = build_record(recjson)

            if self.dbg:
                print("\t", record.title())

        except Exception as e:
            print(traceback.format_exc())
            url = OAI_URL % (harvester.host, rec_id)
            logs.append(Msg(harvester=harvester,
                            collection=collection_logs[-1].title,
                            record_id=rec_id,
                            title=url))
            logs[-1].reject(e)

            # there is no record to process
            return

        # start the log for the record
        logs.append(Msg(harvester=harvester,
                        collection=collection_logs[-1].title,
                        record_id=record.id(),
                        title=record.title()))

        # check that the record is well formed
        # repair non-conformity as far as possible
        # a rejected record is never inserted, whatever the debug mode
        if not self.check_record(record):
            if self.dbg:
                print("\trecord rejected", logs[-1].txt)
            return

        if self.dbg:
            print("\tinsert record in the database")

        # insert the record in the database
        self.insert_record(record)

        if self.dbg:
            log = logs[-1]
            action = log.action
            action = (action.upper() if isinstance(action, str) else action)
            print("\tlog:", action, log.txt)

    def process_url(self, host, collections):
        """Retrieve JSON objects from the invenio store and
        insert corresponding records in the database.

        The host and the collections are added to the harvester
        description used by the logs, the store is instantiated for
        the host and each collection is processed in turn by
        :meth:`process_collection`.

        Args:
            host (unicode):
                host name to query for publications, either
                ``cds.cern.ch`` or ``inspirehep.net``.

            collections (unicode):
                names of the collections to be interrogated, separated
                by commas. Spaces around the commas are ignored.

        """
        if self.dbg:
            print("process URL search")

        # extend harvester for logs
        self.harvester.host = host
        self.harvester.collections = collections

        # instantiate the store
        self.store = InvenioStore(host)

        # process the collections one after the other
        for collection in re.split(" *, *", collections):
            self.process_collection(collection)
661 662 663 664

    def report(self):
        """Build the processing report.

665 666
        Returns:
            dict:
LE GAC Renaud's avatar
LE GAC Renaud committed
667 668
                * ``collection_logs`` list of :class:`MsgCollection`
                * ``controller`` unicode
669
                * ``logs`` list of :class:Msg
LE GAC Renaud's avatar
LE GAC Renaud committed
670
                * ``selector`` :class:`plugin_dbui.Selector`
671 672 673 674 675 676

        """

        return dict(collection_logs=self.collection_logs,
                    controller=self.controller,
                    logs=self.logs)
677 678

    def search_collaboration(self, value):
        """Get the database collaboration identifier using synonyms.

        The search is delegated to ``search_synonym`` applied to the field
        ``collaboration`` of the collaborations table. The returned value
        and the exceptions are those of that helper.

        Args:
            value (unicode):
                the name of the collaboration.

        Returns:
            int:
                the id of the collaboration record
                (presumably UNDEF_ID if value is not defined,
                to be confirmed in ``search_synonym``).

        """
        table = self.db.collaborations
        return search_synonym(table, "collaboration", value)
698 699

    def search_country(self, value):
        """Get the database country identifier using synonyms.

        The search is delegated to ``search_synonym`` applied to the field
        ``country`` of the countries table. The returned value and
        the exceptions are those of that helper.

        Args:
            value (unicode):
                the name of the country.

        Returns:
            int:
                the id of the country record
                (presumably UNDEF_ID if value is not defined,
                to be confirmed in ``search_synonym``).

        """
        table = self.db.countries
        return search_synonym(table, "country", value)
718 719

    def search_publisher(self, value):
        """Get the database publisher identifier using synonyms.

        The search is delegated to ``search_synonym`` applied to the field
        ``abbreviation`` of the publishers table. The returned value and
        the exceptions are those of that helper.

        Args:
            value (unicode):
                the abbreviation of the publisher.

        Returns:
            int:
                the id of the publisher record
                (presumably UNDEF_ID if value is not defined,
                to be confirmed in ``search_synonym``).

        """
        table = self.db.publishers
        return search_synonym(table, "abbreviation", value)