automaton.py 21.7 KB
Newer Older
1
""" harvest_tools.automaton
2 3 4 5 6

"""
import re
import traceback

7

8 9 10 11
from .base import (MSG_FIX_ORIGIN,
                   MSG_IN_DB,
                   ToolException)
from .checkandfix import CheckAndFix
12
from gluon.storage import Storage
LE GAC Renaud's avatar
LE GAC Renaud committed
13 14
from invenio_tools import (CdsException,
                           InvenioStore,
15
                           OAI_URL)
LE GAC Renaud's avatar
LE GAC Renaud committed
16
from invenio_tools.factory import build_record
17 18
from .msg import Msg
from .msgcollection import MsgCollection
19
from plugin_dbui import CALLBACK_ERRORS, get_id
20

21

22 23 24
# Messages carried by the ToolException raised in Automaton.__init__
# when the category, the project or the team identifier is missing.
MSG_NO_CAT = 'Select a "category" !!!'
MSG_NO_PROJECT = 'Select a "project" !!!'
MSG_NO_TEAM = 'Select a "team" !!!'
25

LE GAC Renaud's avatar
LE GAC Renaud committed
26
# Default rejection message logged by Automaton._insert_in_db when
# the insert operation does not return a record identifier.
MSG_INSERT_FAIL = "Fail to insert the new record in the database."
27

28 29
# Template of an OAI identifier. It is filled with the host of the store
# and the record identifier in Automaton.check_record.
OAI = u"oai:%s:%i"

30 31 32 33
# Extract the collection, introduced by the "cc" keyword, from a query
# sent to inspirehep. It is required for the "Hal Hidden" collection.
# Group 1 is the collection name, group 2 the following "and" or
# the end of the query.
# NOTE(review): the character class is greedy. When clauses made only of
# letters and spaces follow the collection, group 1 extends up to the last
# "and" (or to the end of the query) -- confirm this is acceptable for
# the queries in use.
REG_COLLECTION = re.compile(r"cc([A-Za-z ]+)(and|$)")

34

35
class Automaton(object):
36
    """Base class to search and process publications:
37

38
        * Decode the selector defining user criteria.
LE GAC Renaud's avatar
LE GAC Renaud committed
39
        * Search in the store publications matching user criteria.
LE GAC Renaud's avatar
LE GAC Renaud committed
40
        * Instantiate the record and check it.
41
        * Insert new records in the database.
42

43 44
    Note:
        The parameters of the search are defined by the current ``request``.
45

46 47 48
    The logic implemented in the ``Automaton`` class is the following:

        #. Ask to the store, all the `record_id` satisfying the user request.
LE GAC Renaud's avatar
LE GAC Renaud committed
49 50
        #. Reject every `record_id` contained in the *origin* field of a
           database entry.
LE GAC Renaud's avatar
LE GAC Renaud committed
51
        #. Request to the store, the JSON description of the publications
LE GAC Renaud's avatar
LE GAC Renaud committed
52 53 54 55
           and decode them.
        #. Reject the record for which the *secondary_oai_url* is contained in
           the *origin* field of a database entry. Update the *origin* field
           of the database record.
56
        #. Check that the *oai* of the publication is defined and well formed.
LE GAC Renaud's avatar
LE GAC Renaud committed
57 58
           Recover it, if it is not the case. At this stage the OAI is always
           defined.
59 60
        #. Reject temporary publications.
        #. Check that *authors* are defined.
61
           Reject the publication if it is not the case.
62
        #. Check that *my institute* is in the list of the institutes
63 64 65 66 67 68
           signing the publication. Reject the publication if it is
           not the case. When the affiliations are not defined,
           try to recover by finding the authors of my institute
           signing the publication. This recovery procedure uses
           the *author rescue list*. Reject the record when the recovery
           procedure fails.
69
        #. Check that the *collaboration*, if defined, is well formed.
70
           Reject the publication if it is not the case
71 72 73 74 75
        #. Several checks are applied depending on the publication type.
        #. At the end of this process, the publisher, the authors are
           formatted and the list of signatories of my institute extracted.

    Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
        db (gluon.DAL):
            the database connection.

        id_team (int):
            the identifier of the team in the database.

        id_project (int):
            the identifier of the project in the database.

        automaton (unicode):
            the name of the automaton which will be used to process the data.
            Possible values are: ``articles``, ``notes``, ``preprints``,
            ``proceedings``, ``reports``, ``talks`` and ``theses``.

        id_category (int):
            the identifier of the category of publication

        year_start (int):
            starting year for the scan

        year_end (int):
            ending year of the scan

        dry_run (bool):
            new records are not inserted in the database when ``True``.

        debug (bool):
            activate the verbose mode when ``True``.
104 105

    Raises:
LE GAC Renaud's avatar
LE GAC Renaud committed
106 107
        ToolException:
            * team or project or the publication category not defined
108

109 110 111 112 113
    """
    def __init__(self,
                 db,
                 id_team,
                 id_project,
                 automaton,
                 id_category,
                 year_start=None,
                 year_end=None,
                 dry_run=True,
                 debug=False):
        """Validate the user selection and prepare the processing state.

        The meaning of the arguments is given in the class documentation.

        Raises:
            ToolException:
                when the team, the project or the category is not defined.

        """
        # the team, the project and the category are mandatory,
        # they are verified in that order
        mandatory = ((id_team, MSG_NO_TEAM),
                     (id_project, MSG_NO_PROJECT),
                     (id_category, MSG_NO_CAT))

        for value, message in mandatory:
            if not value:
                raise ToolException(message)

        # user criteria
        self.db = db
        self.controller = automaton
        self.id_team = id_team
        self.id_project = id_project
        self.id_category = id_category
        self.year_start = year_start
        self.year_end = year_end

        # running mode
        self.dry_run = dry_run
        self.dbg = debug

        # processing state, the store is instantiated by process_url
        self.check = CheckAndFix(debug)
        self.store = None
        self.collection_logs = []
        self.logs = []

        # description of the harvester attached to each log entry
        self.harvester = Storage(id_teams=id_team,
                                 id_projects=id_project,
                                 controller=automaton,
                                 id_categories=id_category)

        # identifiers of the preprint and article categories,
        # they are used by the method _is_record_in_db
        self._id_preprint = get_id(db.categories, code="PRE")
        self._id_article = get_id(db.categories, code="ACL")

156 157 158
    def _insert_in_db(self, log_year="", **fields):
        """Insert the record in the database, handling database exception.

159
        Args:
160
            log_year (str): year of the record for the log
161

162
        Keyword Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
163 164
            **fields:
                keyword arguments defining the record values to be
165
                inserted in the database.
166

167
        Returns:
LE GAC Renaud's avatar
LE GAC Renaud committed
168 169
            int:
                one when the record is inserted / updated in the database,
170
                zero otherwise.
171 172 173 174 175

        """
        db = self.db

        try:
LE GAC Renaud's avatar
LE GAC Renaud committed
176
            rec_id = db.publications.insert(**fields)
LE GAC Renaud's avatar
LE GAC Renaud committed
177 178
            if rec_id:
                return 1
179

LE GAC Renaud's avatar
LE GAC Renaud committed
180
            # operation can be reject by callback table._before_insert
LE GAC Renaud's avatar
LE GAC Renaud committed
181
            else:
LE GAC Renaud's avatar
LE GAC Renaud committed
182
                msg = MSG_INSERT_FAIL
LE GAC Renaud's avatar
LE GAC Renaud committed
183 184
                if CALLBACK_ERRORS in db.publications:
                    msg = db.publications._callback_errors
185

LE GAC Renaud's avatar
LE GAC Renaud committed
186 187 188
                # reduce the error message
                if isinstance(msg, list):
                    msg = "%s %s" % (msg[0], msg[-1])
189

LE GAC Renaud's avatar
LE GAC Renaud committed
190 191
                self.logs[-1].reject(msg, log_year)
                return 0
192

LE GAC Renaud's avatar
LE GAC Renaud committed
193 194
        # operation can be rejected by the database
        except Exception as dbe:
195
            self.logs[-1].reject(str(dbe), log_year)
LE GAC Renaud's avatar
LE GAC Renaud committed
196
            return 0
197

LE GAC Renaud's avatar
LE GAC Renaud committed
198 199 200 201 202 203 204
    def _is_record_in_db(self,
                         collection_title,
                         host=None,
                         rec_id=None,
                         oai_url=None):
        """Return the database identifier when the publication is registered.
        The search is based on the ``origin`` field and on the primary OAI.
205

206 207
        Note:
            A new log entry is created when a record is found.
208

209
        Args:
210
            title (str): the title of the publication.
211 212

        Keyword Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
213 214
            host (unicode):
                the store. possible values are ``cds.cern.ch`` or
215 216
                ``inspirehep.net``. To be used with *rec_id*.

LE GAC Renaud's avatar
LE GAC Renaud committed
217 218 219 220 221 222
            rec_id (int):
                the record identifier in the store

            oai_url (unicode):
                the URL of the record in the store.
                Either use *host* and *rec_id* or *oai_url*
223

224
        Returns:
LE GAC Renaud's avatar
LE GAC Renaud committed
225 226
            int:
                the id of the record in the database when a record is found,
227
                0 otherwise.
228

229
        Raises:
LE GAC Renaud's avatar
LE GAC Renaud committed
230 231
            ValueError:
                * keyword arguments are not defined properly.
232

233 234
        """
        db = self.db
235
        harvester = self.harvester
236

237 238 239 240 241 242 243 244
        # build the OAI URL
        if host is not None and rec_id is not None and oai_url is None:
            url = OAI_URL % (host, rec_id)
        elif host is None and rec_id is None and oai_url is not None:
            url = oai_url
        else:
            raise ValueError

LE GAC Renaud's avatar
LE GAC Renaud committed
245
        # protection empty URL
246 247 248
        if len(url) == 0:
            return 0

249 250 251
        # check the OAI
        query = db.publications.origin.contains(url)
        setrows = db(query)
252

253
        if setrows.count() == 0:
254
            return 0
255

256
        # one record found
257 258
        columns = [db.publications.id,
                   db.publications.id_categories,
259 260 261
                   db.publications.title,
                   db.publications.year]
        publication = setrows.select(*columns).first()
262

263 264
        # Note:
        # The category for the publication and the harvester have to be equal.
265 266 267 268 269 270 271
        # However, keep the record if it is a preprint when the harvester
        # looks for articles. This is required to transform a preprint
        # into article
        #
        # Category can disagree when the publication is an article and
        # the harvester look for preprint. In that case, keep the article
        #
272
        if publication.id_categories != harvester.id_categories:
273 274 275 276 277 278 279

            is_preprint_to_article = \
                publication.id_categories == self._id_preprint \
                and harvester.id_categories == self._id_article

            if is_preprint_to_article:
                return 0
280 281

        # log
282
        self.logs.append(Msg(harvester=harvester,
LE GAC Renaud's avatar
LE GAC Renaud committed
283
                             collection=collection_title,
284 285 286 287 288
                             record_id=rec_id,
                             title=publication.title))

        self.logs[-1].idle(MSG_IN_DB, publication.year)

LE GAC Renaud's avatar
LE GAC Renaud committed
289 290 291
        if self.dbg:
            print("\trecord already in db:", rec_id, "->", publication.id)

292
        return publication.id
293

294 295 296 297 298
    def _search_parameters(self, collection):
        """Build the keywords to steer the URL search in invenio store.
        The main parameter is the collection and the date range defined
        in the selector.

299
        Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
300 301 302
            collection (unicode):
                string defining the collection in the store.
                The syntax depends on the invenio store:
303 304 305

                    * ``"find cn d0 and tc p and not tc c"``
                    * ``"LHCb Papers"``.
306

307
        Returns:
LE GAC Renaud's avatar
LE GAC Renaud committed
308 309 310
            dict:
                the key are a sub-set of those defined in
                :meth:`invenio_tools.InvenioStore.get_ids`.
311 312

        """
LE GAC Renaud's avatar
LE GAC Renaud committed
313 314
        year_start = self.year_start
        year_end = self.year_end
315 316

        # INSPIREHEP store
LE GAC Renaud's avatar
LE GAC Renaud committed
317
        if collection.startswith("find"):
318 319 320

            query = collection

LE GAC Renaud's avatar
LE GAC Renaud committed
321 322
            if year_start and not year_end:
                query += " and date %s" % year_start
323

LE GAC Renaud's avatar
LE GAC Renaud committed
324 325
            elif not year_start and year_end:
                query += " and date %s" % year_end
326

LE GAC Renaud's avatar
LE GAC Renaud committed
327
            elif year_start and year_end:
328
                query += " and date > %s and date < %s " \
LE GAC Renaud's avatar
LE GAC Renaud committed
329
                         % (year_start - 1, year_end + 1)
330 331 332

            dic = dict(p=query,  # query à la spires
                       rg=1000,  # maximum number of records returned
LE GAC Renaud's avatar
LE GAC Renaud committed
333 334
                       sf="year",  # sort by date
                       so="d")  # descending order
335

336 337 338 339 340 341 342 343 344
            # handle the cc keyword (true inspirehep collection)
            match = REG_COLLECTION.search(query)
            if match:
                dic["cc"] = match.group(1).strip()
                dic["p"] = REG_COLLECTION.sub("", query).strip()
                dic["p"] = dic["p"].replace("  ", " ")
                if dic["p"] == "find":
                    del dic["p"]

345 346 347
        # CERN INVENIO store
        else:

LE GAC Renaud's avatar
LE GAC Renaud committed
348 349
            if year_start and not year_end:
                rex = year_start
350

LE GAC Renaud's avatar
LE GAC Renaud committed
351 352
            elif not year_start and year_end:
                rex = year_end
353

LE GAC Renaud's avatar
LE GAC Renaud committed
354 355
            elif year_start and year_end:
                li = [str(el) for el in xrange(year_start, year_end + 1)]
LE GAC Renaud's avatar
LE GAC Renaud committed
356
                rex = "|".join(li)
357 358

            dic = dict(cc=collection,  # collection
LE GAC Renaud's avatar
LE GAC Renaud committed
359 360
                       f1="year",  # search on year
                       m1="r",  # use regular expression
361
                       p1=rex,  # regular expression defining year
LE GAC Renaud's avatar
LE GAC Renaud committed
362 363
                       sf="year",  # sort by date
                       so="d")  # descending order
364 365
        return dic

LE GAC Renaud's avatar
LE GAC Renaud committed
366
    def check_record(self, record):
367 368
        """Check the content of the record in order to fix non-conformities.
        Return ``False`` when non-conformities are found and can not be
369 370
        corrected.

371 372 373
        Note:
            Some checks depend on the type of publications and have to be
            implemented in inherited class.
374

375
        Note:
LE GAC Renaud's avatar
LE GAC Renaud committed
376
            The order of the checks matter. It should be OAI,
377 378
            temporary record, authors, my authors and then a series of checks
            specific to the publication type.
379

380
        Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
381 382
            record (Record):
                JSON record describing the publication.
383

384
        Returns:
LE GAC Renaud's avatar
LE GAC Renaud committed
385 386
            bool:
                ``False`` when a non-conformity is found and can not be
387
                corrected.
388 389 390

        """
        if self.dbg:
391
            print("check record")
392 393

        try:
394 395
            # fix record with a missing OAI
            if not self.check.is_oai(record):
LE GAC Renaud's avatar
LE GAC Renaud committed
396
                oai = OAI % (self.harvester.host, record.id())
397
                record[u"oai"] = {u"value": oai}
398

399
            if self.check.is_bad_oai_used(record):
LE GAC Renaud's avatar
LE GAC Renaud committed
400
                self.logs[-1].idle(MSG_IN_DB, record.submitted())
401 402
                return False

403 404
            self.check.temporary_record(record)
            self.check.authors(record)
405
            self.check.my_affiliation(record, self.id_project, self.id_team)
406 407 408
            self.check.collaboration(record)

        except Exception as e:
409
            self.logs[-1].reject(e, record=record)
410 411 412 413
            return False

        return True

414
    def get_record_by_fields(self, oai_url, year, **kwargs):
        """Get database record matching fields values defined
        in the keyword arguments.

        Note:
            This method is required to deal with publication entered by hand
            and found later by an harvester.

        Args:
            oai_url (unicode):
                the oai_url, *e.g.* ``http://cds.cern.ch/record/123456``.
                The origin field of the existing database record is updated
                to **oai_url** when a match is found, except in dry run mode.

            year (int):
                the year of the publication. It is used
                by the search algorithm and by the logger.

        Keyword Args:
            kwargs (unicode):
                 a series of key, value pair where the key is the name of a
                 publications database field.

        Returns:
            tuple:
                ``(id, status)`` which contains the ``id`` of the record.
                The ``id`` is equal to ``None`` when there is no matching.
                The ``status`` is equal to one when the origin of the existing
                record has to be modified, zero otherwise.

        """
        if self.dbg:
            print("get existing record by fields")

        # alias
        db = self.db
        logs = self.logs

        # add the publication year to search criteria
        if year:
            kwargs["year"] = year

        # look for an existing record
        rec_id = get_id(db.publications, **kwargs)
        if not rec_id:
            return (None, 0)

        # fix origin field
        publication = db.publications[rec_id]
        ok = publication.origin and publication.origin == oai_url
        if not ok:
            # The database is left untouched in dry run mode, but the
            # modification is reported in the log in both cases.
            # Assigning a dictionary to table[id] updates the record,
            # whereas rebinding the local row would change nothing.
            if not self.dry_run:
                db.publications[rec_id] = dict(origin=oai_url)

            logs[-1].modify(MSG_FIX_ORIGIN, year)
            return (rec_id, 1)

        logs[-1].idle(MSG_IN_DB, year)
        return (rec_id, 0)

474 475
    def insert_record(self, record):
        """Insert the record in the database.
476

477 478 479
        Note:
            This method depend on the type of publications.
            It has to be implemented for each inherited class.
480

481
        Args:
LE GAC Renaud's avatar
LE GAC Renaud committed
482 483
            record (Record):
                record describing the publication.
484

485
        Returns:
LE GAC Renaud's avatar
LE GAC Renaud committed
486 487
            int:
                one when the record is inserted / updated in the database,
488
                zero otherwise.
489 490 491 492

        """
        return 0

LE GAC Renaud's avatar
LE GAC Renaud committed
493 494 495
    def process_collection(self, collection):
        """Harvest one collection of the invenio store.

        The identifiers matching the search criteria are requested from the
        store. Those already registered in the database are discarded and
        logged, then the remaining ones are processed one after the other.

        Args:
            collection (unicode):
                name of the collection to be interrogated.

        Note:
            * A ``CdsException`` raised by the search is recorded in
              the collection log and ends the processing of the collection.
            * Have a look at ``collection_logs`` and ``logs`` in order to
              understand what happened.

        """
        if self.dbg:
            print("\nprocess collection", collection)

        # alias
        store = self.store
        host = self.harvester.host
        project = self.db.projects[self.id_project].project

        # open the log of the collection
        # A collection is identified as "Project Controller collection"
        ctitle = "%s / %s / %s" % (project, self.controller, collection)
        collection_log = MsgCollection(title=ctitle)
        self.collection_logs.append(collection_log)

        # search parameters for the collection including user criteria
        criteria = self._search_parameters(collection)

        # identifiers of the records matching the search criteria
        try:
            rec_ids = store.get_ids(**criteria)

        except CdsException as error:
            collection_log.url = store.last_search_url()
            collection_log.error = error
            return

        # log the number of records found for the collection
        collection_log.url = store.last_search_url()
        found = len(rec_ids)
        collection_log.found = found

        if found == 0:
            if self.dbg:
                print("\tNo records found in %s" % collection)
            return

        if self.dbg:
            print("\t%i records found in %s" % (found, collection))

        # discard, and log, the identifiers already registered in the
        # database; the selection is completed before any record is processed
        pending = []
        for identifier in rec_ids:
            if self._is_record_in_db(ctitle, host, identifier) == 0:
                pending.append(identifier)

        # process the remaining identifiers
        for identifier in pending:
            self.process_recid(identifier)

    def process_recid(self, rec_id):
        """Process the publication:

            * get the publication data from the store using its identifier
            * instantiate the record (RecordPubli, RecordConf, RecordThesis)
            * process OAI data
            * check the record
            * insert new record in the database

        Note:
            * Designed to never stop although exceptions are raised
            * Have a look at ``collection_logs`` and ``logs`` in order to
              understand what happened.
            * A record rejected by :meth:`check_record` is never inserted.

        Args:
            rec_id (int):
                identifier of the publication in the store.

        """
        if self.dbg:
            print("\nprocessing record", rec_id)

        collection_logs = self.collection_logs
        harvester = self.harvester
        logs = self.logs

        # instantiate the record
        try:
            recjson = self.store.get_record(rec_id)
            record = build_record(recjson)

            if self.dbg:
                print("\t", record.title())

        except Exception as e:
            # the record can not be built: log the rejection using the URL
            # of the record as title since the real title is not known
            print(traceback.format_exc())
            url = OAI_URL % (harvester.host, rec_id)
            logs.append(Msg(harvester=harvester,
                            collection=collection_logs[-1].title,
                            record_id=rec_id,
                            title=url))
            logs[-1].reject(e)
            return

        # start the log for the record
        logs.append(Msg(harvester=harvester,
                        collection=collection_logs[-1].title,
                        record_id=record.id(),
                        title=record.title()))

        # check that the record is well formed
        # repair non-conformity as far as possible
        if not self.check_record(record):
            if self.dbg:
                print("\trecord rejected", logs[-1].txt)

            # stop here whatever the debug mode is,
            # a rejected record must not reach the database
            return

        if self.dbg:
            print("\tinsert record in the database")

        # insert the record in the database
        self.insert_record(record)

        if self.dbg:
            log = logs[-1]
            action = log.action
            action = (action.upper() if isinstance(action, str) else action)
            print("\tlog:", action, log.txt)

    def process_url(self, host, collections):
        """Harvest a series of collections hosted by an invenio store.

        The host and the collections are added to the harvester description,
        the store is instantiated and each collection is processed in turn.

        Note:
            * Design to never stop although exceptions are raised
            * Have a look at ``collection_logs`` and ``logs`` in order to
              understand what happened.

        Args:
            host (unicode):
                host name to query for publications, either
                ``cds.cern.ch`` or ``inspirehep.net``.

            collections (unicode):
                list of collection to be interrogated.
                Collections are separated by a comma.

        """
        if self.dbg:
            print("process URL search")

        # extend harvester for logs
        harvester = self.harvester
        harvester.host = host
        harvester.collections = collections

        # instantiate the store
        self.store = InvenioStore(host)

        # remove the spaces surrounding the commas, then split
        compact = re.sub(" *, *", ",", collections)

        for name in compact.split(","):
            self.process_collection(name)
657 658 659 660

    def report(self):
        """Build the processing report.

661 662
        Returns:
            dict:
LE GAC Renaud's avatar
LE GAC Renaud committed
663 664
                * ``collection_logs`` list of :class:`MsgCollection`
                * ``controller`` unicode
665
                * ``logs`` list of :class:Msg
LE GAC Renaud's avatar
LE GAC Renaud committed
666
                * ``selector`` :class:`plugin_dbui.Selector`
667 668 669 670 671 672

        """

        return dict(collection_logs=self.collection_logs,
                    controller=self.controller,
                    logs=self.logs)