report_tools.py 23.3 KB
Newer Older
1 2 3 4
"""report_tools module

"""
import json
5
import matplotlib as mpl
6
import pandas as pd
7

8

9 10
from .dataframes import to_extjs_gridcolumns
from .event import Event
11 12
from gluon import current
from gluon.storage import Storage
13
from .model_report import AGGREGATE_FUNCS
14
from matplotlib_tools import ticks_and_labels
15
from plugin_dbui import Selector, Store
16
from StringIO import StringIO
17 18 19 20 21 22 23


FIELD_TYPES = {"gridcolumn": "string",
               "booleancolumn": "boolean",
               "datecolumn": "date",
               "numbercolumn": "float"}

24 25 26
GROUPING = {"ftype": "grouping",
            "groupHeaderTpl": "{name}"}

27 28 29 30
GROUPING_SUMMARY = {"ftype": "groupingsummary",
                    "groupHeaderTpl": "{name}",
                    "startCollapsed": False}

31
MSG_NO_DATAINDEX = "The property dataIndex is missing."
32
MSG_NO_KIND = "The graph parameter kind is missing."
33 34
MSG_NO_XTYPE = "The property xtype is missing."

35 36
SUMMARY = {"ftype": "summary"}

37 38 39 40 41

class ReportException(BaseException):
    pass


42
class BaseReport(object):
43 44 45 46 47
    """Base class to build reports.

    Instantiate the DataFrame either using the config option or the
    source name. In both user criteria are applied. They are extracted
    by the selector from the requested variables.
48 49

    Args:
50 51 52 53 54
        config (gluon.storage.Storage): the report configuration.
            The argument ``source_name`` has to be defined when
            the report configuration is empty.
        source_name (str): name of the source uses to generate the DataFrame.
            Useful when it is not defined in the report configuration.
55

LE GAC Renaud's avatar
LE GAC Renaud committed
56 57 58 59 60 61 62
    **Public attributes**:
        * **config** (*gluon.dal.Row*): configuration for the report
        * **df** (*pandas.DataFrame*): data used by the report
        * **selector** (*plugin_dbui.Selector*): user criteria
        * **store** (*dict*): configuration of the ``Ext.data.Store``
        * **grid** (*dict*): configuration of the ``Ext.grid.Panel`` widget

63
    """
64
    def __init__(self, config=Storage(), selector=Storage(), source_name=None):
65 66 67 68 69

        # ....................................................................
        #
        # Get the user selection criteria
        #
70
        self.selector = selector
71 72 73

        # ....................................................................
        #
74
        # Get the configuration
75
        #
76
        self.config = config
77 78 79 80 81

        # ....................................................................
        #
        # Fill the the DataFrame
        #
82 83 84 85
        criteria = Storage(selector.as_dict())

        if config.id_events not in (None, ""):
            criteria.id_events = config.id_events
86

87 88 89
        name = (source_name if config.source is None else config.source)

        func = Event.get_source(name)
90 91 92 93 94 95 96 97 98 99 100 101 102 103

        self.df = df = func(**criteria)

        if not df.empty and config.eval not in (None, ""):
            df.eval(config.eval, inplace=True)

        if not df.empty and config.query not in (None, ""):
            df.query(config.query, inplace=True)

        if not df.empty and selector["query"] not in (None, ""):
            df.query(selector["query"], inplace=True)

        # ....................................................................
        #
LE GAC Renaud's avatar
LE GAC Renaud committed
104
        # Hook for Ext JS store and grid.Panel configurations
105
        #
LE GAC Renaud's avatar
LE GAC Renaud committed
106 107
        self.store = None
        self.grid = None
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133

    def get_grid_configuration(self):
        """Get the configuration for the ``Ext.grid.Panel``.

        Returns:
            gluon.storage.Storage:
                the keys are ``columns`` and ``features`` and the
                corresponding values are list of dictionary.

        """
        return self.grid

    def get_store_configuration(self):
        """Get the configuration for the ``Ext.data.Store``.

        Note:
            The name of the ``Ext.data.Field`` is extract from the column
            definition of the list. it matches the name of the columns in
            the DataFrame.

        Returns:
            plugin_dbui.Store: the configuration of the ``Ext.data.Store``.

        """
        return self.store

134 135
    def get_title(self):
        """Build the report title.
136

137 138
        Returns:
            str:
139

140 141
        """
        T = current.T
142
        db = current.db
143

144 145
        config = self.config
        selector = self.selector
146

147 148
        # from the configuration
        title = (config.title if config.title else config.name)
149

150 151
        # add meta data
        metadata = []
152 153 154
        if selector.id_domains:
            metadata.append(db.domains[selector.id_domains].domain)

155 156
        if selector.id_teams:
            metadata.append(db.teams[selector.id_teams].team)
157

158 159
        if selector.id_projects:
            metadata.append(db.projects[selector.id_projects].project)
160

161 162
        if selector.category:
            metadata.append(selector.category)
163

164 165 166
        if selector.id_people_categories:
            code = db.people_categories[selector.id_people_categories].code
            metadata.append(code)
167

168 169 170
        # add period
        if selector.year_start and not selector.year_end:
            metadata.append(str(selector.year_start))
171

172 173 174
        elif selector.year_start and selector.year_end:
            years = (str(selector.year_start), str(selector.year_end))
            metadata.append(T("from %s to %s", lazy=False) % years)
175

176
        return "%s: %s" % (title, " / ".join(metadata))
177 178


179
class Graph(object):
180 181 182 183 184 185
    """Transform a DataFrame into a graph.

    The rendering is performed by the matplotlib library. Therefore, many
    representations of the data are possible: plot, histogram, bar charts,
    error charts, scatter plots, *etc*.

186 187 188
    Attributes:
        ax (matplotlib.axes or None)

189
    Args:
190 191
        metric2d (Metrics2D2): the data to be plotted.

192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
        config (gluon.storage.Storage):
            the configuration for the graph.
            This is a dictionary with the keyword arguments of the
            ``pandasDataFrame.plot`` method.

        backend (str):
            the name of the matplotlib back-end uses to produce figure.
            The X11 back end is not needed when running on a server.
            In addition Tkinter crash with the message "out of stack space"
            when the 2nd plot is generated. The documentation recommend to
            limit the matplotlib back end to ``Agg`` which is tuned to render
            high quality PNG image. But, it is also design to render PDF
            and SVG image without the X11 interface.

    """
207 208
    def __init__(self, metric2d, backend="Agg"):

209
        # configuration
210
        config = metric2d.config
211

212
        selector = metric2d.selector
213
        selector.graph = json.loads(selector.graph)
214 215

        config_graph = (selector.graph if selector.is_graph else config.graph)
216

217
        if config_graph is None or config_graph["kind"] in (None, ""):
218 219 220
            self.ax = None
            return

221
        kind = config_graph["kind"]
222 223 224
        if selector.is_graph and kind in (None, ""):
            raise ReportException(MSG_NO_KIND)

225 226 227 228 229 230
        # alias
        group_field_x = config.group_field_x
        group_field_y = config.group_field_y
        metric_field_z = config.metric_field_z

        # decode ylim
231
        ylim = config_graph["ylim"]
232
        if len(ylim) == 0:
233
            del config_graph["ylim"]
234

235
        else:
236
            config_graph["ylim"] = (int(el) for el in ylim.split(","))
237

238
        # setup the matplotlib back-end
239
        mpl.use(backend)
240
        mpl.rcParams['legend.fontsize'] = 7
241

242 243 244 245 246 247
        # setup matplotlib style
        mpl.rcParams['xtick.major.size'] = 8
        mpl.rcParams['xtick.minor.size'] = 4
        mpl.rcParams['ytick.major.size'] = 8
        mpl.rcParams['ytick.minor.size'] = 4

248
        # select the tool to render the plot
249
        if kind in ("bar", "barh"):
250
            func = Graph.bar_like
251 252

        elif kind in ("area", "line"):
253
            func = Graph.line_like
254

255 256 257 258 259 260 261 262 263 264 265 266 267 268
        # produce the plot without summary rows or columns
        self.ax = func(metric2d.df0,
                       group_field_x,
                       group_field_y,
                       metric_field_z,
                       config_graph)

    @staticmethod
    def bar_like(df,
                 group_field_x,
                 group_field_y,
                 metric_field_z,
                 config_graph):
        """Produce bar, barh plot
269

270 271 272 273 274 275
        Args:
            df (pandas.DataFrame):
            group_field_x (str):
            group_field_y (str):
            metric_field_z (str):
            config_graph (dict): keyword argument of the DataFrame.plot method
276

277 278
        Returns:
            matplotlib.axes or None
279

280
        """
281 282 283 284 285 286 287 288 289 290
        if group_field_y in df.columns:
            ax = (df
                  .set_index(group_field_y)
                  .transpose()
                  .plot(**config_graph))

        else:
            ax = (df
                  .transpose()
                  .plot(**config_graph))
291 292 293 294

        # tick and axis labels
        xlabel, ylabel = group_field_x, metric_field_z

295
        if config_graph["stacked"]:
296 297
            ylabel += " (stacked)"

298
        if config_graph["kind"] == "barh":
299 300 301
            xlabel, ylabel = ylabel, xlabel

        ticks_and_labels(ax, xlabel, ylabel)
302

303 304 305 306
        # legend
        legend = ax.get_legend()
        if legend is not None:
            ax.get_legend().set_title("")
307

308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
        return ax

    @staticmethod
    def line_like(df,
                  group_field_x,
                  group_field_y,
                  metric_field_z,
                  config_graph):
        """Produce area, line plot

        Args:
            df (pandas.DataFrame):
            group_field_x (str):
            group_field_y (str):
            metric_field_z (str):
            config_graph (dict): keyword argument of the DataFrame.plot method

        Returns:
            matplotlib.axes or None

        """
        # clean the configuration of the graph
330
        del config_graph["width"]
331 332

        # protection
333 334
        linewidth = config_graph["linewidth"]
        config_graph["linewidth"] = (1 if linewidth == 0 else linewidth)
335

336 337
        # graph
        df1 = df.transpose()
338 339 340 341

        if group_field_y in df.columns:
            df1.columns = df1.loc[group_field_y, :]
            df1 = df1[df1.index != group_field_y]
342

343
        ax = df1.plot(**config_graph)
344

345
        # tick and axis labels
346 347
        xlabel, ylabel = group_field_x, metric_field_z

348
        if config_graph["stacked"]:
349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365
            ylabel += " (stacked)"

        if group_field_x == "year":
            ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter("%i"))

        if group_field_y == "year":
            ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter("%i"))

        ticks_and_labels(ax, xlabel, ylabel)

        # legend
        legend = ax.get_legend()
        if legend is not None:
            ax.get_legend().set_title("")

        return ax

366 367 368 369 370 371 372
    def _savefig(self, fmt):
        """Save the figure as a string.

        Args:
            fmt (str): possible values are pdf, png and svg.

        """
373 374 375
        if self.ax is None:
            return ""

376 377
        fig = self.ax.get_figure()

378 379 380 381
        fi = StringIO()
        fig.savefig(fi, format=fmt)
        data = fi.getvalue()
        fi.close()
382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415

        fig.clear()
        mpl.pyplot.close(fig)

        return data

    def to_pdf(self):
        """Encode the graph using the PDF format

        Returns:
            str:

        """
        return self._savefig("pdf")

    def to_png(self):
        """Encode the graph using the PNG format.

        Returns:
            str:

        """
        return self._savefig("png")

    def to_svg(self):
        """Encode the graph using the SVG format.

        Returns:
            str:

        """
        return self._savefig("svg")


416
class List(BaseReport):
417 418 419 420 421 422 423 424 425 426 427 428 429
    """Generate the list and related configurations for ``Ext.grid.Panel``
    and ``Ext.data.Store``.

    Args:
        id_list: report identifier in the database

    Raises:
        ReportException:
            when the errors are found in the configuration of the grid columns.

    """
    def __init__(self, id_list):

430 431
        db = current.db
        virtdb = current.virtdb
432 433 434 435 436

        BaseReport.__init__(
            self,
            config=db.lists2[id_list],
            selector=Selector(virtdb.selector_list))
437 438 439

        # ....................................................................
        #
440
        # Get the configuration for the grid columns and check it
441
        #
442
        config = self.config
443 444 445 446 447 448 449 450

        grid_columns = [Storage(el) for el in json.loads(config.columns)]
        [self._check_column(el) for el in grid_columns]

        # ....................................................................
        #
        # Configure Ext JS store
        #
LE GAC Renaud's avatar
LE GAC Renaud committed
451
        self.store = store = Store(data=[], fields=[], sorters=[])
452 453 454 455 456 457 458 459 460 461 462 463 464

        [self._add_store_fields(col) for col in grid_columns]
        [self._add_store_sorter(sorter) for sorter in config.sorters]

        store.groupField = config.group_field

        # add the data to the store
        #
        # extract the list of records as a JSON-string
        # at this stage date/time are converted as an ISO8601 string
        #
        # convert the JSON-string into a list
        #
465 466
        df = self.df

467 468 469 470 471 472 473 474
        if not df.empty:
            data = df.to_json(orient="records", date_format="iso")
            store.data = json.loads(data)

        # ....................................................................
        #
        # Configure Ext JS grid
        #
LE GAC Renaud's avatar
LE GAC Renaud committed
475
        self.grid = grid = Storage(columns=[], features=[])
476
        grid.columns = grid_columns
477

478
        if len(config.group_field) > 0:
479 480

            if config.summary_group:
481
                grid.features.append(GROUPING_SUMMARY)
482
            else:
483
                grid.features.append(GROUPING)
484

485
        if config.summary_all:
486 487
            grid.features.append(SUMMARY)

488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
    def _add_store_fields(self, column):
        """Add ``Ext.data.Field`` to the store configuration
        It is derived from the ``Ext.grid.Column`` configuration.

        Args:
            column (gluon.storage.Storage):
                configuration for ``Ext.grid.Column``

        """
        # protection against rownumberer column
        if not column.dataIndex:
            return

        cfg = Storage(name=column.dataIndex,
                      type=FIELD_TYPES[column.xtype])

        self.store.fields.append(cfg)

    def _add_store_sorter(self, field):
        """Add sorter to the store configuration.

        Args:
            field (str): name of the DataFrame column either foo or ~foo.

        """
        sorter = Storage(property=field.replace("~", ""))

        if field.startswith("~"):
            sorter.direction = "DESC"

        self.store.sorters.append(sorter)

    def _check_column(self, column):
        """Check column configuration:

            - Raise an exception if xtype is not defined
            - Raise an exception if dataIndex is not defined

        Args:
            column (gluon.storage.Storage):

        """
        T = current.T

        xtype = column.xtype
        if not xtype:
            raise ReportException(T(MSG_NO_XTYPE))

        dataIndex = column.dataIndex
        if (xtype != "rownumberer" and dataIndex is None):
            raise ReportException(T(MSG_NO_DATAINDEX))

540

541
class Metric2D(BaseReport):
542 543 544 545 546 547
    """Generate the Metric2D and related configurations for ``Ext.grid.Panel``
    and ``Ext.data.Store``.

    Args:
        id_report: report identifier in the database

LE GAC Renaud's avatar
LE GAC Renaud committed
548 549 550 551 552
    **Public attributes**:
        * **df** (*pandas.DataFrame*): metric with summary rows and columns
        * **df0** (*pandas.DataFrame*): metric without summary rows and columns
        * **is_summary** (*bool*): summary rows or columns are there

553 554 555 556 557 558 559
    Raises:
        ReportException:
            when the errors are found in the configuration of the grid columns.

    """
    def __init__(self, id_report):

560 561
        db = current.db
        virtdb = current.virtdb
562 563 564 565 566

        BaseReport.__init__(
            self,
            config=db.metrics2d2[id_report],
            selector=Selector(virtdb.selector_metric2d))
567

568 569
        self.is_summary = False

570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585
        # ....................................................................
        #
        # build the 2D table
        #
        config = self.config

        field_x = config.group_field_x
        field_y = config.group_field_y
        field_z = config.metric_field_z

        aggregate_func_z = AGGREGATE_FUNCS[config.aggregate_func_z]

        df = (self.df
              .groupby([field_x, field_y])
              .agg({field_z: aggregate_func_z})
              .unstack(level=0)
586 587
              .fillna(0)
              .sort_index())
588 589 590 591 592 593 594

        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(1)

        if isinstance(df.index, pd.MultiIndex):
            df.index = df.index.get_level_values(1)

595 596
        # add summaries for rows and columns
        df = self._add_summaries(df)
597

598
        # move the index to column in order to ease the data extraction
599
        # update the attribute df for a later use
600 601 602
        self.df = df = df.reset_index()
        if self.is_summary:
            self.df0 = self.df0.reset_index()
603

604 605 606 607
        # ....................................................................
        #
        # Configure Ext JS store
        #
LE GAC Renaud's avatar
LE GAC Renaud committed
608
        self.store = store = Store(data=[], fields=[], sorters=[])
609 610

        for col in df.columns:
611
            di = {"name": col}
612 613 614 615 616 617 618 619 620 621 622
            di["type"] = ("string" if col == field_y else "number")
            store.fields.append(di)

        if not df.empty:
            data = df.to_json(orient="records", date_format="iso")
            store.data = json.loads(data)

        # ....................................................................
        #
        # Configure Ext JS grid
        #
LE GAC Renaud's avatar
LE GAC Renaud committed
623
        self.grid = grid = Storage(columns=[], features=[])
624

625 626 627
        # numberer the row
        grid.columns.append({"xtype": "rownumberer"})

628 629 630 631 632 633
        # the first column is field_y
        grid.columns.append({"text": field_y.title(),
                             "dataIndex": field_y,
                             "flex": 0.8,
                             "xtype": "gridcolumn"})

LE GAC Renaud's avatar
LE GAC Renaud committed
634
        # add the remaining columns
635 636 637 638
        for col in df.columns:
            if col == field_y:
                continue

639
            name = col
640 641 642 643 644 645
            grid.columns.append({"text": name,
                                 "dataIndex": name,
                                 "align": "right",
                                 "flex": 0.5,
                                 "formatter": "round(2)",
                                 "xtype": "numbercolumn"})
646 647 648 649

    def _add_summaries(self, df):
        """Add summaries rows and/or columns

650
        A summary is a sum of all the element in a row or in a column.
651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
        Several functions can be applied to count or to compute basic
        estimators like mean, median or standard deviation...

        Summaries are defined in the configuration of the report.
        It is superseded by the summaries defined in the selector.

        Args:
            df (pandas.DataFrame)

        Returns:
            pandas.DataFrame
        """
        config = self.config
        selector = self.selector

666 667 668
        summary_x = (
            json.loads(selector.summary_x) if selector.is_summary_x else
            config.summary_x)
669

670 671 672
        summary_y = (
            json.loads(selector.summary_y) if selector.is_summary_y else
            config.summary_y)
673

674 675
        # preserve the original DataFrame in order to avoid pollution
        # when series of summaries is computed
676
        self.df0 = df
677 678

        # add summary rows and or columns
679
        self.is_summary = is_summary = \
680 681 682 683
            (summary_x is not None and len(summary_x["functions"]) > 0) or \
            (summary_y is not None and len(summary_y["functions"]) > 0)

        if is_summary:
684
            self.df0 = df0 = df.copy()
685

686 687
            dfx = self.summaries(df0, summary_x, axis="columns")
            df = pd.concat((df, dfx), axis="columns")
688

689
            if isinstance(df.index, pd.CategoricalIndex):
690
                df.index = df.index.astype(str)
691

692 693
            dfy = self.summaries(df0, summary_y, axis="rows")
            df = pd.concat((df, dfy))
694

695
        return df
696

697 698 699 700
    def summaries(self, df, config, axis="columns"):
        """Compute the summary per row/column of the DataFrame according to
        the configuration parameters and return results in as a separated
        DataFrame. The input DataFrame is not modified.
701

702
        Typical summaries are sum, count, mean, median, standard deviation, ...
703

704 705 706
        Args:
            df (pandas.DataFrame):
            config (dict): configuration parameters for summaries:
707

708 709 710 711 712
                    * functions (str):
                        - string of values separated by comma
                        - list JSON-encoded

                    * labels (str): string of values separated by comma
713

714 715 716 717 718 719 720 721
            axis (str): either ``rows`` of ``columns``.

        Returns:
            pandas.DataFrame:
                contains only the summary data.
                The name of the columns are those defined in the labels.
                They are equal to the name of functions when labels
                are not defined.
722

723 724 725
        """
        df1 = pd.DataFrame()

726
        funcs = config["functions"]
727 728
        if funcs in ("", '""', "''", '[]'):
            return df1
729

730
        if isinstance(funcs, str):
731
            funcs = funcs.split(",")
732

733
        labels = config["labels"]
734 735 736
        if labels in ("", '""', "''", "[]"):
            labels = funcs
        else:
737
            labels = labels.split(",")
738

739 740
        if len(labels) != len(funcs):
            labels = funcs
741

742 743 744 745 746 747 748 749 750 751 752 753 754
        for i, fname in enumerate(funcs):
            func = AGGREGATE_FUNCS[fname]
            label = labels[i]

            # summary per row is added as a new column
            if axis == "columns":
                df1[label] = df.apply(func, axis=axis)

            # summary per column is added as a new row
            else:
                serie = df.apply(func, axis="rows")
                dfy = pd.DataFrame(serie, columns=[label])
                df1 = pd.concat((df1, dfy.T))
755

756
        return df1
757 758 759 760 761 762 763 764 765 766 767 768 769 770 771


class Source(BaseReport):
    """Transform the content of the source into configuration for
    an ``Ext.grid.Panel`` and ``Ext.data.Store`` objects.

    Args:
        name: name of the source

    Raises:
        ReportException:

    """
    def __init__(self, name):

772
        virtdb = current.virtdb
773 774 775 776 777

        BaseReport.__init__(
            self,
            source_name=name,
            selector=Selector(virtdb.selector_source))
778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812

        df = self.df
        self.source_name = name

        # ....................................................................
        #
        # Get the configuration for the Ext.grid.Panel
        #
        self.grid = grid = Storage(columns=to_extjs_gridcolumns(df, meta=True))

        # ....................................................................
        #
        # Get the configuration for the Ext.data.Store
        #
        self.store = store = Store(data=[], fields=[], sorters=[])

        for col in grid.columns:

            if "dataIndex" not in col:
                continue

            cfg = Storage(name=col["dataIndex"],
                          type=FIELD_TYPES[col["xtype"]])

            store.fields.append(cfg)

        if not df.empty:
            data = df.to_json(orient="records", date_format="iso")
            store.data = json.loads(data)

    def get_title(self):

        T = current.T
        self.config.title = "%s %s" % (T("Source"), self.source_name)
        return BaseReport.get_title(self)