"""A collection of tools to build graphs in controllers. """ import matplotlib as mpl import pandas as pd from gluon import current from io import StringIO from .model_selector import MONTH_SUBMIT, YEAR_PUBLISH from pandas import DataFrame, DatetimeIndex, to_datetime DATE_PUB = "Publication date" DATE_SUB = "Submission date" FROM_TO = "from %s to %s" LABELY = "Number of publications" LABELY_CUM_MONTH = "Cumulative sum of publications / month" LABELY_CUM_YEAR = "Cumulative sum of publications / year" LABELY_MONTH = "Number of publications / month" LABELY_YEAR = "Number of publications / year" TITLE = "Publications" TITLE_CUMSUM = "Cumulative sum of publications" PUBLICATIONS_QUERY_FIELDS = [ "Graph_selectorAuthor", "Graph_selectorId_authors_roles", "Graph_selectorId_categories", "Graph_selectorId_fundings", "Graph_selectorId_projects", "Graph_selectorId_teams", "Graph_selectorYear_end", "Graph_selectorYear_start"] def db2df(db, query, fields=[], columns=None): """Transform a database query into a dataframe. Example:: # database query query = history.id_events == id_event query &= history.id_projects == id_project query &= history.id_fundings == db.fundings.id query &= history.id_people == db.people.id query &= history.id_people_categories == db.people_categories.id query &= history.id_teams == db.teams.id # fields to be extracted from the database fields = [ history.id_domains, history.id_events, history.id_people, history.id_projects, history.id_teams, db.teams.team, db.people_categories.category, db.people_categories.code, db.people.first_name, db.people.last_name, history.percentage, history.start_date, history.end_date, history.data, db.fundings.agency] # get the DataFrame df = db2df(query, fields) print df.info() Args: db (pyDAL.DAL): database connection. query: database query including statement to join foreign table. fields (list): list of pyDAL.Field to be extracted from the query. All fields are extracted when not defined. columns (list): name of the columns in the dataframe. There is one to one correspondence between the fields and columns list. Names of database field are used when columns is not defined. Returns: pandas.DataFrame """ if columns is None and len(fields) > 0: columns = [field.name for field in fields] rows = db.executesql(db(query)._select(*fields)) return pd.DataFrame.from_records(list(rows), columns=columns) def emptychart(db, selector, target=None): """Empty graph as a function of month. Args: db (gluon.DAL): database connection selector (plugin_dbui.Selector): selector with user critera target (matplotlib.AxesSubplot): an existing axes useful when working with subplot. Returns: matplotlib.AxesSubplot: axes subplot """ T = current.T year_start = selector.Graph_selectorYear_start year_end = selector.Graph_selectorYear_end if year_start and not year_end: dates = to_datetime(["%s-01-01" % year_start, "%s-12-31" % year_start]) elif year_start and year_end: dates = to_datetime(["%s-01-01" % year_start, "%s-12-31" % year_end]) else: year = current.request.now.year dates = to_datetime(["%i-01-01" % year, "%i-12-31" % year]) df = DataFrame([0]*2, index=dates) ax = df.plot(legend=False, grid=True, x_compat=True, ax=target) # x and y ticks ax.minorticks_on() ax.tick_params(which="major", length=8) ax.tick_params(which="minor", length=4) # x and y labels xlabel = T(DATE_SUB).decode("utf-8") ax.set_xlabel(xlabel, x=1, horizontalalignment="right") ylabel = T(LABELY).decode("utf-8") ax.set_ylabel(ylabel, y=1, horizontalalignment="right") # main title of the graph set_title(ax, db, selector) return ax def mplstyle(): """Helper function to define matplotlib style for plots. """ mpl.rcParams["font.size"] = 8 mpl.rcParams["grid.alpha"] = 0.5 mpl.rcParams["grid.linestyle"] = "dotted" mpl.rcParams["xtick.direction"] = "in" mpl.rcParams["xtick.top"] = True mpl.rcParams["xtick.bottom"] = True mpl.rcParams["xtick.major.size"] = 8 mpl.rcParams["xtick.minor.size"] = 4 mpl.rcParams["xtick.labelsize"] = 8 mpl.rcParams["ytick.direction"] = "in" mpl.rcParams["ytick.left"] = True mpl.rcParams["ytick.right"] = True mpl.rcParams["ytick.major.size"] = 8 mpl.rcParams["ytick.minor.size"] = 4 mpl.rcParams["ytick.labelsize"] = 8 def linechart(db, selector, target=None, title=None, xlabel=None, ylabel=None): """Build a line chart showing the number of publications as a function of the time. Args: db (pydal.DAL): database connection selector (Storage): value returned by the graph selector. target (matplotlib.AxesSubplot): an existing axes useful when working with subplot. title (bool): define from the user criteria when None. xlabel (bool): define automatically when None. ylabel (bool): define automatically when None. Returns: matplotlib.AxesSubplot: axes subplot """ T = current.T publications = db.publications # ........................................................................ # # variables depending on the time axis # time_axis = selector.Graph_selectorTime is_submit_month = time_axis == T(MONTH_SUBMIT) is_publish_year = time_axis == T(YEAR_PUBLISH) to_time = (submitted2YM if is_submit_month else submitted2Y) to_time = (published2Y if is_publish_year else to_time) # ........................................................................ # # instantiate the DataFrame # query = query_publications(db, is_publish_year, **selector) fields = [ publications.year, publications.submitted] columns = ["published", "submitted"] df = (db2df(db, query, fields, columns) .assign( axis=1, time=to_time) .drop(["published", "submitted"], axis="columns") .groupby(["time", "axis"]) .size() .unstack(level=1) .fillna(0)) if df.empty: return emptychart(db, selector) # post processing is_cumsum = selector.Graph_selectorCumulative == "true" if is_cumsum: df = df.cumsum() # ........................................................................ # # instantiate the graph # if is_submit_month: df.index = DatetimeIndex(df.index) if df.size == 1: kwargs = dict(color="lavender", kind="bar", edgecolor="blue", rot=0) else: kwargs = dict(colormap="Pastel1", kind="line", rot=35, x_compat=True) ax = df.plot(grid=True, legend=False, ylim=(0, None), ax=target, **kwargs) # x and y ticks ax.minorticks_on() ax.tick_params(which="major", length=8) ax.tick_params(which="minor", length=4) # x label if xlabel is None: xlabel = T(DATE_PUB if is_publish_year else DATE_SUB).decode("utf-8") ax.set_xlabel(xlabel, x=1, horizontalalignment="right") # y label if ylabel is None: if is_cumsum and is_submit_month: ylabel = LABELY_CUM_MONTH elif is_cumsum: ylabel = LABELY_CUM_YEAR elif is_submit_month: ylabel = LABELY_MONTH else: ylabel = LABELY_YEAR ylabel = T(ylabel).decode("utf-8") ax.set_ylabel(ylabel, y=1, horizontalalignment="right") # main title of the graph if title is None: set_title(ax, db, selector) else: ax.set_title(title) return ax def query_publications(db, is_publish, **kwargs): """Construct the query to select publications according to user criteria, team, project, year, etc. The operator AND is applied between conditions. The keyword dictionary is the request.vars coming from the graph_selector. Args: db (gluon.dal.DAL): database connection. is_publish (bool): select using either published year or submitted one. Keyword Args: Graph_selectorAuthor (str): Graph_selectorId_authors_roles (str): Graph_selectorId_categories (str): Graph_selectorId_fundings (str): Graph_selectorId_projects (str): Graph_selectorId_teams (str): Graph_selectorYear_end (str): Graph_selectorYear_start (str): * other keywords are ignored Returns: gluon.DAL.Query """ di = {k: kwargs[k] for k in PUBLICATIONS_QUERY_FIELDS if k in kwargs} publications = db.publications query = None year_end = di["Graph_selectorYear_end"] year_start = di["Graph_selectorYear_start"] if is_publish: if year_start and not year_end: query = publications.year == int(year_start) elif year_start and year_end: q_start = publications.year >= int(year_start) q_end = publications.year <= int(year_end) query = ((q_start) & (q_end)) elif year_end and not year_start: query = publications.year <= int(year_end) else: if year_start and not year_end: query = publications.submitted[0:4] == year_start elif year_start and year_end: q_start = publications.submitted[0:4] >= year_start q_end = publications.submitted[0:4] <= year_end query = ((q_start) & (q_end)) elif year_end and not year_start: query = publications.submitted[0:4] <= year_end for k, v in di.items(): if k in ("Graph_selectorYear_start", "Graph_selectorYear_end"): continue if v in ('', None): continue if k == "Graph_selectorAuthor": qi = publications.authors_institute.contains(v) elif k == "Graph_selectorId_authors_roles": qi = publications.id_authors_role == int(v) elif k == "Graph_selectorId_categories": qi = publications.id_categories == int(v) elif k == "Graph_selectorId_projects": qi = publications.id_projects == int(v) elif k == "Graph_selectorId_teams": qi = publications.id_teams == int(v) else: continue query = (qi if query is None else (query) & (qi)) query = (publications.id > 0 if query is None else query) return query def savefig(fig, fmt): """Save the figure as a string. Args: fig (matplotlib.figure): the figure. fmt (str): possible values are pdf, png and svg. Return: str: """ fi = StringIO() fig.savefig(fi, format=fmt) data = fi.getvalue() fi.close() fig.clear() mpl.pyplot.close(fig) return data def set_title(ax, db, selector): """Set the title for the graph. Args: ax (matplotlib.AxesSubplot): axes subplot db (gluon.DAL): database connection selector (plugin_dbui.Selector): selector with user critera """ T = current.T id_project = selector.Graph_selectorId_projects id_team = selector.Graph_selectorId_teams team_project = "" if id_team and not id_project: team_project = db.teams[int(id_team)].team elif not id_team and id_project: team_project = db.projects[int(id_project)].project elif id_team and id_project: team = db.teams[int(id_team)].team project = db.projects[int(id_project)].project team_project = "%s/%s" % (team, project) if selector.Graph_selectorCumulative == "true": title = "%s %s" % (T(TITLE_CUMSUM), team_project) else: title = "%s %s" % (T(TITLE), team_project) year_start = selector.Graph_selectorYear_start year_end = selector.Graph_selectorYear_end period = "" if year_start and year_end and year_start != year_end: period = T(FROM_TO) % (year_start, year_end) elif year_start: period = year_start title = "%s %s" % (title, period) ax.set_title(title.decode("utf-8")) def stackchart(db, selector, target=None): """Build a stacked chart showing the number of publications as a function of the time. Args: db (pydal.DAL): database connection selector (Storage): value returned by the graph selector. target (matplotlib.AxesSubplot): an existing axes useful when working with subplot. Returns: matplotlib.AxesSubplot: axes subplot """ T = current.T graph = db.graphs[int(selector.Graph_selectorId_graphs)] publications = db.publications # ........................................................................ # # variables depending on the time axis # time_axis = selector.Graph_selectorTime is_submit_month = time_axis == T(MONTH_SUBMIT) is_publish_year = time_axis == T(YEAR_PUBLISH) to_time = (submitted2YM if is_submit_month else submitted2Y) to_time = (published2Y if is_publish_year else to_time) # ........................................................................ # # instantiate the DataFrame # query = query_publications(db, is_publish_year, **selector) # link to the axis table axis = graph.stack_axis granularity = graph.stack_granularity table = db[axis] field = table[granularity] query &= publications["id_%s" % axis] == table["id"] fields = [ publications.year, publications.submitted, field] columns = ["published", "submitted", "axis"] df = (db2df(db, query, fields, columns) .assign(time=to_time) .drop(["published", "submitted"], axis="columns") .groupby(["time", "axis"]) .size() .unstack(level=1) .fillna(0)) if df.empty: return emptychart(db, selector, target) # post processing is_cumsum = selector.Graph_selectorCumulative == "true" if is_cumsum: df = df.cumsum() # ........................................................................ # # instantiate the graph # kwargs = {} if is_submit_month: df.index = DatetimeIndex(df.index) kwargs = dict(kind="area", x_compat=True) else: rot = (0 if df.index.size == 1 else 15) kwargs = dict(kind="bar", linewidth=0, rot=rot, width=1.) ax = df.plot(colormap="Pastel1", grid=True, stacked=True, ylim=(0, None), ax=target, **kwargs) # x and y ticks ax.minorticks_on() ax.tick_params(which="major", length=8) ax.tick_params(which="minor", length=4) # x and y labels xlabel = T(DATE_PUB if is_publish_year else DATE_SUB).decode("utf-8") ax.set_xlabel(xlabel, x=1, horizontalalignment="right") # y label if is_cumsum and is_submit_month: ylabel = LABELY_CUM_MONTH elif is_cumsum: ylabel = LABELY_CUM_YEAR elif is_submit_month: ylabel = LABELY_MONTH else: ylabel = LABELY_YEAR ylabel = T(ylabel).decode("utf-8") ax.set_ylabel(ylabel, y=1, horizontalalignment="right") # legend title is the name of the axis axis = (axis if axis == "categories" else graph.stack_granularity) legend = ax.get_legend() legend.set_title(T(axis).decode("utf-8")) # main title of the graph set_title(ax, db, selector) return ax def submitted2YM(df): """ Args: df (pandas.DataFrame): contains the column ``submitted``. Returns: str: YYYY-MM """ return df.submitted.str[0:7] def submitted2Y(df): """ Args: df (pandas.DataFrame): contains the column ``submitted``. Returns: str: YYYY """ return df.submitted.str[0:4] def published2Y(df): """ Args: df (pandas.DataFrame): contains the column ``published``. Returns: str: YYYY """ return df.published.astype(str)