Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 6c8c1ef2 authored by LE GAC Renaud
Browse files

Redesign the graph section using the power of pandas.

parent 8fe8156a
......@@ -7,19 +7,12 @@ import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from datetime import datetime
from gluon.storage import Storage
from graph_tools import (do_labels,
do_linechart,
do_stackedchart,
do_title,
savefig)
from pandas import DataFrame
from plugin_dbui import Selector
from reporting_tools import repr_team_project
TITLE_Y = "Number of publications"
from graph_tools import (FROM_TO,
LABELY_YEAR,
linechart,
savefig,
stackchart)
def dashboard():
......@@ -28,73 +21,68 @@ def dashboard():
the current year.
"""
current_year = datetime.now().year
cfg = Storage()
cfg.Graph_selectorCumulative = "True"
cfg.Graph_selectorId = ""
cfg.Graph_selectorId_authors_roles = ""
cfg.Graph_selectorId_graphs = ""
cfg.Graph_selectorId_projects = ""
cfg.Graphs_selectorId_teams = ""
cfg.Graph_selectorTime = ""
cfg.Graph_selectorYear_start = ""
cfg.Graph_selectorYear_end = ""
request.vars.update(cfg)
fields = ("cumulative", "id_graphs", "time", "year_start", "year_end")
selector = Selector(virtdb.graph_selector, exclude_fields=fields)
current_year = request.now.year
selector = Storage()
selector.Graph_selectorCumulative = "true"
selector.Graph_selectorId = ""
selector.Graph_selectorId_authors_roles = ""
selector.Graph_selectorId_graphs = ""
selector.Graph_selectorId_projects = ""
selector.Graphs_selectorId_teams = ""
selector.Graph_selectorTime = ""
selector.Graph_selectorYear_start = ""
selector.Graph_selectorYear_end = ""
# figure layout
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharey=True)
fig.subplots_adjust(wspace=0.01)
# histogram of the number of publications per year
selector.cumulative = False
selector.time = T("year")
selector.year_start = ""
selector.year_end = current_year - 1
# the number of publications per year except for the current year
min_year = "2009"
max_year = str(current_year - 1)
selector.Graph_selectorCumulative = "false"
selector.Graph_selectorTime = T("year")
selector.Graph_selectorYear_start = min_year
selector.Graph_selectorYear_end = max_year
do_linechart(db.publications, selector, target=ax1)
do_labels(ax1, "", T(TITLE_Y))
title = (T(FROM_TO) % (min_year, max_year)).decode("utf-8")
ylabel = T(LABELY_YEAR).decode("utf-8")
# the cumulative sum of publications for the current year
selector.cumulative = True
selector.time = T("month")
selector.year_start = current_year
selector.year_end = ""
linechart(db,
selector,
target=ax1,
xlabel="",
ylabel=ylabel,
title=title)
do_linechart(db.publications, selector, target=ax2)
# the cumulative sum of publications for the current year on a month
year = str(current_year)
selector.Graph_selectorCumulative = "true"
selector.Graph_selectorTime = T("month")
selector.Graph_selectorYear_start = year
selector.Graph_selectorYear_end = ""
title = (T("In %s") % year).decode("utf-8")
linechart(db, selector, target=ax2, title=title)
# delegate the rendering to the view
response.view = "graphs/index.html"
return dict(data=savefig(fig, "svg"))
def index():
def publications_versus_time():
"""Generate graph showing the number of publications per month / year
either as a linechart or as a stacked histograms.
either as a line chart or as stacked histograms.
"""
fields = \
("author", "cumulative", "id_graphs", "time", "year_start", "year_end")
selector = Selector(virtdb.graph_selector, exclude_fields=fields)
# graph configuration
graph = db.graphs[selector.id_graphs]
# stacked chart
if graph.stack_axis:
ax = do_stackedchart(db.publications, selector, graph)
# line chart
else:
ax = do_linechart(db.publications, selector)
# user criteria and graph configuration
selector = request.vars
graph = db.graphs[selector.Graph_selectorId_graphs]
axis = graph.stack_axis
do_labels(ax, "", T(TITLE_Y))
do_title(ax, db, selector)
# instantiate the graph
ax = (stackchart(db, selector) if axis else linechart(db, selector))
# delegate the rendering to the view
extension = request.extension
......@@ -105,4 +93,5 @@ def index():
if fmt in ("pdf", "png"):
data = base64.b64encode(data)
response.view = "graphs/index.%s" % extension
return dict(data=data)
......@@ -19,6 +19,7 @@
'about': 'à propos',
'ACL': 'ACL',
'ACLN': 'ACLN',
'acte de conférence': 'acte de conférence',
'ACTI': 'ACTI',
'Action': 'Action',
'ACTN': 'ACTN',
......@@ -26,6 +27,8 @@
'Added': 'Ajouté',
'Address of the invenio store where the search is performed.': 'Adresse du site invenio où les recherches sont effectuées.',
'administrators, librairians,...': 'administrateurs, documentalistes,...',
'AFF': 'AFF',
'affiche': 'affiche',
'Affiliation': 'Affiliation',
'affiliation keys': "clés définissant l'affiliation",
'Affiliation keys are not defined !!!': "Les clés définissant votre affiliation n'existe pas !!!",
......@@ -85,6 +88,8 @@
'Binary files': 'fichiers binaires',
'Book': 'Ouvrage',
'book': 'ouvrage',
'BRE': 'BRE',
'brevet': 'brevet',
'but with different category: %s': 'mais avec une catégorie différente : %s',
'cache': 'cache',
'Can be applied on any field of the table using the SQL WHERE syntax. Be aware that foreign key are not resolved (see smart_query in the web2py)': 'Peut être appliqué à tous les champs de la table publications en utilisant la syntaxe SQL WHERE. Attention, les clés étrangères ne sont pas résolues (voir le chapitre smart_query dans la documentation web2py)',
......@@ -166,6 +171,8 @@
'Created On': 'Created On',
'Cumulative': 'Cumulé',
'Cumulative sum of publications': 'Somme cumulée des publications',
'Cumulative sum of publications / month': 'Somme cumulée des publications / mois',
'Cumulative sum of publications / year': 'Somme cumulée des publications / année',
'dashboard': 'tableau de bord',
'Data base scheme': 'Schéma base de donnée',
'Data institute': 'Les données du laboratoire',
......@@ -201,6 +208,7 @@
'Direction Row': 'Direction Row',
'Direction Vertical': 'Direction Vertical',
'Directors': 'Directeurs',
'DO': 'DO',
'Documentation for developers': 'Documentation développeurs',
'documentations': 'documentations',
'Documentations': 'Documentations',
......@@ -266,8 +274,8 @@
'from %s to %s': 'de %s à %s',
'Function disabled': 'Fonction désactivée',
'General': 'Général',
'granularity': 'granularité',
'Granularity': 'Granularité',
'granularity': 'granularité',
'Granularity Column': 'Granularity Column',
'Granularity Horizontal': 'Granularity Horizontal',
'Granularity Level 1': 'Granularity Level 1',
......@@ -294,6 +302,7 @@
'Harvester(s)': 'Moissonneur(s)',
'harvesters': 'moissonneurs',
'Harvesters are ran automatically when the value is equal to true.': 'Les moissonneurs sont exécutés automatiquement quand cette valeur est égale à vrai.',
'HDR': 'HDR',
'Header': 'En tête',
'Header / Footer': 'Header / Footer',
'Header/Footer': 'Header/Footer',
......@@ -305,6 +314,8 @@
'Id': 'Id',
'Identical harvester already exists.': 'Ce moissonneur existe dans la base de données.',
'Import/Export': 'Importer/Exporter',
'In %s': 'En %s',
'indéfini': 'indéfini',
'insert MARCXML': 'insérer MARCXML',
'insert new': 'insert new',
'insert new %s': 'insert new %s',
......@@ -313,6 +324,7 @@
'Institute identifier in inspirehep.net.': 'Identifiant du laboratoire dans inspirehep.net.',
'Institute not found in the inspirehep database!': "Le laboratoire n'a pas été trouvé !",
'Institute number associated to CPPM authors': "Numéro de l'Institut associé aux auteurs du CPPM",
'INV': 'INV',
'Invalid': 'Non conforme',
"Invalid database table '%s'": "Invalid database table '%s'",
'Invalid email': 'Invalid email',
......@@ -386,6 +398,7 @@
'month': 'mois',
'More than one affiliation for the selected author!': "Plus d'une affiliation pour l'auteur sélectionné !",
'my_authors': 'auteurs du laboratoire',
'mémoire': 'mémoire',
'Name': 'Nom',
'Name of a function located in the modules list_postprocessing. Can be a list of name separated by comma.': 'Liste de fonction du module list_postprocessing, séparé par une virgule. Les fonctions disponibles sont : clean, highlight_my_authors, highlight_my_speaker, remove_undef.',
'Name of the database table containing the publications shown in this section.': 'Nom de la table qui contient les publications qui seront présentées dans cette section.',
......@@ -398,6 +411,7 @@
'Niveau 4': 'Niveau 4',
'No harvesters for your selection !!!': 'Pas de moissonneurs pour votre sélection !!!',
'Nom': 'Nom',
'Nombre de publications / année': 'Nombre de publications / année',
'not install': 'pas installé',
'Note': 'Note',
'Notes and reports to committees': 'Notes et Rapports à des Comités',
......@@ -406,6 +420,8 @@
'Number of invalid records': "Nombre d'enregistrement non valide",
'Number of invalid records: %s': "Nombre d'enregistrement non valide : %s",
'Number of publications': 'Nombre de publications',
'Number of publications / month': 'Nombre de publications / mois',
'Number of publications / year': 'Nombre de publications / année',
'Number of records added in the database': "Nombre d'enregistrements ajoutés à la base de donnée",
'Number of records already registered': "Nombre d'enregistrements déjà enregistrés dans la base de donnée",
'Number of records already validated': "Nombre d'enregistrements déjà validé",
......@@ -431,6 +447,9 @@
'organisations': 'organisations',
'organizations': 'organizations',
'Origin': 'Origine',
'OS': 'OS',
'ouvrage': 'ouvrage',
'OV': 'OV',
'Page generated in %s seconds': 'Page générée en %s secondes',
'pages': 'pages',
'Pages': 'Pages',
......@@ -443,6 +462,7 @@
'patent': 'brevet',
'PDF file url': 'URL du pdf',
'Period': 'Période',
'PHD ': 'PHD ',
'PhD Thesis, ...': 'Doctorat, habilitation à diriger les recherches, ...',
'PhDs': 'PhDs',
'please input your password again': 'please input your password again',
......@@ -456,9 +476,10 @@
'poster': 'affiche',
'Posters': 'Affiches',
'Postprocessing': 'Postprocessing',
'PRE': 'PRE',
'Preference REG_INSTITUTE is not defined.': 'La préférence REG_INSTITUTE est indéfinie.',
'Preferences': 'Préférences',
'preferences': 'préférences',
'Preferences': 'Préférences',
'Preprint': 'Preprint',
'preprint': 'preprint',
'Preprint identifier separated by comma: arXiv:0906.1516': 'Numéro(s) du preprint séparé par des virgules: arXiv:0906.1516',
......@@ -476,6 +497,7 @@
'Projets': 'Projets',
'properties': 'propriétés',
'Property': 'Propriété',
'présentation orale': 'présentation orale',
'Publication': 'Publication',
'Publication category associated to the found records.': 'Catégorie associée aux enregistrements.',
'Publication not found!': "La publication n'a pas été trouvé !",
......@@ -488,6 +510,7 @@
'Publishers is not defined': "La revue n'est pas définie",
'Python API': 'Python API',
'Query:': 'Query:',
'rapport': 'rapport',
'Ratio': 'Ratio',
'Record': 'Enregistrement',
'record id': 'record id',
......@@ -626,6 +649,7 @@
'String containing blabla and the database fields to be displayed. The substitution mechanism is: {tablename.fieldname} or {foreigntablename.fieldname}': 'Chaîne de caractère contenant du texte et des champs de la base de donnée. Les régles de substitution sont : {tablename.fieldname} ou {foreigntablename.fieldname}',
'String containing the author names with their institutes number.': 'Chaîne de caractère contenant le nom des auteurs avec le numéro de leurs instituts.',
'string indices must be integers, not str': 'string indices must be integers, not str',
'Submission date': 'Date de soumission',
'Submitted': 'Soumis',
'Submitted date is not defined': "La date de soumission n'est pas défini",
'Submitted date is not valid': "La date de soumission n'est pas valide",
......
......@@ -2,37 +2,115 @@
"""A collection of tools to build graphs in controllers.
"""
import matplotlib
import re
import matplotlib as mpl
import pandas as pd
from datetime import datetime
from gluon import current
from pandas import DataFrame, DatetimeIndex, to_datetime
from reporting_tools import repr_team_project
from regex import REG_SUBMITTED
from StringIO import StringIO
DATE_PUB = "Publication date"
DATE_SUB = "Submission date"
FROM_TO = "from %s to %s"
LABELY = "Number of publications"
LABELY_CUM_MONTH = "Cumulative sum of publications / month"
LABELY_CUM_YEAR = "Cumulative sum of publications / year"
LABELY_MONTH = "Number of publications / month"
LABELY_YEAR = "Number of publications / year"
TITLE = "Publications"
TITLE_CUMSUM = "Cumulative sum of publications"
PUBLICATIONS_QUERY_FIELDS = [
"Graph_selectorAuthor",
"Graph_selectorId_authors_roles",
"Graph_selectorId_categories",
"Graph_selectorId_fundings",
"Graph_selectorId_projects",
"Graph_selectorId_teams",
"Graph_selectorYear_end",
"Graph_selectorYear_start"]
def db2df(db, query, fields=(), columns=None):
    """Transform a database query into a dataframe.

    The query is turned into an SQL ``SELECT`` statement by the DAL,
    executed with ``db.executesql`` and the resulting records are loaded
    into a ``pandas.DataFrame``.

    Example::

        # database query
        query = history.id_events == id_event
        query &= history.id_projects == id_project
        query &= history.id_fundings == db.fundings.id
        query &= history.id_people == db.people.id
        query &= history.id_people_categories == db.people_categories.id
        query &= history.id_teams == db.teams.id

        # fields to be extracted from the database
        fields = [
            history.id_domains,
            history.id_events,
            history.id_people,
            history.id_projects,
            history.id_teams,
            db.teams.team,
            db.people_categories.category,
            db.people_categories.code,
            db.people.first_name,
            db.people.last_name,
            history.percentage,
            history.start_date,
            history.end_date,
            history.data,
            db.fundings.agency]

        # get the DataFrame
        df = db2df(db, query, fields)
        df.info()

    Args:
        db (pyDAL.DAL): database connection.
        query: database query including statement to join foreign table.
        fields (list or tuple): pyDAL.Field objects to be extracted from
            the query. All fields are extracted when not defined.
        columns (list): name of the columns in the dataframe. There is one
            to one correspondence between the fields and columns list.
            Names of database field are used when columns is not defined.

    Returns:
        pandas.DataFrame

    """
    # an immutable default is used for fields so that no state can be
    # shared between calls
    if columns is None and len(fields) > 0:
        columns = [field.name for field in fields]

    # let the DAL generate the SQL statement but fetch the raw records,
    # which are cheaper to load into pandas than Rows objects
    rows = db.executesql(db(query)._select(*fields))

    return pd.DataFrame.from_records(list(rows), columns=columns)
def emptychart(db, selector, target=None):
"""Empty graph as a function of month.
Args:
db (gluon.DAL): database connection
selector (Storage): values returned by the graph selector (user criteria)
target (matplotlib.AxesSubplot): useful when working with subplot.
target (matplotlib.AxesSubplot):
an existing axes useful when working with subplot.
Returns:
matplotlib.AxesSubplot: axes subplot
"""
year_start = selector.year_start
year_end = selector.year_end
T = current.T
year_start = selector.Graph_selectorYear_start
year_end = selector.Graph_selectorYear_end
if year_start and not year_end:
dates = to_datetime(["%s-01-01" % year_start, "%s-12-31" % year_start])
......@@ -41,322 +119,246 @@ def do_empty(db, selector, target=None):
dates = to_datetime(["%s-01-01" % year_start, "%s-12-31" % year_end])
else:
year = datetime.now().year
year = current.request.now.year
dates = to_datetime(["%i-01-01" % year, "%i-12-31" % year])
df = DataFrame([0]*2, index=dates)
ax = df.plot(legend=False, grid=True, x_compat=True, ax=target)
do_tick(ax)
return ax
def do_labels(ax, xlabel, ylabel):
"""Deal with axes label.
ax = df.plot(legend=False, grid=True, x_compat=True, ax=target)
Args:
ax (matplotlib.AxesSubplot): axes subplot
xlabel (str): label for the horizontal axis.
ylabel (str): label for the vertical axis.
# x and y ticks
ax.minorticks_on()
ax.tick_params(which="major", length=8)
ax.tick_params(which="minor", length=4)
"""
# x and y labels
xlabel = T(DATE_SUB).decode("utf-8")
ax.set_xlabel(xlabel, x=1, horizontalalignment="right")
ax.set_ylabel(ylabel, y=1, horizontalalignment="right")
def do_legend(ax):
"""Put legend at the top.
ylabel = T(LABELY).decode("utf-8")
ax.set_ylabel(ylabel, y=1, horizontalalignment="right")
Args:
ax (matplotlib.AxesSubplot): axes subplot
# main title of the graph
set_title(ax, db, selector)
"""
if ax.get_legend():
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width, box.height * 0.9])
ax.legend(loc="lower right",
bbox_to_anchor=(1.01, 1.),
fontsize=10,
ncol=2)
return ax
def do_linechart(publications, selector, target=None):
def linechart(db, selector, target=None, title=None, xlabel=None, ylabel=None):
"""Build a line chart showing the number of publications as a function
of the time either month or year.
of the time.
Args:
publications (gluon.dal.Table): the publications table.
selector (plugin_dbui.Selector): the selector with user criteria.
target (matplotlib.AxesSubplot): an existing axes,
useful when working with subplot.
db (pydal.DAL): database connection
selector (Storage): value returned by the graph selector.
target (matplotlib.AxesSubplot):
an existing axes useful when working with subplot.
title (str): defined from the user criteria when None.
xlabel (str): defined automatically when None.
ylabel (str): defined automatically when None.
Returns:
matplotlib.AxesSubplot: axes subplot
"""
db = publications._db
is_cumu = selector.cumulative
is_month = selector.time == current.T("month")
rx_submitted = re.compile(REG_SUBMITTED)
# query directive to count publications including
# foreign key constraints and user requirements
# related to team, project, authors roles and year
query = do_query(publications, selector)
# count the number of publications
# they are grouped per year or per year and month
# depending on the time axis
T = current.T
publications = db.publications
# ........................................................................
#
# variables depending on the time axis
#
time_axis = selector.Graph_selectorTime
is_month = time_axis == T("month")
to_time = (submitted2YM if is_month else submitted2Y)
# ........................................................................
#
# instantiate the DataFrame
#
query = query_publications(db, **selector)
fields = [
publications.year,
publications.submitted]
columns = ["published", "submitted"]
df = (db2df(db, query, fields, columns)
.assign(
axis=1,
time=to_time)
.drop(["published", "submitted"], axis="columns")
.groupby(["time", "axis"])
.size()
.unstack(level=1)
.fillna(0))
if df.empty:
return emptychart(db, selector)
# post processing
is_cumsum = selector.Graph_selectorCumulative == "true"
if is_cumsum:
df = df.cumsum()
# ........................................................................
#
# instantiate the graph
#
mpl.rcParams['legend.fontsize'] = 10
if is_month:
group_by = [publications.submitted[:4], publications.submitted[5:7]]
else:
group_by = publications.submitted[:4]
df.index = DatetimeIndex(df.index)
setrows = db(query)
if setrows.count() == 0:
return do_empty(db, selector, target=target)
if df.size == 1:
kwargs = dict(kind="bar", color="white", edgecolor="blue", rot=0)
count = publications.id.count()
rows = setrows.select(publications.submitted,
count,
groupby=group_by,
orderby=group_by)
else:
kwargs = dict(kind="line", rot=35, x_compat=True)
# build the list of data points
data, y, prev_year = [], 0., None
ax = df.plot(colormap="Pastel1",
grid=True,
legend=False,
ylim=(0, None),
ax=target,
**kwargs)
for row in rows:
submitted = row.publications.submitted
# x and y ticks
ax.minorticks_on()
ax.tick_params(which="major", length=8)
ax.tick_params(which="minor", length=4)
# protection
if is_month and not rx_submitted.match(submitted):
continue
# x label
if xlabel is None:
xlabel = T(DATE_SUB).decode("utf-8")
ax.set_xlabel(xlabel, x=1, horizontalalignment="right")
if is_cumu:
y += row[count]
else:
y = row[count]
# y label
if ylabel is None:
if is_cumsum and is_month:
ylabel = LABELY_CUM_MONTH
# a continuous line when month axis is selected
if is_month:
data.append([submitted[:7], y])
elif is_cumsum:
ylabel = LABELY_CUM_YEAR
# a step line for the year axis
# fill properly holes between year
elif is_month:
ylabel = LABELY_MONTH
else:
year = int(submitted[:4])