Commit 31d61aed authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Add the script lbfr_report_rh.

parent b5b57422
......@@ -67,6 +67,30 @@ def style():
mpl.rcParams['ytick.labelsize'] = 8
def table_only(ax, mytable, fontsize=10, cellheight=0.10):
    """Show only the table: hide the axes decoration and resize the cells.

    Args:
        ax (matplotlib.Axes):
            axes hosting the table. Its frame and both axis are hidden.
        mytable (matplotlib.table.Table):
            the table to be tuned.
        fontsize (int):
            font size applied to the table once automatic font sizing
            is switched off.
        cellheight (float):
            height applied to every cell of the table.

    """
    # trick to have only the table
    # http://buddapie.com/2015/10/31/report-with-matplotlib-tables-and-plots/
    ax.set_frame_on(False)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

    # play with font size and cell size
    mytable.auto_set_font_size(False)
    mytable.set_fontsize(fontsize)

    # get_celld() is the public accessor to the cells; it avoids relying on
    # the legacy 'child_artists' entry of Table.properties().
    for cell in mytable.get_celld().values():
        cell.set_height(cellheight)
def ticks_and_labels(ax, xlabel="", ylabel=""):
"""Set ylabel and activate minor ticks.
......
# -*- coding: utf-8 -*-
""" NAME
lbfr_report_rh
SYNOPSIS
build the report related to human resources.
DESCRIPTION
List of people starting / leaving, by category and by team.
Evolution of the ITA and physicist populations as a function
of time, separated into the particle physics and heavy ion domains.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...track_events/scripts
> ./run -S test_lhcbfrance script lbfr_report_rh.py
> ./run -S track_lhcbfrance script lbfr_report_rh.py
AUTHOR
R. Le Gac -- Sep 2016
"""
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
from gluon import current
from matplotlib_tools import (create_pdf,
header_footer,
style,
table_only,
ticks_and_labels)
from selector import MySelector
from pandas.tools.plotting import table
from plugin_dbui import get_id
# Timestamp taken once when the module is loaded. It is used to build the
# name of the pdf file (do_pdf) and to pick the year of the report (main).
NOW = datetime.now()
def df_people_fte(year):
    """Collect the FTE of each person for the five years centred on *year*.

    The shared selector (``current.selector``) is reset and moved to each
    year from ``year - 2`` to ``year + 2`` in turn, so on return it is left
    positioned on the last scanned year.

    Args:
        year (int): central year of the scan.

    Returns:
        pandas.DataFrame:
            one row per selected history record, with the columns
            "year", "team", "topcat", "category", "name", "fte".

    """
    db = current.globalenv['db']
    selector = current.selector

    # a fixed scan order makes the debugging easier
    sort_keys = (db.people_categories.category,
                 db.teams.team,
                 db.people.first_name)

    records = []
    for scanned_year in range(year - 2, year + 3):

        # move the selector to the year being scanned
        selector.reset_period()
        selector.set_year(scanned_year)

        rows = db(selector.query(db.history)).iterselect(orderby=sort_keys)
        for row in rows:
            person = row.people
            category = row.people_categories.category
            full_name = "%s %s" % (person.first_name, person.last_name)

            records.append((scanned_year,
                            row.teams.team,
                            top_category(category),
                            category,
                            full_name,
                            row.history.fte))

    return pd.DataFrame(
        records,
        columns=["year", "team", "topcat", "category", "name", "fte"])
def df_people_start_leave(year):
    """Build the data frame listing the people starting and leaving
    during the given year.

    The rows are read through ``current.selector``; the function does not
    move the selector itself (assumes it is already set on *year* --
    see setup_selector).

    A history row flagged ``is_start`` yields a "start" entry only when the
    earliest start date of that person is not before *year*. A history row
    flagged ``is_end`` always yields a "leave" entry, including when the
    "start" entry of the same row has been discarded.

    Args:
        year (int): year of the report.

    Returns:
        pandas.DataFrame:
            columns are "team", "name", "category", "start",
            "leave", "date", "agency", "cdd". The columns "start" and
            "leave" contain 1 or 0.

    """
    db = current.globalenv['db']
    history = db.history
    selector = current.selector

    id_people = get_id(db.events, event="People")

    # loop invariant: expression giving the earliest start date
    mindate = history.start_date.min()

    # order the database scan to ease the debugging
    orderby = (db.people_categories.category,
               db.teams.team,
               db.people.first_name)

    # scan the selected people and identify those starting or leaving
    data = []
    for row in db(selector.query(history)).iterselect(orderby=orderby):

        team = row.teams.team
        name = "%s %s" % (row.people.first_name, row.people.last_name)

        # remove the accent: the category is later used as a column name
        category = row.people_categories.category.replace("é", "e")

        agency = main_agency(row.fundings.agency)
        cdd = row.history.data["cdd"]

        # people starting: keep them only when the very first start date
        # found in the people history is not before the requested year
        if row.history.is_start:
            query = history.id_events == id_people
            query &= history.id_people == row.history.id_people

            first = db(query).iterselect(mindate,
                                         groupby=history.id_people).first()
            start_date = first[mindate]

            # NOTE: a discarded start must not hide a departure recorded
            # on the same row, hence no "continue" here
            if start_date.year >= year:
                data.append(
                    (team, name, category, 1, 0, start_date, agency, cdd))

        # people leaving
        if row.history.is_end:
            data.append((team, name, category,
                         0, 1, row.history.end_date, agency, cdd))

    # build the data frame
    columns = ["team",
               "name",
               "category",
               "start",
               "leave",
               "date",
               "agency",
               "cdd"]

    return pd.DataFrame(data, columns=columns)
def do_pdf(year):
    """Generate the report as a pdf file for the given year.

    The base name of the file is ``lbfr_rh_<yymmdd>`` where the date is the
    one stored in NOW. One page is saved after each page builder:
    people starting / leaving, CDD funding, population.

    The pdf object is closed even when a page builder fails; the exception
    is still propagated to the caller.

    Args:
        year (int): year of the report.

    """
    style()

    base_pdf = "lbfr_rh_%s" % NOW.strftime("%y%m%d")
    pdf = create_pdf(base_pdf)

    try:
        # ....................................................................
        #
        # Status on people starting / leaving and on CDD funding
        #
        # NOTE: has to run before df_people_fte, which moves the shared
        # selector to other years.
        #
        df = df_people_start_leave(year)

        page_people_starting_leaving(df, year)
        pdf.savefig()

        page_cdd_funding(df, year)
        pdf.savefig()

        # ....................................................................
        #
        # Status on number of people, fte, ..
        #
        df = df_people_fte(year)

        page_population(df, year)
        pdf.savefig()

    finally:
        pdf.close()
def page_cdd_funding(df, year):
    """Page showing the funding of cdd.

    The entries kept are those with the category "PhdStudent" or "PostDoc"
    or with the "cdd" flag set. The "start" column is summed per category
    and per agency. The result is shown as one pie chart per category
    (PhdStudent, PostDoc, Ingenieur) and as a summary table.

    A pie chart is skipped when its category is missing from the data or
    when none of its agencies has a positive count.

    Args:
        df (pandas.DataFrame):
            columns are "team", "name", "category", "start",
            "leave", "date", "agency", "cdd"
        year (int): year of the report, used in the page title.

    """
    fig = plt.figure()
    header_footer(fig, u"Financement des CDDs en %i" % year)

    print("." * 80 + " " + "\nFUNDING CDD:\n")

    # ........................................................................
    #
    # Prepare data frame: index is the agency, columns are the categories
    #
    query = (df.category == "PhdStudent") | \
            (df.category == "PostDoc") | \
            (df.cdd)

    # The grouping keys are given as a list (a tuple is interpreted as a
    # single key by recent pandas) and the summed column is selected
    # explicitly instead of deleting the unwanted ones afterwards.
    df1 = df[query].groupby(["category", "agency"])[["start"]].sum()
    df1 = df1.unstack(level=1).fillna(0.)
    df1.columns = [el[1] for el in df1.columns]
    df1 = df1.T
    print(df1)

    # ........................................................................
    #
    # One pie chart per category: (subplot position, category)
    #
    pies = ((221, "PhdStudent"),
            (222, "PostDoc"),
            (223, "Ingenieur"))

    for position, category in pies:
        if category not in df1.columns:
            continue

        # keep only the agencies funding somebody in this category
        shares = df1[category]
        shares = shares[shares > 0]
        if shares.empty:
            continue

        ax = plt.subplot(position)
        shares.plot.pie(ax=ax,
                        autopct="%.0f%%",
                        colormap="Pastel1")

    # ........................................................................
    #
    # Summary table
    #
    ax = plt.subplot(224)
    mytable = table(ax, df1, loc="center", colWidths=[0.20]*len(df1.columns))
    table_only(ax, mytable)
def page_people_starting_leaving(df, year):
    """List and plot of people starting or leaving during the given year.

    The page contains a bar chart with the number of start / leave per
    category and a table with the balance (start - leave) per category
    and per team. The intermediate data frames are printed.

    Args:
        df (pandas.DataFrame):
            columns are "team", "name", "category", "start",
            "leave", "date", "agency", "cdd"
        year (int): year of the report, used in the page title.

    """
    fig = plt.figure()
    header_footer(fig, u"Arrivé(s) et départ(s) en %i" % year)

    print(" ".join(("." * 80, "\nPEOPLE STARTING LEAVING IN", str(year), "\n")))
    print(df)

    # ........................................................................
    #
    # count the number of start/leave per people_category
    #
    # The counters are selected explicitly rather than removing the other
    # summed columns afterwards.
    #
    print("\nCount per category:\n")
    df1 = df.groupby("category")[["start", "leave"]].sum()
    print(df1)

    ax = plt.subplot(221)
    df1.plot.bar(ax=ax,
                 colormap="Pastel1",
                 grid=True,
                 rot=0,
                 ylim=(0, 6))

    ticks_and_labels(ax, u"Catégorie", "Nombre de persone")

    # ........................................................................
    #
    # Count per category and per team
    # weight are +1 = start, -1 = leave
    #
    # The grouping keys are given as a list: a tuple is interpreted as a
    # single key by recent pandas.
    #
    print("\nCount per category and per teams:\n")
    df2 = df.groupby(["category", "team"])[["start", "leave"]].sum()
    df2["state"] = df2["start"] - df2["leave"]
    df2 = df2[["state"]]

    # one column per team
    df2 = df2.unstack(level=1).fillna(0.)
    df2.columns = [el[1] for el in df2.columns]
    print(df2)

    ax = plt.subplot(223)
    mytable = table(ax, df2, loc="center")
    table_only(ax, mytable)
def page_population(df, year):
    """Page showing the number of persons and the FTE per year and per top
    category, as well as the break down of the physicist population.

    Three bar charts are drawn, one bar group per year found in *df*:
    the number of people per top category, the FTE per top category and
    the FTE per category once "Emérite" and "Ingénieur" are removed.
    The intermediate data frames are printed.

    Args:
        df (pandas.DataFrame):
            columns are "year", "team", "topcat", "category", "name", "fte".
        year (int): year of the report, used in the page title.

    """
    fig = plt.figure()
    header_footer(fig, u"Population LHCb France en %i" % year)

    print("." * 80 + " " + "\nPOPULATION\n")

    # The grouping keys are given as a list: a tuple is interpreted as a
    # single key by recent pandas.
    grouped = df.groupby(["year", "topcat"])

    # ........................................................................
    #
    # view number of people
    #
    # count the names only, instead of counting every column and
    # deleting the unwanted ones
    #
    df1 = grouped[["name"]].count()
    df1 = df1.unstack(level=1)
    df1.columns = [el[1] for el in df1.columns]

    print("\nCount the number of people per category:")
    print(df1)

    ax = plt.subplot(221)
    df1.plot.bar(ax=ax,
                 colormap="Pastel1",
                 grid=True,
                 rot=0,
                 ylim=(0, 70))

    ticks_and_labels(ax, u"Année", u"Nombre de personne")

    # ........................................................................
    #
    # view FTE
    #
    df2 = grouped[["fte"]].sum()
    df2 = df2.unstack(level=1)
    df2.columns = [el[1] for el in df2.columns]

    print("\nCount the number of FTE per category:")
    print(df2)

    ax = plt.subplot(222)
    df2.plot.bar(ax=ax,
                 colormap="Pastel1",
                 grid=True,
                 rot=0,
                 ylim=(0, 70))

    ticks_and_labels(ax, u"Année", u"Equivalent temps plein")

    # ........................................................................
    #
    # view the content of the physicist population
    #
    query = (df.category != "Emérite") & (df.category != "Ingénieur")
    df3 = df[query].groupby(["year", "category"])[["fte"]].sum()
    df3 = df3.unstack(level=1)
    df3.columns = [el[1] for el in df3.columns]

    print("\nBreak down of the physicist population:")
    print(df3)

    ax = plt.subplot(223)
    df3.plot.bar(ax=ax,
                 colormap="Pastel1",
                 grid=True,
                 linewidth=0,
                 rot=0,
                 stacked=True,
                 width=1.,
                 ylim=(0, 60))

    ticks_and_labels(ax, u"Année", u"Physiciens [ETP]")
def setup_selector(year):
    """Create the MySelector used to select the people events of a year.

    The selector fields are written in ``current.request.vars``: all of
    them are blank except ``SelectorYear_start`` which receives *year*.
    The selector is then restricted to the history records attached to
    the event "People".

    Note:
        MySelector introduces a set of virtual fields:
        age, coverage, duration, fte, is_start, is_end, is_over as well as
        a few methods to handle time periods.

    Args:
        year (int): value assigned to ``SelectorYear_start``.

    Returns:
        MySelector:

    """
    db = current.globalenv['db']
    request_vars = current.request.vars

    # every selector field is blank but the start year
    blank_fields = ("SelectorData",
                    "SelectorId",
                    "SelectorId_fundings",
                    "SelectorId_object_categories",
                    "SelectorId_object_code",
                    "SelectorId_people_categories",
                    "SelectorId_people_code",
                    "SelectorId_projects",
                    "SelectorId_teams",
                    "SelectorYear_end")

    for field in blank_fields:
        setattr(request_vars, field, '')

    request_vars.SelectorYear_start = year

    virtdb = current.globalenv["virtdb"]
    selector = MySelector(virtdb.selector)

    # keep only the history records of the "People" event
    people_event = get_id(db.events, event="People")
    selector.append_query(db.history.id_events == people_event)

    return selector
def main_agency(agency):
    """Map a funding agency onto its main agency.

    * "ENIGMASS", "OCEVU" and "P2IO" are grouped under "LABEX".
    * a name starting with "ERC" becomes "ERC".
    * a name starting with "ENS" becomes "ECOLE".
    * any other name is returned unchanged.

    Args:
        agency (str): name of the funding agency.

    Returns:
        str: name of the main agency.

    """
    if agency in ("ENIGMASS", "OCEVU", "P2IO"):
        return "LABEX"

    # (prefix of the agency name, main agency)
    for prefix, label in (("ERC", "ERC"), ("ENS", "ECOLE")):
        if agency.startswith(prefix):
            return label

    return agency
def top_category(category):
    """Return the top category of a people category.

    Args:
        category (str): people category.

    Returns:
        str:
            "IT" for "Ingénieur", "EME" for "Emérite" and "PHY" for
            any other category.

    """
    if category == "Ingénieur":
        return "IT"

    if category == "Emérite":
        return "EME"

    return "PHY"
if __name__ == "__main__":
    import sys

    # protection: the script only runs within these applications
    allowed_applications = ("test_lhcbfrance", "track_lhcbfrance")
    application = current.request.application

    if application in allowed_applications:
        report_year = NOW.year

        # the selector is tuned to select the people events
        # of the current year
        current.selector = setup_selector(report_year)

        # write the report as a pdf file and leave
        do_pdf(report_year)
        sys.exit(0)

    print("%s %s" % ("Invalid application", application))
    sys.exit(1)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment