Commit b3770ab9 authored by LE GAC Renaud
Browse files

Update citations stuff to display information per domain, team or project.

parent c18fe61b
......@@ -39,7 +39,7 @@ def citations():
#
selector = Selector(
virtdb.graph_citation_selector,
exclude_fields=("id_graphs", "year_start", "year_end"))
exclude_fields=("graphs", "year_start", "year_end"))
year_start = selector.year_start
year_end = selector.year_end
......@@ -58,15 +58,33 @@ def citations():
#
acl = get_acl(db, selector)
df = (acl
[["id_publication", "scan", "citations"]]
.set_index("id_publication"))
if selector.graphs == "all":
cols = ["id_publication", "scan", "citations"]
idx = ["id_publication"]
fgroupby = False
elif selector.graphs == "per domain":
cols = ["id_publication", "scan", "citations", "domain"]
idx = ["id_publication", "domain"]
fgroupby = True
elif selector.graphs == "per team":
cols = ["id_publication", "scan", "citations", "team"]
idx = ["id_publication", "team"]
fgroupby = True
elif selector.graphs == "per project":
cols = ["id_publication", "scan", "citations", "project"]
idx = ["id_publication", "project"]
fgroupby = True
df = (acl[cols].set_index(idx))
dct = {}
for scan in acl.scan.unique():
dct[scan] = (df[df.scan <= scan]
.citations
.groupby("id_publication").max())
.groupby(idx).max())
df = pd.DataFrame(dct)
df = df[sorted(df.columns)]
......@@ -86,17 +104,20 @@ def citations():
#
# Sum of citations as a function of time
#
fig2 = estimator_versus_time(df.sum(),
data = (df.groupby(level=1).sum().T if fgroupby else df.sum())
fig2 = estimator_versus_time(data,
last_scan,
legend,
delta=True,
delta=not fgroupby,
logy=fgroupby,
xtitle="$\sum$ citations")
# ........................................................................
#
# Average citations per article as a function of time
#
fig3 = estimator_versus_time(df.mean(),
data = (df.groupby(level=1).mean().T if fgroupby else df.mean())
fig3 = estimator_versus_time(data,
last_scan,
legend,
xtitle="mean citations / article")
......@@ -105,7 +126,10 @@ def citations():
#
# h-index as a function of time
#
fig4 = estimator_versus_time(df.apply(h_index),
data = \
(df.groupby(level=1).agg(h_index).T if fgroupby else df.apply(h_index))
fig4 = estimator_versus_time(data,
last_scan,
legend,
xtitle="h-index")
......
......@@ -432,6 +432,7 @@
'Number for the first pages or a range 69-80': 'Numéro de la première page ou un range 69-80',
'Number of articles analysed': "Nombre d'articles analysés",
'Number of articles modified': "Nombre d'articles modifiés",
'Number of citations': 'Nombre de citations',
'Number of invalid records': "Nombre d'enregistrement non valide",
'Number of invalid records: %s': "Nombre d'enregistrement non valide : %s",
'Number of publications': 'Nombre de publications',
......
......@@ -11,7 +11,12 @@ from matplotlib.figure import Figure
import matplotlib.ticker as ticker
def estimator_versus_time(ts, last_scan, legend, delta=False, xtitle=""):
def estimator_versus_time(ts,
last_scan,
legend=[],
delta=False,
logy=False,
xtitle=""):
"""Plot citations estimator as a function of time.
Args:
......@@ -25,6 +30,8 @@ def estimator_versus_time(ts, last_scan, legend, delta=False, xtitle=""):
delta (bool):
additional plot showing Delta(yn -yn-1)
logy (bool)
xtitle (str)
Returns:
......@@ -44,7 +51,7 @@ def estimator_versus_time(ts, last_scan, legend, delta=False, xtitle=""):
else:
ax = fig.subplots(nrows=1, ncols=1)
ts.plot(ax=ax, grid=True, drawstyle="steps-mid")
ts.plot(ax=ax, grid=True, drawstyle="steps-mid", logy=logy)
ax.minorticks_on()
ax.set_xlabel("time", horizontalalignment='right', x=1.)
......@@ -153,16 +160,24 @@ def h_index(sr):
data.index += 1
for npubli, ncitations in data.iteritems():
if ncitations == npubli:
if ncitations == 0:
return 0
elif ncitations == npubli:
return float(ncitations)
elif ncitations < npubli:
n1, n2 = npubli - 1, npubli
c1, c2 = data.iat[n1], ncitations
c1, c2 = data.at[n1], ncitations
try:
alpha = (n1 - n2) / (c1 - c2)
hfactor = (n1 - alpha*c1) / (1-alpha)
return hfactor
alpha = (n1 - n2) / (c1 - c2)
hfactor = (n1 - alpha*c1) / (1-alpha)
return hfactor
except ZeroDivisionError:
return np.nan
# not enough publications
# take the number of publication as estimator
......@@ -175,7 +190,7 @@ def histogram(citations, last_scan, legend):
Args:
citations (pandas.Series):
* index is id_publication
* index is id_publication or (id_publication, domain), etc
* value is the number of citations
last_scan (datetime.datetime)
......@@ -189,19 +204,37 @@ def histogram(citations, last_scan, legend):
"""
T = current.T
# build the histogram
# define histogram bins
upper_limit = citations.max()+1
upper_limit = (500 if upper_limit <= 500 else upper_limit)
bins = [-0.5, 0.5, 9.5, 49.5, 99.5, 249.5, 499.5, upper_limit]
hist, dummy = np.histogram(citations, bins)
# histogram publications per domain, team or project
if isinstance(citations.index, pd.MultiIndex):
xx = citations.groupby(level=1).agg(np.histogram, bins)
df = pd.DataFrame({el[0]: el[1][0] for el in xx.items()})
df = df[sorted(df.columns)]
flegend = True
fstack = False
# plot histogram
# histogram all publications
else:
hist, dummy = np.histogram(citations, bins)
df = pd.DataFrame({"all": hist})
flegend = False
fstack = False
# plot histogram(s)
fig = Figure()
ax = fig.subplots(nrows=1, ncols=1)
df = pd.DataFrame({"all": hist})
df.plot.bar(ax=ax, grid=True, legend=False, rot=0, width=0.9)
df.plot.bar(ax=ax,
alpha=0.5,
grid=True,
legend=flegend,
stacked=fstack,
rot=0,
width=0.9)
ax.minorticks_on()
......@@ -215,17 +248,23 @@ def histogram(citations, last_scan, legend):
ax.xaxis.set_ticklabels(xlabels)
ax.set_xlabel("Number of citations", horizontalalignment='right', x=1.)
ax.set_ylabel("Number of publications", horizontalalignment='right', y=1.)
ax.set_xlabel(T("Number of citations"), horizontalalignment='right', x=1.)
ax.set_ylabel(
T("Number of publications"), horizontalalignment='right', y=1.)
ax.xaxis.set_minor_locator(ticker.NullLocator())
# legend
# legend (stacked histograms)
if flegend:
ax.legend(loc="upper right", bbox_to_anchor=(0.995, 0.8))
# my legend
txt = list(legend)
txt.extend([
f"$\sum${T('citations:')}: {citations.sum()}",
f"{T('citations/article (avg):')} {citations.mean():.1f}",
f"h-index: {h_index(citations):.1f}"])
# f"h-index: {h_index(citations):.1f}"
])
ax.text(
0.62, 0.82, "\n".join(txt),
......
......@@ -301,15 +301,18 @@ class Selector(object):
Field("year_end", "integer"),
Field("id_teams", "reference teams", label="Team"),
Field("id_projects", "reference projects", label="Project"),
Field("author", "string"),
Field("id_authors_roles", "reference authors_roles", label="Role"),
Field("id_graphs", "reference graphs", label="Graph"))
Field("graphs", "string", default="all", label="Graph"))
table.id_authors_roles.requires = IS_IN_DB(db, "authors_roles.role")
table.id_graphs.requires = IS_IN_DB(db, "graphs.graph")
table.id_projects.requires = IS_IN_DB(db, "projects.project")
table.id_teams.requires = IS_IN_DB(db, "teams.team")
table.graphs.requires = IS_IN_SET(["all",
"per domain",
"per team",
"per project"])
return table
@staticmethod
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment