Commit 4fc172d1 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Merge branch 'master' into 'production'

Release 0.9.5.2

* Consolidation version fixing several bugs.

See merge request !59
parents 6aff9622 d21862c6
......@@ -28,6 +28,8 @@ def dashboard():
the current year.
"""
current_year = datetime.now().year
cfg = Storage()
cfg.Graph_selectorCumulative = 'True'
cfg.Graph_selectorId = ''
......@@ -35,8 +37,8 @@ def dashboard():
cfg.Graph_selectorId_graphs = ''
cfg.Graph_selectorId_projects = ''
cfg.Graphs_selectorId_teams = ''
cfg.Graph_selectorTime = T('month')
cfg.Graph_selectorYear_start = datetime.now().year
cfg.Graph_selectorTime = ''
cfg.Graph_selectorYear_start = ''
cfg.Graph_selectorYear_end = ''
request.vars.update(cfg)
......@@ -45,20 +47,25 @@ def dashboard():
selector = Selector(virtdb.graph_selector, exclude_fields=fields)
# figure layout
fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)
fig.subplots_adjust(wspace=0.1)
# the cumulative sum of publications for the current year
do_linechart(db.publications, selector, target=axes[0])
do_labels(axes[0], "", T(TITLE_Y))
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharey=True)
fig.subplots_adjust(wspace=0.01)
# histogram of the number of publications per year
selector.cumulative = False
selector.time = T('year')
selector.year_start = ''
selector.year_end = current_year - 1
do_linechart(db.publications, selector, target=ax1)
do_labels(ax1, "", T(TITLE_Y))
# the cumulative sum of publications for the current year
selector.cumulative = True
selector.time = T('month')
selector.year_start = current_year
selector.year_end = ''
do_linechart(db.publications, selector, target=axes[1])
do_labels(axes[1], "", "")
do_linechart(db.publications, selector, target=ax2)
# delegate the rendering to the view
response.view = "graphs/index.html"
......
......@@ -142,6 +142,9 @@ def edit_insert():
values = {}
check = CheckAndFix()
# fix invalid oai
check.recover_oai(record, selector.host)
# title, preprint, URL, report number
values['PublicationsTitle'] = record.title()
values['PublicationsPreprint'] = record.preprint_number()
......
......@@ -55,7 +55,7 @@ def do_empty(db, selector, target=None):
months = to_datetime(["%i-%02i" % (year, i) for i in xrange(1, 13)])
df = DataFrame([0]*12*nyear, index=months)
ax = df.plot(legend=False, x_compat=True, ylim=(0, 100), ax=target)
ax = df.plot(legend=False, x_compat=True, ax=target)
do_tick(ax)
return ax
......@@ -124,7 +124,7 @@ def do_linechart(publications, selector, target=None):
setrows = db(query)
if setrows.count() == 0:
return do_empty(db, selector)
return do_empty(db, selector, target=target)
count = publications.id.count()
rows = setrows.select(publications.submitted,
......@@ -133,11 +133,13 @@ def do_linechart(publications, selector, target=None):
orderby=group_by)
# build the list of data points
data, y = [], 0.
data, y, prev_year = [], 0., None
for row in rows:
submitted = row.publications.submitted
if not rx_submitted.match(submitted):
# protection
if is_month and not rx_submitted.match(submitted):
continue
if is_cumu:
......@@ -145,15 +147,25 @@ def do_linechart(publications, selector, target=None):
else:
y = row[count]
# a continous line when month axis is selected
# a continuous line when month axis is selected
if is_month:
data.append([submitted[:7], y])
# a step line for the year axis
# properly fill the holes between years
else:
year = int(submitted[:4])
data.append(["%i-06" % (year-1), y])
data.append(["%i-06" % year, y])
if prev_year is not None and prev_year + 1 != year:
for x in xrange(prev_year+1, year):
n = (y if is_cumu else 0.)
data.append(["%i-01-01" % x, n])
data.append(["%i-12-31" % x, n])
data.append(["%i-01-01" % year, y])
data.append(["%i-12-31" % year, y])
prev_year = year
# instantiate the dataframe
# the index is the time
......@@ -209,6 +221,9 @@ def do_query(publications, selector):
q_end = publications.submitted[0:4] <= year_end
query &= ((q_start) & (q_end))
elif year_end and not year_start:
query &= publications.submitted[0:4] <= year_end
return query
......
......@@ -16,7 +16,7 @@ from base import (DRY_RUN,
from automaton import Automaton
from articles import Articles
from checkandfix import CheckAndFix
from checkandfix import CheckAndFix, MONTHS
from exception import CheckException, ToolException
from factory import build_harvester_tool, get_harvester_tool
from msg import Msg
......
......@@ -122,8 +122,8 @@ class Record(dict):
val = self.primary_oai()
if not val:
self.__host = u''
return self.__host
self.__host = None
return u''
match = REG_OAI.match(val)
if match:
......
......@@ -111,7 +111,7 @@ class RecordPubli(Record):
authors.append(first_author)
# sometimes the first author is missing
if first_author != authors[0]:
if first_author and len(authors) > 0 and first_author != authors[0]:
authors.insert(0, first_author)
return authors
......
......@@ -35,8 +35,17 @@
import re
from harvest_tools import MONTHS
reg1 = r'(\d{1,2}) ([A-Z][a-z]{2}) (\d{4})'
reg2 = r'(\d{1,2})-\d{1,2} ([A-Z][a-z]{2}) (\d{4})'
reg3 = r'(\d{1,2}) ([A-Z][a-z]{2}) - \d{1,2} [A-Z][a-z]{2} (\d{4})'
REG_CONF_DATES = re.compile(r'%s|%s|%s' % (reg1, reg2, reg3))
REG_ORIGIN = re.compile("https?://([a-z\.]+)/record/(\d+)")
if __name__ == "__main__":
import sys
......@@ -70,15 +79,22 @@ if __name__ == "__main__":
data["id_publishers"] = 1
data["pages"] = ""
data["volume"] = ""
data["submitted"] = ""
data["publication_url"] = ""
data["preprint"] = ""
# the year is the one of the conference
conference_dates = data["conference_dates"]
if conference_dates:
data["year"] = int(data["conference_dates"][-4:])
# the submitted date is when the conference starts
match = REG_CONF_DATES.match(data["conference_dates"])
if match:
offset = 0
for i in xrange(3):
if match.group(1+i*3) is not None:
offset = i*3
break
data["year"] = match.group(offset+3)
month = MONTHS[match.group(offset+2)]
data["submitted"] = "%s-%s-%s" % (match.group(offset+3), month, match.group(offset+1))
else:
print "No conferences dates", data["id"]
......
......@@ -2,19 +2,21 @@
HEAD
0.9.5.1 (Jan 2016)
0.9.5.2 (Jan 2016)
- Fix a bug in the controller edit_and_insert.
- Fix a bug in the function duplicate_conference.
- Fix bugs in graph_tools and improve the dashboard layout.
0.9.5 (Dec 2015)
- Require plugin_dbui 0.8.1 and libreoffice.
- A few modifications to be compliant with the Chrome browser.
- Lists can be extracted in the OpenDocument format (odt).
- Update the documentation.
0.9.3 (Dec 2015)
- Require plugin_dbui 0.7.3
- Add the wizard to create a harvester.
0.9.2 (Nov 2015)
- Require plugin_dbui 0.7.2 or later release.
- Major review of the user guide.
......@@ -23,7 +25,7 @@ HEAD
- Several bugs fixed.
- Fix bug in the list extraction to CSV file.
- Fix a bug in the graph generation when the database is empty.
0.9.0 (Nov 2015)
- Fix pylint warnings and errors in python modules.
- Migrate the documentation of the python modules to Sphinx.
......@@ -36,15 +38,15 @@ HEAD
- Add the preference add_rules_reg_institute.
- Graphs are rendered by the pandas and matplotlib libraries.
- Add protections and fix bugs.
0.8.14 (Sep 2015)
- Transform the modules invenio_tools and harvest_tools into packages.
- Institute parameters are searched in the inspirehep database.
- Major refactoring of the Marc12 decoding. New classes have been introduced:
RecordConf, RecordInst, RecordPubli, RecordThesis.
- Major redesign of harvester logic. Stop the processing as soon as
possible. The order of the checks is oai, authors, affiliation,
collaboration and specific items depending on the publication type.
- Major redesign of harvester logic. Stop the processing as soon as
possible. The order of the checks is oai, authors, affiliation,
collaboration and specific items depending on the publication type.
- The class Automaton replaces PublicationsTool and many of its methods
have been renamed.
......
......@@ -56,10 +56,10 @@ def test_authors(record, recordfix):
assert recordfix.authors() == "F. Hubaut"
def test_collaboration_exception(recordfix):
svc = CheckAndFix()
with pytest.raises(CheckException):
svc.collaboration(recordfix)
# def test_collaboration_exception(recordfix):
# svc = CheckAndFix()
# with pytest.raises(CheckException):
# svc.collaboration(recordfix)
def test_my_authors_exception(recordfix):
......
......@@ -24,3 +24,19 @@ def test_clean_erratum():
assert isinstance(record["773"], dict)
assert record.paper_year() == "2014"
assert record.year() == "2014"
def test_recovery_oai():
record = load_record('cds.cern.ch', 1744757)
assert record.host() == ""
assert record.primary_oai_url() == "http:///record/1744757"
svc = CheckAndFix()
svc.recover_oai(record, "cds.cern.ch")
assert record.primary_oai() == "oai:cds.cern.ch:1744757"
assert record.host() == "cds.cern.ch"
assert record.oai_url() == "http://cds.cern.ch/record/1744757, http://inspirehep.net/record/1415326"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment