Commit fa3e7a33 authored by LE GAC Renaud
Browse files

Add controller and view for wizards/update_citations.

parent d3264b69
......@@ -424,3 +424,101 @@ def harvester():
raise HTTP(500, msg)
return
def update_citations():
    """Update the citations table.

    * select articles in the inspirehep store according to user criteria
    * get the number of citations per document
    * update the citations table when the number of citations changed

    Articles for which the inspirehep URL cannot be extracted, the HTTP
    request fails or the JSON reply cannot be used are skipped and counted.

    Returns:
        dict:
            * counters (Storage): number of articles analysed (article),
              of rows inserted or updated (insert) and of errors
              (http_error, json_error, url_error).
            * team_project: value returned by repr_team_project
              for the user selection.

    """
    import datetime
    import requests

    from invenio_tools import InvenioStore
    from json.decoder import JSONDecodeError
    from reporting_tools import repr_team_project

    citations = db.citations
    id_acl = get_id(db.categories, code="ACL")
    kwargs = dict(of="recjson", ot="number_of_citations")
    publications = db.publications
    rex_ins = re.compile(r"(https?://inspirehep.net/record/\d+)")
    store = InvenioStore("inspirehep.net")
    today = datetime.date.today()

    counters = Storage(article=0,
                       http_error=0,
                       insert=0,
                       json_error=0,
                       url_error=0)

    # get user requirement
    selector = Selector(virtdb.citation_selector)

    # get the list of articles stored in the inspirehep store
    selector.append_query(publications.id_categories == id_acl)
    selector.append_query(publications.origin.contains("inspirehep"))
    query = selector.query(publications)

    # get the number of citations and update the database table
    for row in db(query).iterselect(publications.id, publications.origin):

        counters.article += 1
        logger.debug(row.origin)

        # extract the inspirehep URL, explicit check rather than relying
        # on an AttributeError which could also come from unrelated code
        match = rex_ins.search(row.origin)
        if match is None:
            logger.warning(f"inspirehep URL not well formed {row.origin}")
            counters.url_error += 1
            continue

        url = match.group(1)

        # interrogate inspirehep.net
        # JSONDecodeError is tested first since, depending on the version
        # of requests, it can also be a RequestException
        try:
            rep = store.interogate(url, timeout=30, **kwargs)
            data = rep.json()

        except JSONDecodeError:
            logger.warning("JSON decoding error")
            counters.json_error += 1
            continue

        except requests.exceptions.RequestException:
            logger.warning(f"HTTP error interrogating {url}")
            counters.http_error += 1
            continue

        # the reply is expected to be a non-empty list of dictionaries,
        # anything else is counted as a JSON error instead of aborting
        try:
            count = data[0].get("number_of_citations")

        except (AttributeError, IndexError, KeyError, TypeError):
            logger.warning(f"unexpected JSON content for {url}")
            counters.json_error += 1
            continue

        # check if the number of citations changed since the last entry
        myset = db(citations.id_publications == row.id)
        if not myset.isempty():
            entries = myset.select(orderby=citations.date)
            last_count = entries.last().count
            logger.debug(f"last count {last_count} new one {count}")

            if last_count == count:
                continue

        # update the citations table, at most one entry per day and article
        logger.info(f"update {url} citations to {count}")
        counters.insert += 1
        idpubli = row.id

        citations.update_or_insert(
            (citations.date == today) & (citations.id_publications == idpubli),
            date=today,
            id_publications=idpubli,
            count=count)

        # commit right away so that the work already done is kept
        # when a later iteration fails
        db.commit()

    # inform the user
    logger.info(f" number of article: {counters.article}")
    logger.info(f" bad inspirehep URL: {counters.url_error}")
    logger.info(f" HTTP connection error: {counters.http_error}")
    logger.info(f" JSON decoding error: {counters.json_error}")
    logger.info(f" insert or update in db: {counters.insert}")

    return dict(counters=counters,
                team_project=repr_team_project(db, selector))
......@@ -313,6 +313,7 @@
'Help': 'Aide',
'Horizontal': 'Horizontal',
'Host': 'Host',
'HTTP connection error': 'Erreur de connexion HTTP',
'HTTP Error': 'HTTP Error',
'id': 'id',
'Id': 'Id',
......@@ -345,6 +346,7 @@
'ISBN': 'ISBN',
'ISSN': 'ISSN',
'Javascript API': 'Javascript API',
'JSON decoding error': 'Erreur de décodage JSON',
'Key U': 'Clé U',
'Key V': 'Clé V',
'Keys already exist!': 'La clé existe!',
......@@ -424,6 +426,8 @@
'Notes and reports to committees': 'Notes et Rapports à des Comités',
'Notes and Reports to Committees': 'Notes et Rapports à des Comités',
'Number for the first pages or a range 69-80': 'Numéro de la première page ou un range 69-80',
'Number of articles analysed': "Nombre d'articles analysés",
'Number of articles modified': "Nombre d'articles modifiés",
'Number of invalid records': "Nombre d'enregistrement non valide",
'Number of invalid records: %s': "Nombre d'enregistrement non valide : %s",
'Number of publications': 'Nombre de publications',
......@@ -759,9 +763,11 @@
'unknown': 'inconnu',
'Update': 'Actualiser',
'update citations': 'mise à jour des citations',
'Update citations': 'Mise à jour des citations',
'Update:': 'Update:',
'Url': 'Url',
'url': 'url',
'URL not well formed': 'URL mal formé',
'Use (...)&(...) for AND, (...)|(...) for OR, and ~(...) for NOT to build more complex queries.': 'Use (...)&(...) for AND, (...)|(...) for OR, and ~(...) for NOT to build more complex queries.',
'Use only digit character, no comma, no dot...': 'Utiliser seulement des chiffres, pas de virgule, pas de point...',
'User': 'Utilisateur',
......
......@@ -470,7 +470,8 @@ class ViewportUi(object):
citation_panel = to_panelWithUrlSelector(
virtdb.citation_selector,
baseUrl=URL("wizards", "citations"))
baseUrl=URL("wizards", "update_citations"),
timeout=ONE_HOUR)
edit_panel = to_panelWithUrlSelector(
virtdb.edit_insert_selector,
......
{{
# View of the update_citations wizard.
# Renders the counters and team_project values supplied by the controller
# as a title, a two-column summary table and a footer with the timing.
from datetime import datetime
# inline CSS applied to the table and to the cells containing the values
mytable = "margin-left: 6%;"
mytd = "text-align: right;"
# one row per counter, the translated label followed by the value
table = [
[T("Number of articles analysed"), TD(f"{counters.article}", _style=mytd)],
[T("URL not well formed"), TD(f"{counters.url_error}", _style=mytd)],
[T("HTTP connection error"), TD(f"{counters.http_error}", _style=mytd)],
[T("JSON decoding error"), TD(f"{counters.json_error}", _style=mytd)],
[T("Number of articles modified"), TD(f"{counters.insert}", _style=mytd)]]
# the title, translated heading followed by the team and project selection
title = f"{T('Update citations')} {team_project}"
=P(title, _class="my-p my-small-cap my-large")
# the summary table, each entry of the list becomes a table row
=TABLE(*[TR(*row, _class="dbui-tr") for row in table], _style=mytable)
#
# the footer, processing time of the request
#
# NOTE(review) request.now is presumably the time at which the request
# started, to be confirmed against the web2py documentation
delta = (datetime.now()-request.now).total_seconds()
delta = T('Page generated in %s seconds') % round(delta, 2)
=P(delta, BR(), request.now.strftime("%d %b %Y %H:%M"), _class="dbui-p")
=BR()
}}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment