Commit fa3e7a33 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Add controller and view for wizards/update_citations.

parent d3264b69
...@@ -424,3 +424,101 @@ def harvester(): ...@@ -424,3 +424,101 @@ def harvester():
raise HTTP(500, msg) raise HTTP(500, msg)
return return
def update_citations():
    """Update the citations table.

    * select articles in the inspirehep store according to user criteria
    * get the number of citations per document
    * update the citations table

    For each selected publication the inspirehep record URL is extracted
    from its ``origin`` field and the store is interrogated. A new entry
    is written in the citations table (one per publication and per day)
    only when the count differs from the most recent one recorded.
    Publications for which the URL, the HTTP request or the JSON payload
    is faulty are skipped and tallied.

    Returns:
        dict:
            * ``counters``: Storage with the tallies ``article``,
              ``http_error``, ``insert``, ``json_error`` and ``url_error``
            * ``team_project``: value returned by
              ``repr_team_project(db, selector)``

    """
    import datetime
    import requests

    from invenio_tools import InvenioStore
    from json.decoder import JSONDecodeError
    from reporting_tools import repr_team_project

    citations = db.citations
    id_acl = get_id(db.categories, code="ACL")
    kwargs = dict(of="recjson", ot="number_of_citations")
    publications = db.publications
    rex_ins = re.compile(r"(https?://inspirehep.net/record/\d+)")
    store = InvenioStore("inspirehep.net")
    today = datetime.date.today()

    counters = Storage(article=0,
                       http_error=0,
                       insert=0,
                       json_error=0,
                       url_error=0)

    # get user requirement
    selector = Selector(virtdb.citation_selector)

    # get the list of articles stored in the inspirehep store
    selector.append_query(publications.id_categories == id_acl)
    selector.append_query(publications.origin.contains("inspirehep"))
    query = selector.query(publications)

    # get the number of citations and update the database table
    for row in db(query).iterselect(publications.id, publications.origin):

        counters.article += 1
        logger.debug(row.origin)

        # interrogate inspirehep.net
        try:
            # search() returns None when origin holds no record URL,
            # the resulting AttributeError is handled below
            url = rex_ins.search(row.origin).group(1)
            rep = store.interogate(url, timeout=30, **kwargs)
            count = rep.json()[0].get("number_of_citations")

        except AttributeError:
            logger.warning(f"inspirehep URL not well formed {row.origin}")
            counters.url_error += 1
            continue

        except JSONDecodeError:
            logger.warning("JSON decoding error")
            counters.json_error += 1
            continue

        except (IndexError, KeyError):
            # valid JSON but not a non-empty list of records:
            # skip the article instead of aborting the whole run
            logger.warning(f"unexpected JSON payload for {url}")
            counters.json_error += 1
            continue

        except requests.exceptions.RequestException:
            logger.warning(f"HTTP error interrogating {url}")
            counters.http_error += 1
            continue

        # check if the number of citations changed since the last entry
        myset = db(citations.id_publications == row.id)
        if not myset.isempty():
            entries = myset.select(orderby=citations.date)
            last_count = entries.last().count
            logger.debug(f"last count {last_count} new one {count}")

            if last_count == count:
                continue

        # update the citations table
        # (at most one entry per publication and per day)
        logger.info(f"update {url} citations to {count}")
        counters.insert += 1
        idpubli = row.id

        citations.update_or_insert(
            (citations.date == today) & (citations.id_publications == idpubli),
            date=today,
            id_publications=idpubli,
            count=count)

    db.commit()

    # inform the user
    logger.info(f" number of article: {counters.article}")
    logger.info(f" bad inspirehep URL: {counters.url_error}")
    logger.info(f" HTTP connection error: {counters.http_error}")
    logger.info(f" JSON decoding error: {counters.json_error}")
    logger.info(f" insert or update in db: {counters.insert}")

    return dict(counters=counters,
                team_project=repr_team_project(db, selector))
...@@ -313,6 +313,7 @@ ...@@ -313,6 +313,7 @@
'Help': 'Aide', 'Help': 'Aide',
'Horizontal': 'Horizontal', 'Horizontal': 'Horizontal',
'Host': 'Host', 'Host': 'Host',
'HTTP connection error': 'Erreur de connexion HTTP',
'HTTP Error': 'HTTP Error', 'HTTP Error': 'HTTP Error',
'id': 'id', 'id': 'id',
'Id': 'Id', 'Id': 'Id',
...@@ -345,6 +346,7 @@ ...@@ -345,6 +346,7 @@
'ISBN': 'ISBN', 'ISBN': 'ISBN',
'ISSN': 'ISSN', 'ISSN': 'ISSN',
'Javascript API': 'Javascript API', 'Javascript API': 'Javascript API',
'JSON decoding error': 'Erreur de décodage JSON',
'Key U': 'Clé U', 'Key U': 'Clé U',
'Key V': 'Clé V', 'Key V': 'Clé V',
'Keys already exist!': 'La clé existe!', 'Keys already exist!': 'La clé existe!',
...@@ -424,6 +426,8 @@ ...@@ -424,6 +426,8 @@
'Notes and reports to committees': 'Notes et Rapports à des Comités', 'Notes and reports to committees': 'Notes et Rapports à des Comités',
'Notes and Reports to Committees': 'Notes et Rapports à des Comités', 'Notes and Reports to Committees': 'Notes et Rapports à des Comités',
'Number for the first pages or a range 69-80': 'Numéro de la première page ou un range 69-80', 'Number for the first pages or a range 69-80': 'Numéro de la première page ou un range 69-80',
'Number of articles analysed': "Nombre d'articles analysés",
'Number of articles modified': "Nombre d'articles modifiés",
'Number of invalid records': "Nombre d'enregistrement non valide", 'Number of invalid records': "Nombre d'enregistrement non valide",
'Number of invalid records: %s': "Nombre d'enregistrement non valide : %s", 'Number of invalid records: %s': "Nombre d'enregistrement non valide : %s",
'Number of publications': 'Nombre de publications', 'Number of publications': 'Nombre de publications',
...@@ -759,9 +763,11 @@ ...@@ -759,9 +763,11 @@
'unknown': 'inconnu', 'unknown': 'inconnu',
'Update': 'Actualiser', 'Update': 'Actualiser',
'update citations': 'mise à jour des citations', 'update citations': 'mise à jour des citations',
'Update citations': 'Mise à jour des citations',
'Update:': 'Update:', 'Update:': 'Update:',
'Url': 'Url', 'Url': 'Url',
'url': 'url', 'url': 'url',
'URL not well formed': 'URL mal formé',
'Use (...)&(...) for AND, (...)|(...) for OR, and ~(...) for NOT to build more complex queries.': 'Use (...)&(...) for AND, (...)|(...) for OR, and ~(...) for NOT to build more complex queries.', 'Use (...)&(...) for AND, (...)|(...) for OR, and ~(...) for NOT to build more complex queries.': 'Use (...)&(...) for AND, (...)|(...) for OR, and ~(...) for NOT to build more complex queries.',
'Use only digit character, no comma, no dot...': 'Utiliser seulement des chiffres, pas de virgule, pas de point...', 'Use only digit character, no comma, no dot...': 'Utiliser seulement des chiffres, pas de virgule, pas de point...',
'User': 'Utilisateur', 'User': 'Utilisateur',
......
...@@ -470,7 +470,8 @@ class ViewportUi(object): ...@@ -470,7 +470,8 @@ class ViewportUi(object):
citation_panel = to_panelWithUrlSelector( citation_panel = to_panelWithUrlSelector(
virtdb.citation_selector, virtdb.citation_selector,
baseUrl=URL("wizards", "citations")) baseUrl=URL("wizards", "update_citations"),
timeout=ONE_HOUR)
edit_panel = to_panelWithUrlSelector( edit_panel = to_panelWithUrlSelector(
virtdb.edit_insert_selector, virtdb.edit_insert_selector,
......
{{
# View of the update_citations wizard.
# Uses the variables "counters" and "team_project" to render a title,
# a table with one row per counter and a footer with the processing time.
from datetime import datetime
# inline CSS for the table and for the cells holding the counter values
mytable = "margin-left: 6%;"
mytd = "text-align: right;"
# one row per counter: translated label followed by its value
table = [
[T("Number of articles analysed"), TD(f"{counters.article}", _style=mytd)],
[T("URL not well formed"), TD(f"{counters.url_error}", _style=mytd)],
[T("HTTP connection error"), TD(f"{counters.http_error}", _style=mytd)],
[T("JSON decoding error"), TD(f"{counters.json_error}", _style=mytd)],
[T("Number of articles modified"), TD(f"{counters.insert}", _style=mytd)]]
# title: translated caption followed by the team / project representation
title = f"{T('Update citations')} {team_project}"
=P(title, _class="my-p my-small-cap my-large")
=TABLE(*[TR(*row, _class="dbui-tr") for row in table], _style=mytable)
#
# the footer, processing time of the request
#
# elapsed time in seconds between request.now and the rendering of the view
delta = (datetime.now()-request.now).total_seconds()
delta = T('Page generated in %s seconds') % round(delta, 2)
=P(delta, BR(), request.now.strftime("%d %b %Y %H:%M"), _class="dbui-p")
=BR()
}}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment