Commit cf4e7c7c authored by LE GAC Renaud
Browse files

Update wizards.update_citations

parent 03615868
......@@ -435,36 +435,33 @@ def update_citations():
import datetime
import requests
from invenio_tools import InvenioStore
from invenio_tools import CdsException, InvenioStore
from json.decoder import JSONDecodeError
from reporting_tools import repr_team_project
today = datetime.date.today()
rex_ins = re.compile(r"https?://inspirehep.net/record/(\d+)")
citations = db.citations
id_acl = get_id(db.categories, code="ACL")
kwargs = dict(of="recjson", ot="number_of_citations")
publications = db.publications
rex_ins = re.compile(r"(https?://inspirehep.net/record/\d+)")
store = InvenioStore("inspirehep.net")
today = datetime.date.today()
counters = Storage(article=0,
http_error=0,
insert=0,
json_error=0,
list_size=0,
not_list=0,
url_error=0)
recid=0,
failed=0,
insert=0)
# get user requirement
selector = Selector(virtdb.citation_selector)
# get the list of article store in the inspirehep store
id_acl = get_id(db.categories, code="ACL")
selector.append_query(publications.id_categories == id_acl)
selector.append_query(publications.origin.contains("inspirehep"))
query = selector.query(publications)
# get the number of citation and update the database table
store = InvenioStore("inspirehep.net", shelf="literature")
for row in db(query).iterselect(publications.id, publications.origin):
counters.article += 1
......@@ -472,35 +469,17 @@ def update_citations():
# interrogate inspirehep.net
try:
url = rex_ins.search(row.origin).group(1)
rep = store.interrogate(url, timeout=60, **kwargs)
lst = rep.json()
if not isinstance(lst, list):
logger.warning(f"JSON response is not a list")
counters.not_list += 1
continue
if len(lst) != 1:
logger.warning(f"size of the return list is not one")
counters.list_size += 1
continue
count = lst[0].get("number_of_citations")
recid = rex_ins.search(row.origin).group(1)
count = store.get_field(recid, "number_of_citations")
except AttributeError:
logger.warning(f"inspirehep URL not well formed {row.origin}")
counters.url_error += 1
continue
except JSONDecodeError:
logger.warning("JSON decoding error")
counters.json_error += 1
logger.warning(f"record identifier not found in {row.origin}")
counters.recid += 1
continue
except requests.exceptions.RequestException:
logger.warning(f"HTTP error interrogating {url}")
counters.http_error += 1
except CdsException:
logger.warning(f"failed to get citations for {recid}")
counters.failed += 1
continue
# check if the number of count changes
......@@ -513,7 +492,7 @@ def update_citations():
continue
# update the citations table
logger.info(f"update {url} citations to {count}")
logger.info(f"update citations for {recid} by {count - last_count}")
counters.insert += 1
idpubli = row.id
......@@ -527,11 +506,8 @@ def update_citations():
# inform the user
logger.info(f" number of article: {counters.article}")
logger.info(f" bad inspirehep URL: {counters.url_error}")
logger.info(f" HTTP connection error: {counters.url_error}")
logger.info(f" JSON decoding error: {counters.json_error}")
logger.info(f" response is not a list: {counters.not_list}")
logger.info(f" list size is not one: {counters.list_size}")
logger.info(f" bad record identifier: {counters.recid}")
logger.info(f" failed to get citations: {counters.failed}")
logger.info(f" insert or update in db: {counters.insert}")
return dict(counters=counters,
......
......@@ -89,6 +89,7 @@
'Axis Label Converters': 'Axis Label Converters',
'Axis values are used to defined the title of each level in section. This field allows to replace some values by another ones. Rule: label1: "value1", label2: "value2",....': "Le nom de l'axe est utilisé comme titre pour le niveau. Ce champ permet de remplacer cette valeur par une autre. Régle: label1: 'value1', label2: 'value2',....",
'Axis Vertical': 'Axis Vertical',
'Bad record identifer': 'Mauvais identifiant',
'basic': 'basic',
'Binary files': 'fichiers binaires',
'Book': 'Ouvrage',
......@@ -251,6 +252,7 @@
'extract authors': 'extraire les auteurs',
'Fail to decode HTTP response': 'Echec du decodage de la réponse HTTP',
'Fail to insert the new record in the database.': "Echec de l'insertion d'un nouvel enregistrement dans la base de donnée.",
'Failed to get citations': 'Echec pour obtenir le nombre de citations',
'Field': 'Champ',
'Fields with identical value are not listed.': 'Les champs avec des valeurs identiques ne sont pas listés.',
'Fill': 'Remplir',
......@@ -388,6 +390,7 @@
'Load in the database': 'Chargé dans la base de données',
'Loading failed': 'Echec du chargement',
'Loading...': 'Chargement en cours...',
'log cron jobs': 'journaux des taches planifiées',
'Log In': 'Log In',
'Logged in': 'Logged in',
'Logged out': 'Déconnexion',
......
......@@ -6,11 +6,8 @@
table = [
[T("Number of articles analysed"), TD(f"{counters.article}", _style=mytd)],
[T("URL not well formed"), TD(f"{counters.url_error}", _style=mytd)],
[T("HTTP connection error"), TD(f"{counters.http_error}", _style=mytd)],
[T("JSON decoding error"), TD(f"{counters.json_error}", _style=mytd)],
[T("response is not a list"), TD(f"{counters.not_list}", _style=mytd)],
[T("list size is not one"), TD(f"{counters.list_size}", _style=mytd)],
[T("Bad record identifer"), TD(f"{counters.recid}", _style=mytd)],
[T("Failed to get citations"), TD(f"{counters.failed}", _style=mytd)],
[T("Number of articles modified"), TD(f"{counters.insert}", _style=mytd)]]
title = f"{T('Update citations')} {team_project}"
......@@ -26,4 +23,4 @@
=P(delta, BR(), request.now.strftime("%d %b %Y %H:%M"), _class="dbui-p")
=BR()
}}
\ No newline at end of file
}}
......@@ -14,11 +14,8 @@
response.write(f"\t Scan duration: {delta}\n\n")
response.write(f"\tNumber of articles analysed: {counters.article}\n")
response.write(f"\t URL not well formed: {counters.url_error}\n")
response.write(f"\t HTTP connection error: {counters.http_error}\n")
response.write(f"\t JSON decoding error: {counters.json_error}\n")
response.write(f"\t response is not a list: {counters.not_list}\n")
response.write(f"\t list size is not one: {counters.list_size}\n")
response.write(f"\t Bad record identifier: {counters.recid}\n")
response.write(f"\t Failed to get citations: {counters.failed}\n")
response.write(f"\tNumber of articles modified: {counters.insert}\n\n")
}}
\ No newline at end of file
}}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment