Commit d0f2ff84 authored by LE GAC Renaud
Browse files

Merge branch '96-new-inspirehep-api' into 'master'

Resolve "Migrate to new inspirehep API"

Closes #96

See merge request !105
parents b25bbf1a 2524a79e
...@@ -40,12 +40,12 @@ def citations(): ...@@ -40,12 +40,12 @@ def citations():
mplstyle() mplstyle()
mpl.rcParams['date.autoformatter.year'] = '%Y' # mpl.rcParams['date.autoformatter.year'] = '%Y'
mpl.rcParams['date.autoformatter.month'] = '%b' # mpl.rcParams['date.autoformatter.month'] = '%b %Y'
mpl.rcParams['date.autoformatter.day'] = '%d %b' # mpl.rcParams['date.autoformatter.day'] = '%d %b'
mpl.rcParams['date.autoformatter.hour'] = '%b %d %H' # mpl.rcParams['date.autoformatter.hour'] = '%b %d %H'
mpl.rcParams['date.autoformatter.minute'] = '%H:%M' # mpl.rcParams['date.autoformatter.minute'] = '%H:%M'
mpl.rcParams['date.autoformatter.second'] = '%H:%M:%S' # mpl.rcParams['date.autoformatter.second'] = '%H:%M:%S'
pd.set_option("display.width", None) pd.set_option("display.width", None)
pd.set_option("display.max_rows", 500) pd.set_option("display.max_rows", 500)
......
...@@ -8,21 +8,24 @@ import traceback ...@@ -8,21 +8,24 @@ import traceback
from gluon import current from gluon import current
from gluon.restricted import RestrictedError from gluon.restricted import RestrictedError
from harvest_tools import (build_harvester_tool, from harvest_tools import (build_harvester_tool,
CheckAndFix,
CheckException,
DRY_RUN, DRY_RUN,
MsgCollection, filter_logs,
search_synonym, get_rex_institute,
ToolException) MsgCollection)
from invenio_tools import (load_record,
OAI_URL,
RecordConf,
RecordThesis)
from plugin_dbui import (inline_alert, from plugin_dbui import (inline_alert,
Selector, Selector,
to_formPanel, to_formPanel,
UNDEF_ID) UNDEF_ID)
from requests.exceptions import RequestException from requests.exceptions import RequestException
from store_tools import (CheckException,
load_record,
OAI_URL,
RecordCdsConfPaper,
RecordCdsThesis,
RecordHepConfPaper,
RecordHepThesis,
search_synonym,
ToolException)
MODE_DRY_RUN = T(DRY_RUN) MODE_DRY_RUN = T(DRY_RUN)
MSG_GREMLIN = "Oops a gremlin..." MSG_GREMLIN = "Oops a gremlin..."
...@@ -92,7 +95,7 @@ def free_run(): ...@@ -92,7 +95,7 @@ def free_run():
def edit_insert(): def edit_insert():
"""Edit an invenio record and insert it in the database. """Edit a record and insert it in the database.
Note: Note:
Recovery procedures are applied to fix basic non-conformity, but Recovery procedures are applied to fix basic non-conformity, but
...@@ -142,8 +145,10 @@ def edit_insert(): ...@@ -142,8 +145,10 @@ def edit_insert():
return inline_alert(T("Error"), msg) return inline_alert(T("Error"), msg)
# record # record
logger.debug("load the record...") host = selector.host
record = load_record(selector.host, selector.record_id) shelf = ("literature" if host == "inspirehep.net" else None)
logger.debug(f"load the record {selector.host} {selector.record_id}")
record = load_record(selector.host, selector.record_id, shelf=shelf)
if record is None: if record is None:
return inline_alert(T(MSG_GREMLIN), T(MSG_NO_RECORD)) return inline_alert(T(MSG_GREMLIN), T(MSG_NO_RECORD))
...@@ -175,19 +180,20 @@ def edit_insert(): ...@@ -175,19 +180,20 @@ def edit_insert():
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# #
# CheckAndFix (general) # fix basic non-conformity (general)
# - is record with authors
# - is record with authors from my institute
# - standardise name of collaboration
# - format authors according to my format
# - extract authors from my institute signing the publication
# - check and fix submitted date
# #
check = CheckAndFix()
# authors # authors
try: try:
check.authors(record) record.check_and_fix(fmt_author="F. Last",
check.format_authors(record, fmt="F. Last") rex_institute=get_rex_institute(db, current.app),
sep_author=", ",
check.my_affiliation( sort_author=True)
record, selector.id_projects, selector.id_teams)
check.get_my_authors(record, sort=True)
except CheckException as e: except CheckException as e:
logger.debug(str(e)) logger.debug(str(e))
...@@ -201,14 +207,13 @@ def edit_insert(): ...@@ -201,14 +207,13 @@ def edit_insert():
values["PublicationsAuthors"] = record.authors() values["PublicationsAuthors"] = record.authors()
values["PublicationsAuthors_institute"] = record.my_authors values["PublicationsAuthors_institute"] = record.my_authors
# collaboration # repeat collaboration check
recId = UNDEF_ID
try: try:
recId = UNDEF_ID
recId = search_synonym(db.collaborations, recId = search_synonym(db.collaborations,
"collaboration", "collaboration",
record.collaboration()) record.collaboration())
except ToolException as e: except ToolException as e:
logger.debug(str(e))
pass pass
values["PublicationsId_collaborations"] = int(recId) values["PublicationsId_collaborations"] = int(recId)
...@@ -230,15 +235,14 @@ def edit_insert(): ...@@ -230,15 +235,14 @@ def edit_insert():
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# #
# CheckAndFix (article) # fix basic non-conformity (article)
# #
if selector.controller in ("articles", "proceedings"): if selector.controller in ("articles", "proceedings"):
check.paper_reference(record)
check.format_editor(record)
recId = UNDEF_ID
try: try:
record.format_editor()
recId = UNDEF_ID
recId = search_synonym(db.publishers, recId = search_synonym(db.publishers,
"abbreviation", "abbreviation",
record.paper_editor()) record.paper_editor())
...@@ -253,19 +257,19 @@ def edit_insert(): ...@@ -253,19 +257,19 @@ def edit_insert():
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# #
# CheckAndFix (conference) # fix basic non-conformity (conference)
# #
if selector.controller in ("proceedings", "talks"): if selector.controller in ("proceedings", "talks"):
try: if isinstance(record, (RecordCdsConfPaper, RecordHepConfPaper)):
check.country(record)
check.conference_date(record)
except CheckException as e: try:
logger.debug(str(e)) record.check_conference_date()
pass
except CheckException as e:
logger.debug(str(e))
pass
if isinstance(record, RecordConf):
values["PublicationsConference_title"] = \ values["PublicationsConference_title"] = \
record.conference_title() record.conference_title()
...@@ -278,8 +282,8 @@ def edit_insert(): ...@@ -278,8 +282,8 @@ def edit_insert():
values["PublicationsConference_town"] = \ values["PublicationsConference_town"] = \
record.conference_town() record.conference_town()
recId = UNDEF_ID
try: try:
recId = UNDEF_ID
recId = search_synonym(db.countries, recId = search_synonym(db.countries,
"country", "country",
record.conference_country()) record.conference_country())
...@@ -295,25 +299,24 @@ def edit_insert(): ...@@ -295,25 +299,24 @@ def edit_insert():
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# #
# CheckAndFix (theses) # fix basic non-conformity (thesis)
# #
if selector.controller == "theses": if selector.controller == "theses":
if isinstance(record, RecordThesis): if isinstance(record, (RecordCdsThesis, RecordHepThesis)):
record.format_universities()
values["PublicationsUniversities"] = \ values["PublicationsUniversities"] = \
record.these_universities() record.these_universities()
values["PublicationsDirectors"] = record.these_directors() values["PublicationsDirectors"] = record.these_directors()
values["PublicationsDefense"] = record.these_defense() values["PublicationsDefense"] = record.these_defense()
# ------------------------------------------------------------------------
#
# submitted date and year # submitted date and year
try: #
check.submitted(record)
except (CheckException, ToolException) as e:
logger.debug(str(e))
pass
values["PublicationsSubmitted"] = record.submitted() values["PublicationsSubmitted"] = record.submitted()
if record.is_published(): if record.is_published():
...@@ -357,9 +360,15 @@ def insert_recjson(): ...@@ -357,9 +360,15 @@ def insert_recjson():
tool.harvester.host = selector.host tool.harvester.host = selector.host
tool.logs = [] tool.logs = []
tool.shelf = \
("literature" if selector.host == "inspirehep.net" else None)
recjson = json.loads(selector.recjson) recjson = json.loads(selector.recjson)
recjson = (recjson[0] if isinstance(recjson, list) else recjson) recjson = (recjson[0] if isinstance(recjson, list) else recjson)
if selector.host == "inspirehep.net":
recjson = recjson["metadata"]
tool.process_recjson(recjson) tool.process_recjson(recjson)
except ToolException as e: except ToolException as e:
...@@ -455,6 +464,9 @@ def run(): ...@@ -455,6 +464,9 @@ def run():
logger.info("-"*79) logger.info("-"*79)
# filter logs to remove duplicated entries
logs = filter_logs(logs)
# delegate rendering to the report view # delegate rendering to the report view
response.view = "harvest/layout.%s" % request.extension response.view = "harvest/layout.%s" % request.extension
return dict(collection_logs=collection_logs, return dict(collection_logs=collection_logs,
...@@ -557,6 +569,9 @@ def run_all(): ...@@ -557,6 +569,9 @@ def run_all():
logger.info("-"*79) logger.info("-"*79)
# filter logs to remove duplicated entries
logs = filter_logs(logs)
# tune selector parameters used in the report title # tune selector parameters used in the report title
if query is None: if query is None:
selector.id_projects = None selector.id_projects = None
......
...@@ -7,7 +7,7 @@ import re ...@@ -7,7 +7,7 @@ import re
from check_tools import check_publication from check_tools import check_publication
from gluon.storage import Storage from gluon.storage import Storage
from harvest_tools import DRY_RUN from harvest_tools import DRY_RUN
from invenio_tools import CdsException, load_record from store_tools import load_record, StoreException
from plugin_dbui import (CALLBACK_ERRORS, from plugin_dbui import (CALLBACK_ERRORS,
get_foreign_field, get_foreign_field,
get_id, get_id,
...@@ -15,7 +15,8 @@ from plugin_dbui import (CALLBACK_ERRORS, ...@@ -15,7 +15,8 @@ from plugin_dbui import (CALLBACK_ERRORS,
is_foreign_field, is_foreign_field,
JSONEncoder, JSONEncoder,
Selector, Selector,
to_fields) to_fields,
UNDEF_ID)
MODE_DRY_RUN = T(DRY_RUN) MODE_DRY_RUN = T(DRY_RUN)
MSG_NO_AUTHORS = "<br><br>Removing affiliation failed.<br>"\ MSG_NO_AUTHORS = "<br><br>Removing affiliation failed.<br>"\
...@@ -43,7 +44,7 @@ def affiliation_institute(): ...@@ -43,7 +44,7 @@ def affiliation_institute():
try: try:
record = load_record("inspirehep.net", institute_id) record = load_record("inspirehep.net", institute_id)
except CdsException: except StoreException:
raise HTTP(500, T(MSG_NO_SERVER)) raise HTTP(500, T(MSG_NO_SERVER))
if record is None: if record is None:
...@@ -96,7 +97,7 @@ def affiliation_publication(): ...@@ -96,7 +97,7 @@ def affiliation_publication():
try: try:
record = load_record(publication_store, publication_id) record = load_record(publication_store, publication_id)
except CdsException: except StoreException:
raise HTTP(500, T(MSG_NO_SERVER)) raise HTTP(500, T(MSG_NO_SERVER))
if record is None: if record is None:
...@@ -238,6 +239,10 @@ def compare_publications(): ...@@ -238,6 +239,10 @@ def compare_publications():
if is_foreign_field(field): if is_foreign_field(field):
k_tablename, k_fieldname, k_id = get_foreign_field(field) k_tablename, k_fieldname, k_id = get_foreign_field(field)
# protection against None
value1 = (UNDEF_ID if value1 is None else value1)
value2 = (UNDEF_ID if value2 is None else value2)
value1 = db[k_tablename][value1][k_fieldname] value1 = db[k_tablename][value1][k_fieldname]
value2 = db[k_tablename][value2][k_fieldname] value2 = db[k_tablename][value2][k_fieldname]
...@@ -435,12 +440,12 @@ def update_citations(): ...@@ -435,12 +440,12 @@ def update_citations():
import datetime import datetime
import requests import requests
from invenio_tools import CdsException, InvenioStore
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError
from reporting_tools import repr_team_project from reporting_tools import repr_team_project
from store_tools import InspirehepStore, StoreException
today = datetime.date.today() today = datetime.date.today()
rex_ins = re.compile(r"https?://inspirehep.net/record/(\d+)") rex_ins = re.compile(r"inspirehep.net/record/(\d+)")
citations = db.citations citations = db.citations
publications = db.publications publications = db.publications
...@@ -461,7 +466,7 @@ def update_citations(): ...@@ -461,7 +466,7 @@ def update_citations():
query = selector.query(publications) query = selector.query(publications)
# get the number of citation and update the database table # get the number of citation and update the database table
store = InvenioStore("inspirehep.net", shelf="literature") store = InspirehepStore("inspirehep.net", shelf="literature")
for row in db(query).iterselect(publications.id, publications.origin): for row in db(query).iterselect(publications.id, publications.origin):
counters.article += 1 counters.article += 1
...@@ -470,15 +475,15 @@ def update_citations(): ...@@ -470,15 +475,15 @@ def update_citations():
# interrogate inspirehep.net # interrogate inspirehep.net
try: try:
recid = rex_ins.search(row.origin).group(1) recid = rex_ins.search(row.origin).group(1)
count = store.get_field(recid, "number_of_citations") count = store.get_field(recid, "citation_count")
except AttributeError: except AttributeError:
logger.warning(f"record identifier not found in {row.origin}") logger.warning(f"record identifier not found in {row.origin}")
counters.recid += 1 counters.recid += 1
continue continue
except CdsException: except StoreException as e:
logger.warning(f"failed to get citations for {recid}") logger.warning(f"failed to get citations for {recid}: {e}")
counters.failed += 1 counters.failed += 1
continue continue
......
store_tools.cdsstore.CdsStore.get_field
=======================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.get_field
store_tools.cdsstore.CdsStore.get_ids
=====================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.get_ids
store_tools.cdsstore.CdsStore.get_record
========================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.get_record
store_tools.cdsstore.CdsStore.interrogate
=========================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.interrogate
store_tools.cdsstore.CdsStore.last_search_url
=============================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.last_search_url
store_tools.cdsstore.CdsStore.search
====================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.search
invenio_tools.base.ARXIV
========================
.. currentmodule:: invenio_tools.base
.. autodata:: ARXIV
\ No newline at end of file
invenio_tools.base.ARXIV_PDF
============================
.. currentmodule:: invenio_tools.base
.. autodata:: ARXIV_PDF
\ No newline at end of file
invenio_tools.base.MSG_INV_CONF
===============================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_INV_CONF
\ No newline at end of file
invenio_tools.base.MSG_INV_CONF_KEY
===================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_INV_CONF_KEY
\ No newline at end of file
invenio_tools.base.MSG_NO_CONF
==============================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_NO_CONF
\ No newline at end of file
invenio_tools.base.MSG_NO_CONF_ID_KEY
=====================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_NO_CONF_ID_KEY
\ No newline at end of file
invenio_tools.base.MSG_NO_COUNTRY
=================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_NO_COUNTRY
\ No newline at end of file
invenio_tools.base.MSG_NO_PUBLISHER
===================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_NO_PUBLISHER
\ No newline at end of file
invenio_tools.base.MSG_NO_THESIS
================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_NO_THESIS
\ No newline at end of file
invenio_tools.base.MSG_WELL_FORMED_COLLABORATION
================================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_WELL_FORMED_COLLABORATION
\ No newline at end of file
invenio_tools.base.OAI
======================
.. currentmodule:: invenio_tools.base
.. autodata:: OAI
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment