Commit d0f2ff84 authored by LE GAC Renaud
Browse files

Merge branch '96-new-inspirehep-api' into 'master'

Resolve "Migrate to new inspirehep API"

Closes #96

See merge request !105
parents b25bbf1a 2524a79e
......@@ -40,12 +40,12 @@ def citations():
mplstyle()
mpl.rcParams['date.autoformatter.year'] = '%Y'
mpl.rcParams['date.autoformatter.month'] = '%b'
mpl.rcParams['date.autoformatter.day'] = '%d %b'
mpl.rcParams['date.autoformatter.hour'] = '%b %d %H'
mpl.rcParams['date.autoformatter.minute'] = '%H:%M'
mpl.rcParams['date.autoformatter.second'] = '%H:%M:%S'
# mpl.rcParams['date.autoformatter.year'] = '%Y'
# mpl.rcParams['date.autoformatter.month'] = '%b %Y'
# mpl.rcParams['date.autoformatter.day'] = '%d %b'
# mpl.rcParams['date.autoformatter.hour'] = '%b %d %H'
# mpl.rcParams['date.autoformatter.minute'] = '%H:%M'
# mpl.rcParams['date.autoformatter.second'] = '%H:%M:%S'
pd.set_option("display.width", None)
pd.set_option("display.max_rows", 500)
......
......@@ -8,21 +8,24 @@ import traceback
from gluon import current
from gluon.restricted import RestrictedError
from harvest_tools import (build_harvester_tool,
CheckAndFix,
CheckException,
DRY_RUN,
MsgCollection,
search_synonym,
ToolException)
from invenio_tools import (load_record,
OAI_URL,
RecordConf,
RecordThesis)
filter_logs,
get_rex_institute,
MsgCollection)
from plugin_dbui import (inline_alert,
Selector,
to_formPanel,
UNDEF_ID)
from requests.exceptions import RequestException
from store_tools import (CheckException,
load_record,
OAI_URL,
RecordCdsConfPaper,
RecordCdsThesis,
RecordHepConfPaper,
RecordHepThesis,
search_synonym,
ToolException)
MODE_DRY_RUN = T(DRY_RUN)
MSG_GREMLIN = "Oops a gremlin..."
......@@ -92,7 +95,7 @@ def free_run():
def edit_insert():
"""Edit an invenio record and insert it in the database.
"""Edit a record and insert it in the database.
Note:
Recovery procedures are applied to fix basic non-conformity, but
......@@ -142,8 +145,10 @@ def edit_insert():
return inline_alert(T("Error"), msg)
# record
logger.debug("load the record...")
record = load_record(selector.host, selector.record_id)
host = selector.host
shelf = ("literature" if host == "inspirehep.net" else None)
logger.debug(f"load the record {selector.host} {selector.record_id}")
record = load_record(selector.host, selector.record_id, shelf=shelf)
if record is None:
return inline_alert(T(MSG_GREMLIN), T(MSG_NO_RECORD))
......@@ -175,19 +180,20 @@ def edit_insert():
# ------------------------------------------------------------------------
#
# CheckAndFix (general)
# fix basic non-conformity (general)
# - is record with authors
# - is record with authors from my institute
# - standardise name of collaboration
# - format authors according to my format
# - extract authors from my institute signing the publication
# - check and fix submitted date
#
check = CheckAndFix()
# authors
try:
check.authors(record)
check.format_authors(record, fmt="F. Last")
check.my_affiliation(
record, selector.id_projects, selector.id_teams)
check.get_my_authors(record, sort=True)
record.check_and_fix(fmt_author="F. Last",
rex_institute=get_rex_institute(db, current.app),
sep_author=", ",
sort_author=True)
except CheckException as e:
logger.debug(str(e))
......@@ -201,14 +207,13 @@ def edit_insert():
values["PublicationsAuthors"] = record.authors()
values["PublicationsAuthors_institute"] = record.my_authors
# collaboration
recId = UNDEF_ID
# repeat collaboration check
try:
recId = UNDEF_ID
recId = search_synonym(db.collaborations,
"collaboration",
record.collaboration())
except ToolException as e:
logger.debug(str(e))
pass
values["PublicationsId_collaborations"] = int(recId)
......@@ -230,15 +235,14 @@ def edit_insert():
# ------------------------------------------------------------------------
#
# CheckAndFix (article)
# fix basic non-conformity (article)
#
if selector.controller in ("articles", "proceedings"):
check.paper_reference(record)
check.format_editor(record)
try:
record.format_editor()
recId = UNDEF_ID
try:
recId = search_synonym(db.publishers,
"abbreviation",
record.paper_editor())
......@@ -253,19 +257,19 @@ def edit_insert():
# ------------------------------------------------------------------------
#
# CheckAndFix (conference)
# fix basic non-conformity (conference)
#
if selector.controller in ("proceedings", "talks"):
if isinstance(record, (RecordCdsConfPaper, RecordHepConfPaper)):
try:
check.country(record)
check.conference_date(record)
record.check_conference_date()
except CheckException as e:
logger.debug(str(e))
pass
if isinstance(record, RecordConf):
values["PublicationsConference_title"] = \
record.conference_title()
......@@ -278,8 +282,8 @@ def edit_insert():
values["PublicationsConference_town"] = \
record.conference_town()
recId = UNDEF_ID
try:
recId = UNDEF_ID
recId = search_synonym(db.countries,
"country",
record.conference_country())
......@@ -295,25 +299,24 @@ def edit_insert():
# ------------------------------------------------------------------------
#
# CheckAndFix (theses)
# fix basic non-conformity (thesis)
#
if selector.controller == "theses":
if isinstance(record, RecordThesis):
if isinstance(record, (RecordCdsThesis, RecordHepThesis)):
record.format_universities()
values["PublicationsUniversities"] = \
record.these_universities()
values["PublicationsDirectors"] = record.these_directors()
values["PublicationsDefense"] = record.these_defense()
# ------------------------------------------------------------------------
#
# submitted date and year
try:
check.submitted(record)
except (CheckException, ToolException) as e:
logger.debug(str(e))
pass
#
values["PublicationsSubmitted"] = record.submitted()
if record.is_published():
......@@ -357,9 +360,15 @@ def insert_recjson():
tool.harvester.host = selector.host
tool.logs = []
tool.shelf = \
("literature" if selector.host == "inspirehep.net" else None)
recjson = json.loads(selector.recjson)
recjson = (recjson[0] if isinstance(recjson, list) else recjson)
if selector.host == "inspirehep.net":
recjson = recjson["metadata"]
tool.process_recjson(recjson)
except ToolException as e:
......@@ -455,6 +464,9 @@ def run():
logger.info("-"*79)
# filter logs to remove duplicated entries
logs = filter_logs(logs)
# delegate rendering to the report view
response.view = "harvest/layout.%s" % request.extension
return dict(collection_logs=collection_logs,
......@@ -557,6 +569,9 @@ def run_all():
logger.info("-"*79)
# filter logs to remove duplicated entries
logs = filter_logs(logs)
# tune selector parameters used in the report title
if query is None:
selector.id_projects = None
......
......@@ -7,7 +7,7 @@ import re
from check_tools import check_publication
from gluon.storage import Storage
from harvest_tools import DRY_RUN
from invenio_tools import CdsException, load_record
from store_tools import load_record, StoreException
from plugin_dbui import (CALLBACK_ERRORS,
get_foreign_field,
get_id,
......@@ -15,7 +15,8 @@ from plugin_dbui import (CALLBACK_ERRORS,
is_foreign_field,
JSONEncoder,
Selector,
to_fields)
to_fields,
UNDEF_ID)
MODE_DRY_RUN = T(DRY_RUN)
MSG_NO_AUTHORS = "<br><br>Removing affiliation failed.<br>"\
......@@ -43,7 +44,7 @@ def affiliation_institute():
try:
record = load_record("inspirehep.net", institute_id)
except CdsException:
except StoreException:
raise HTTP(500, T(MSG_NO_SERVER))
if record is None:
......@@ -96,7 +97,7 @@ def affiliation_publication():
try:
record = load_record(publication_store, publication_id)
except CdsException:
except StoreException:
raise HTTP(500, T(MSG_NO_SERVER))
if record is None:
......@@ -238,6 +239,10 @@ def compare_publications():
if is_foreign_field(field):
k_tablename, k_fieldname, k_id = get_foreign_field(field)
# protection against None
value1 = (UNDEF_ID if value1 is None else value1)
value2 = (UNDEF_ID if value2 is None else value2)
value1 = db[k_tablename][value1][k_fieldname]
value2 = db[k_tablename][value2][k_fieldname]
......@@ -435,12 +440,12 @@ def update_citations():
import datetime
import requests
from invenio_tools import CdsException, InvenioStore
from json.decoder import JSONDecodeError
from reporting_tools import repr_team_project
from store_tools import InspirehepStore, StoreException
today = datetime.date.today()
rex_ins = re.compile(r"https?://inspirehep.net/record/(\d+)")
rex_ins = re.compile(r"inspirehep.net/record/(\d+)")
citations = db.citations
publications = db.publications
......@@ -461,7 +466,7 @@ def update_citations():
query = selector.query(publications)
# get the number of citations and update the database table
store = InvenioStore("inspirehep.net", shelf="literature")
store = InspirehepStore("inspirehep.net", shelf="literature")
for row in db(query).iterselect(publications.id, publications.origin):
counters.article += 1
......@@ -470,15 +475,15 @@ def update_citations():
# interrogate inspirehep.net
try:
recid = rex_ins.search(row.origin).group(1)
count = store.get_field(recid, "number_of_citations")
count = store.get_field(recid, "citation_count")
except AttributeError:
logger.warning(f"record identifier not found in {row.origin}")
counters.recid += 1
continue
except CdsException:
logger.warning(f"failed to get citations for {recid}")
except StoreException as e:
logger.warning(f"failed to get citations for {recid}: {e}")
counters.failed += 1
continue
......
store_tools.cdsstore.CdsStore.get_field
=======================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.get_field
store_tools.cdsstore.CdsStore.get_ids
=====================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.get_ids
store_tools.cdsstore.CdsStore.get_record
========================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.get_record
store_tools.cdsstore.CdsStore.interrogate
=========================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.interrogate
store_tools.cdsstore.CdsStore.last_search_url
=============================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.last_search_url
store_tools.cdsstore.CdsStore.search
====================================
.. currentmodule:: store_tools.cdsstore
.. automethod:: CdsStore.search
invenio_tools.base.ARXIV
========================
.. currentmodule:: invenio_tools.base
.. autodata:: ARXIV
\ No newline at end of file
invenio_tools.base.ARXIV_PDF
============================
.. currentmodule:: invenio_tools.base
.. autodata:: ARXIV_PDF
\ No newline at end of file
invenio_tools.base.MSG_INV_CONF
===============================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_INV_CONF
\ No newline at end of file
invenio_tools.base.MSG_INV_CONF_KEY
===================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_INV_CONF_KEY
\ No newline at end of file
invenio_tools.base.MSG_NO_CONF
==============================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_NO_CONF
\ No newline at end of file
invenio_tools.base.MSG_NO_CONF_ID_KEY
=====================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_NO_CONF_ID_KEY
\ No newline at end of file
invenio_tools.base.MSG_NO_COUNTRY
=================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_NO_COUNTRY
\ No newline at end of file
invenio_tools.base.MSG_NO_PUBLISHER
===================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_NO_PUBLISHER
\ No newline at end of file
invenio_tools.base.MSG_NO_THESIS
================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_NO_THESIS
\ No newline at end of file
invenio_tools.base.MSG_WELL_FORMED_COLLABORATION
================================================
.. currentmodule:: invenio_tools.base
.. autodata:: MSG_WELL_FORMED_COLLABORATION
\ No newline at end of file
invenio_tools.base.OAI
======================
.. currentmodule:: invenio_tools.base
.. autodata:: OAI
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment