Commit 0cde4e17 authored by LE GAC Renaud
Browse files

Merge branch '84-fix-insert-marcxml' into 'master'

Resolve "Fail to insert MarcXML"

Closes #84

See merge request !85
parents bbe9d6d6 befcf194
......@@ -322,6 +322,7 @@ def insert_marcxml():
if not tool:
return inline_alert(T("Error"), T("Select an harvester."))
tool.harvester.host = selector.host
tool.process_xml(selector.xml)
except ToolException as e:
......
......@@ -5,7 +5,7 @@
import re
import regex
from base import search_synonym, ToolException
from base import format_author_fr, search_synonym, ToolException
from exception import CheckException
from gluon import current
from invenio_tools import (DECODE_REF,
......@@ -18,6 +18,7 @@ from invenio_tools import (DECODE_REF,
REG_OAI,
REG_YEAR)
from itertools import imap
from pandas import DataFrame
from plugin_dbui import CLEAN_SPACES, get_id
......@@ -679,7 +680,11 @@ class CheckAndFix(object):
if not isinstance(record, RecordThesis):
raise CheckException(MSG_NO_THESIS)
def my_affiliation(self, record, id_project, id_team):
def my_affiliation(self,
record,
id_project,
id_team,
func=format_author_fr):
"""Check that authors of my institute are signatories.
Launch a recovery procedure when affiliations are not defined.
......@@ -689,6 +694,7 @@ class CheckAndFix(object):
record (RecordPubli): record describing a publication.
id_project (int): identifier of the project in the database
id_team (int): identifier of the team in the database
func (reference): function used to format the author names.
Raises:
CheckException: when there is no authors from my institute.
......@@ -704,16 +710,21 @@ class CheckAndFix(object):
if not rescue_list:
raise CheckException(MSG_NO_MY_AUTHOR)
# format the author in the same way as the rescue list
# compute the intersection between the authors and the rescue list
set_1 = set(record.authors_as_list())
set_2 = set(rescue_list)
df = (DataFrame(record.authors_as_list(), columns=["raw_author"])
.assign(format_author=lambda x:
x.raw_author.apply(lambda y: func(y)))
.set_index("format_author"))
rescue_list = [el.decode("utf-8") for el in rescue_list]
intersection = df.index & rescue_list
li = list(set_1.intersection(set_2))
if not li:
if intersection.size == 0:
raise CheckException(MSG_NO_MY_AUTHOR)
# cache the result for later use
self.__my_authors[record.id()] = li
self.__my_authors[record.id()] = intersection.values.tolist()
def paper_reference(self, record):
"""Check that editor, page, volume and paper year are defined
......
......@@ -363,12 +363,14 @@ class Selector(object):
"marc12_selector",
Field("id_teams", "reference teams", label="Team"),
Field("id_projects", "reference projects", label="Project"),
Field("host", "string", default=STORES[0], label="Store"),
Field("xml", "text", comment=tp_xml, label="MARCXML"),
Field("controller", "string", label="Automaton"),
Field("id_categories", "reference categories", label="Category"),
Field("mode", "string", default=mode_dry_run))
table.controller.requires = IS_IN_SET(CONTROLLERS)
table.host.requires = IS_IN_SET(STORES)
table.id_categories.requires = IS_IN_DB(db, "categories.code")
table.id_projects.requires = IS_IN_DB(db, "projects.project")
table.id_teams.requires = IS_IN_DB(db, "teams.team")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment