Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
befcf194
Commit
befcf194
authored
May 22, 2017
by
LE GAC Renaud
Browse files
Update CheckAndFix to fix a bug in the method my_affiliation.
parent
7dc2b4ea
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
7 deletions
+18
-7
modules/harvest_tools/checkandfix.py
modules/harvest_tools/checkandfix.py
+18
-7
No files found.
modules/harvest_tools/checkandfix.py
View file @
befcf194
...
...
@@ -5,7 +5,7 @@
import
re
import
regex
from
base
import
search_synonym
,
ToolException
from
base
import
format_author_fr
,
search_synonym
,
ToolException
from
exception
import
CheckException
from
gluon
import
current
from
invenio_tools
import
(
DECODE_REF
,
...
...
@@ -18,6 +18,7 @@ from invenio_tools import (DECODE_REF,
REG_OAI
,
REG_YEAR
)
from
itertools
import
imap
from
pandas
import
DataFrame
from
plugin_dbui
import
CLEAN_SPACES
,
get_id
...
...
@@ -679,7 +680,11 @@ class CheckAndFix(object):
if
not
isinstance
(
record
,
RecordThesis
):
raise
CheckException
(
MSG_NO_THESIS
)
def
my_affiliation
(
self
,
record
,
id_project
,
id_team
):
def
my_affiliation
(
self
,
record
,
id_project
,
id_team
,
func
=
format_author_fr
):
"""Check that authors of my institute are signatories.
Launch a recovery procedure when affiliations are not defined.
...
...
@@ -689,6 +694,7 @@ class CheckAndFix(object):
record (RecordPubli): record describing a publication.
id_project (int): identifier of the project in the database
id_team (int): identifier of the team in the database
func (reference): function used to format the author names.
Raises:
CheckException: when there is no authors from my institute.
...
...
@@ -704,16 +710,21 @@ class CheckAndFix(object):
if
not
rescue_list
:
raise
CheckException
(
MSG_NO_MY_AUTHOR
)
# format the author in the same way as the rescue list
# compute the intersection between the authors and the rescue list
set_1
=
set
(
record
.
authors_as_list
())
set_2
=
set
(
rescue_list
)
df
=
(
DataFrame
(
record
.
authors_as_list
(),
columns
=
[
"raw_author"
])
.
assign
(
format_author
=
lambda
x
:
x
.
raw_author
.
apply
(
lambda
y
:
func
(
y
)))
.
set_index
(
"format_author"
))
rescue_list
=
[
el
.
decode
(
"utf-8"
)
for
el
in
rescue_list
]
intersection
=
df
.
index
&
rescue_list
li
=
list
(
set_1
.
intersection
(
set_2
))
if
not
li
:
if
intersection
.
size
==
0
:
raise
CheckException
(
MSG_NO_MY_AUTHOR
)
# cache the result for later use
self
.
__my_authors
[
record
.
id
()]
=
li
self
.
__my_authors
[
record
.
id
()]
=
intersection
.
values
.
tolist
()
def
paper_reference
(
self
,
record
):
"""Check that editor, page, volume and paper year are defined
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment