Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
c2623351
Commit
c2623351
authored
Nov 07, 2019
by
LE GAC Renaud
Browse files
Apply 2to3 converter to harvest_tools.
parent
52483d05
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
204 additions
and
224 deletions
+204
-224
modules/harvest_tools/__init__.py
modules/harvest_tools/__init__.py
+22
-23
modules/harvest_tools/articles.py
modules/harvest_tools/articles.py
+11
-12
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+27
-28
modules/harvest_tools/base.py
modules/harvest_tools/base.py
+2
-4
modules/harvest_tools/checkandfix.py
modules/harvest_tools/checkandfix.py
+100
-101
modules/harvest_tools/exception.py
modules/harvest_tools/exception.py
+0
-1
modules/harvest_tools/factory.py
modules/harvest_tools/factory.py
+7
-8
modules/harvest_tools/msg.py
modules/harvest_tools/msg.py
+3
-8
modules/harvest_tools/msgcollection.py
modules/harvest_tools/msgcollection.py
+0
-1
modules/harvest_tools/notes.py
modules/harvest_tools/notes.py
+5
-6
modules/harvest_tools/preprints.py
modules/harvest_tools/preprints.py
+6
-7
modules/harvest_tools/proceedings.py
modules/harvest_tools/proceedings.py
+5
-6
modules/harvest_tools/reports.py
modules/harvest_tools/reports.py
+5
-6
modules/harvest_tools/talks.py
modules/harvest_tools/talks.py
+5
-6
modules/harvest_tools/thesis.py
modules/harvest_tools/thesis.py
+6
-7
No files found.
modules/harvest_tools/__init__.py
View file @
c2623351
# -*- coding: utf-8 -*-
"""a collection of tools to search of publications in invenio store
and to push them in the database.
"""
from
base
import
(
DRY_RUN
,
MSG_CRASH
,
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
MSG_LOAD
,
MSG_NO_ENTRY
,
MSG_TOOMANY_SYNONYM
,
family_name_fr
,
search_synonym
)
from
.
base
import
(
DRY_RUN
,
MSG_CRASH
,
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
MSG_LOAD
,
MSG_NO_ENTRY
,
MSG_TOOMANY_SYNONYM
,
family_name_fr
,
search_synonym
)
from
automaton
import
Automaton
from
articles
import
Articles
from
checkandfix
import
CheckAndFix
,
MONTHS
from
exception
import
CheckException
,
ToolException
from
factory
import
build_harvester_tool
,
get_harvester_tool
from
msg
import
Msg
from
msgcollection
import
MsgCollection
from
notes
import
Notes
from
preprints
import
Preprints
from
proceedings
import
Proceedings
from
reports
import
Reports
from
talks
import
Talks
from
thesis
import
Thesis
from
.
automaton
import
Automaton
from
.
articles
import
Articles
from
.
checkandfix
import
CheckAndFix
,
MONTHS
from
.
exception
import
CheckException
,
ToolException
from
.
factory
import
build_harvester_tool
,
get_harvester_tool
from
.
msg
import
Msg
from
.
msgcollection
import
MsgCollection
from
.
notes
import
Notes
from
.
preprints
import
Preprints
from
.
proceedings
import
Proceedings
from
.
reports
import
Reports
from
.
talks
import
Talks
from
.
thesis
import
Thesis
modules/harvest_tools/articles.py
View file @
c2623351
# -*- coding: utf-8 -*-
""" harvest_tools.articles
"""
import
traceback
from
automaton
import
Automaton
from
base
import
(
learn_my_authors
,
MSG_CRASH
,
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
MSG_LOAD
)
from
checkandfix
import
CheckException
from
.
automaton
import
Automaton
from
.
base
import
(
learn_my_authors
,
MSG_CRASH
,
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
MSG_LOAD
)
from
.
checkandfix
import
CheckException
from
plugin_dbui
import
get_id
,
UNDEF_ID
...
...
@@ -45,7 +44,7 @@ class Articles(Automaton):
return
False
if
self
.
dbg
:
print
"check article record"
print
(
"check article record"
)
try
:
self
.
check
.
clean_erratum
(
record
)
...
...
@@ -70,7 +69,7 @@ class Articles(Automaton):
except
Exception
as
e
:
self
.
logs
[
-
1
].
reject
(
MSG_CRASH
%
e
,
record
=
record
,
translate
=
False
)
print
traceback
.
format_exc
()
print
((
traceback
.
format_exc
()
))
return
False
return
True
...
...
@@ -118,7 +117,7 @@ class Articles(Automaton):
"""
if
self
.
dbg
:
print
"get existing article by fields"
print
(
"get existing article by fields"
)
# alias
db
=
self
.
db
...
...
@@ -218,7 +217,7 @@ class Articles(Automaton):
"""
if
self
.
dbg
:
print
"check existing article by origin"
print
(
"check existing article by origin"
)
# alias
db
=
self
.
db
...
...
modules/harvest_tools/automaton.py
View file @
c2623351
# -*- coding: utf-8 -*-
""" harvest_tools.automaton
"""
...
...
@@ -6,17 +5,17 @@ import re
import
traceback
from
base
import
(
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
search_synonym
,
ToolException
)
from
checkandfix
import
CheckAndFix
from
.
base
import
(
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
search_synonym
,
ToolException
)
from
.
checkandfix
import
CheckAndFix
from
gluon.storage
import
Storage
from
invenio_tools
import
(
InvenioStore
,
Marc12
,
OAI_URL
)
from
msg
import
Msg
from
msgcollection
import
MsgCollection
from
.
invenio_tools
import
(
InvenioStore
,
Marc12
,
OAI_URL
)
from
.
msg
import
Msg
from
.
msgcollection
import
MsgCollection
from
plugin_dbui
import
CALLBACK_ERRORS
,
get_id
...
...
@@ -358,7 +357,7 @@ class Automaton(object):
"""
if
self
.
dbg
:
print
"check record"
print
(
"check record"
)
try
:
self
.
check
.
recover_oai
(
record
,
self
.
harvester
.
host
)
...
...
@@ -407,7 +406,7 @@ class Automaton(object):
"""
if
self
.
dbg
:
print
"get existing record by fields"
print
(
"get existing record by fields"
)
# alias
db
=
self
.
db
...
...
@@ -473,11 +472,11 @@ class Automaton(object):
"""
if
self
.
dbg
:
print
"start processing"
,
self
.
__class__
.
__name__
print
"decode request"
print
(
"start processing"
,
self
.
__class__
.
__name__
)
print
(
"decode request"
)
if
self
.
dbg
:
print
"get harvest parameters"
print
(
"get harvest parameters"
)
# decode the XML request
self
.
collection_logs
.
append
(
MsgCollection
(
found
=
1
))
...
...
@@ -511,7 +510,7 @@ class Automaton(object):
"""
if
self
.
dbg
:
print
"process URL search"
print
(
"process URL search"
)
# extend harvester for logs
self
.
harvester
.
host
=
host
...
...
@@ -557,25 +556,25 @@ class Automaton(object):
return
if
self
.
dbg
:
print
"%i records found in %s"
%
(
len
(
rec_ids
),
collection
)
print
(
"%i records found in %s"
%
(
len
(
rec_ids
),
collection
)
)
for
rec_id
in
rec_ids
:
if
self
.
dbg
:
print
"
\n
processing record"
,
rec_id
print
(
"
\n
processing record"
,
rec_id
)
try
:
db_id
=
is_record_in_db
(
title
,
host
=
host
,
rec_id
=
rec_id
)
if
db_id
:
if
self
.
dbg
:
print
"record in db"
,
rec_id
,
"->"
,
db_id
print
(
"record in db"
,
rec_id
,
"->"
,
db_id
)
continue
xml
=
store
.
get_record
(
rec_id
)
decode_xml
(
xml
)
except
Exception
as
e
:
print
traceback
.
format_exc
()
print
(
traceback
.
format_exc
()
)
url
=
OAI_URL
%
(
host
,
rec_id
)
logs
.
append
(
Msg
(
harvester
=
self
.
harvester
,
collection
=
title
,
...
...
@@ -592,7 +591,7 @@ class Automaton(object):
"""
if
self
.
dbg
:
print
"process xml record"
print
(
"process xml record"
)
# alias
db
=
self
.
db
...
...
@@ -610,8 +609,8 @@ class Automaton(object):
for
record
in
records
:
if
self
.
dbg
:
print
"record decoded"
print
record
.
title
()
.
encode
(
"utf-8"
)
print
(
"record decoded"
)
print
(
record
.
title
())
# reject the record using the secondary OAI
# require to cover the case:
...
...
@@ -627,7 +626,7 @@ class Automaton(object):
db
.
publications
[
rec_id
]
=
dict
(
origin
=
oai_url
)
if
self
.
dbg
:
print
"record in db (secondary oai) ->"
,
rec_id
print
(
"record in db (secondary oai) ->"
,
rec_id
)
continue
...
...
@@ -641,17 +640,17 @@ class Automaton(object):
# repair non-conformity as far as possible
if
not
check_record
(
record
):
if
self
.
dbg
:
print
"record rejected"
,
logs
[
-
1
].
txt
print
(
"record rejected"
,
logs
[
-
1
].
txt
)
continue
if
self
.
dbg
:
print
"insert record in the database"
print
(
"insert record in the database"
)
# insert the record in the database
insert_record
(
record
)
if
self
.
dbg
:
print
logs
[
-
1
].
action
.
upper
(),
logs
[
-
1
].
txt
print
(
logs
[
-
1
].
action
.
upper
(),
logs
[
-
1
].
txt
)
def
report
(
self
):
"""Build the processing report.
...
...
modules/harvest_tools/base.py
View file @
c2623351
# -*- coding: utf-8 -*-
""" harvest_tools.base
"""
from
exception
import
ToolException
from
.
exception
import
ToolException
from
plugin_dbui
import
get_id
,
UNDEF_ID
...
...
@@ -79,8 +78,7 @@ def learn_my_authors(db,
# NOTE2: handle the case J. Foo and J. M. Foo are the same person
elems
=
[]
for
elem
in
diff
:
if
isinstance
(
elem
,
unicode
):
elem
=
elem
.
encode
(
'utf8'
)
if
isinstance
(
elem
,
str
):
family_name
=
elem
[
elem
.
rfind
(
'. '
)
+
2
:]
if
family_name
not
in
row
.
authors
:
...
...
modules/harvest_tools/checkandfix.py
View file @
c2623351
...
...
@@ -5,18 +5,17 @@
import
re
import
regex
from
base
import
search_synonym
,
ToolException
from
exception
import
CheckException
from
.
base
import
search_synonym
,
ToolException
from
.
exception
import
CheckException
from
gluon
import
current
from
invenio_tools
import
(
DECODE_REF
,
MSG_NO_CONF
,
MSG_NO_THESIS
,
OAI_URL
,
RecordConf
,
RecordThesis
,
REG_OAI
,
REG_YEAR
)
from
itertools
import
imap
from
.invenio_tools
import
(
DECODE_REF
,
MSG_NO_CONF
,
MSG_NO_THESIS
,
OAI_URL
,
RecordConf
,
RecordThesis
,
REG_OAI
,
REG_YEAR
)
from
plugin_dbui
import
CLEAN_SPACES
,
get_id
...
...
@@ -27,21 +26,21 @@ DECODE_DD_MMM_YYYY = re.compile(r"(\d{1,2}) *([A-Za-z]{3}) *(\d{4})")
DECODE_DD_MM_YYYY
=
re
.
compile
(
r
"(\d{1,2}) +(\d{1,2}) +(\d{4})"
)
DECODE_YYYY
=
re
.
compile
(
r
"^(\d{4})$"
)
MONTHS
=
{
u
'
Jan
'
:
'
01
'
,
u
'
Feb
'
:
'
02
'
,
u
'
Fev
'
:
'
02
'
,
u
'
Mar
'
:
'
03
'
,
u
'
Apr
'
:
'
04
'
,
u
'
Avr
'
:
'
04
'
,
u
'
May
'
:
'
05
'
,
u
'
Mai
'
:
'
05
'
,
u
'
Jun
'
:
'
06
'
,
u
'
Jul
'
:
'
07
'
,
u
'
Aug
'
:
'
08
'
,
u
'
Sep
'
:
'
09
'
,
u
'
Oct
'
:
'
10
'
,
u
'
Nov
'
:
'
11
'
,
u
'
Dec
'
:
'
12
'
}
MONTHS
=
{
"
Jan
"
:
"
01
"
,
"
Feb
"
:
"
02
"
,
"
Fev
"
:
"
02
"
,
"
Mar
"
:
"
03
"
,
"
Apr
"
:
"
04
"
,
"
Avr
"
:
"
04
"
,
"
May
"
:
"
05
"
,
"
Mai
"
:
"
05
"
,
"
Jun
"
:
"
06
"
,
"
Jul
"
:
"
07
"
,
"
Aug
"
:
"
08
"
,
"
Sep
"
:
"
09
"
,
"
Oct
"
:
"
10
"
,
"
Nov
"
:
"
11
"
,
"
Dec
"
:
"
12
"
}
MSG_INVALID_HOST
=
"Invalid host"
...
...
@@ -232,19 +231,19 @@ class CheckAndFix(object):
record (RecordPubli): record describing a publication.
Returns:
unicode
: empty when procedure failed
str
: empty when procedure failed
"""
val
=
u
''
val
=
''
if
isinstance
(
record
,
RecordConf
):
# INSPIREHEP start date encoded as 2014-12-31
if
"x"
in
record
[
u
"111"
]:
val
=
record
[
u
"111"
][
"x"
]
if
"x"
in
record
[
"111"
]:
val
=
record
[
"111"
][
"x"
]
# CDS end date encoded as 20141231
elif
"z"
in
record
[
u
"111"
]:
val
=
record
[
u
"111"
][
"z"
]
elif
"z"
in
record
[
"111"
]:
val
=
record
[
"111"
][
"z"
]
val
=
"%s-%s-%s"
%
(
val
[
0
:
4
],
val
[
4
:
6
],
val
[
6
:
8
])
elif
isinstance
(
record
,
RecordThesis
):
...
...
@@ -276,34 +275,34 @@ class CheckAndFix(object):
"""
# standard case
if
isinstance
(
record
[
u
"773"
],
dict
):
if
isinstance
(
record
[
"773"
],
dict
):
if
"o"
in
record
[
u
"773"
]:
if
"o"
in
record
[
"773"
]:
for
reg
in
DECODE_REF
:
m
=
reg
.
match
(
record
[
u
"773"
][
"o"
])
m
=
reg
.
match
(
record
[
"773"
][
"o"
])
if
m
:
record
[
u
"773"
][
"p"
]
=
m
.
group
(
"p"
)
record
[
u
"773"
][
"v"
]
=
m
.
group
(
"v"
)
record
[
u
"773"
][
"y"
]
=
m
.
group
(
"y"
)
record
[
u
"773"
][
"c"
]
=
m
.
group
(
"c"
)
record
[
"773"
][
"p"
]
=
m
.
group
(
"p"
)
record
[
"773"
][
"v"
]
=
m
.
group
(
"v"
)
record
[
"773"
][
"y"
]
=
m
.
group
(
"y"
)
record
[
"773"
][
"c"
]
=
m
.
group
(
"c"
)
return
raise
CheckException
(
MSG_NO_REF
)
# list case -- paper with erratum
elif
isinstance
(
record
[
u
"773"
],
list
):
elif
isinstance
(
record
[
"773"
],
list
):
for
i
in
range
(
len
(
record
[
u
"773"
])):
for
i
in
range
(
len
(
record
[
"773"
])):
if
"o"
in
record
[
u
"773"
][
i
]:
if
"o"
in
record
[
"773"
][
i
]:
fixed
=
False
for
reg
in
DECODE_REF
:
m
=
reg
.
match
(
record
[
u
"773"
][
i
][
"o"
])
m
=
reg
.
match
(
record
[
"773"
][
i
][
"o"
])
if
m
:
record
[
u
"773"
][
i
][
"p"
]
=
m
.
group
(
"p"
)
record
[
u
"773"
][
i
][
"v"
]
=
m
.
group
(
"v"
)
record
[
u
"773"
][
i
][
"y"
]
=
m
.
group
(
"y"
)
record
[
u
"773"
][
i
][
"c"
]
=
m
.
group
(
"c"
)
record
[
"773"
][
i
][
"p"
]
=
m
.
group
(
"p"
)
record
[
"773"
][
i
][
"v"
]
=
m
.
group
(
"v"
)
record
[
"773"
][
i
][
"y"
]
=
m
.
group
(
"y"
)
record
[
"773"
][
i
][
"c"
]
=
m
.
group
(
"c"
)
fixed
=
True
break
...
...
@@ -331,7 +330,7 @@ class CheckAndFix(object):
if
not
record
.
is_authors
():
raise
CheckException
(
MSG_NO_AUTHOR
)
if
len
(
record
[
u
"100"
])
>
1
:
if
len
(
record
[
"100"
])
>
1
:
raise
CheckException
(
MSG_TO_MANY_FAUTHOR
)
def
clean_erratum
(
self
,
record
):
...
...
@@ -350,10 +349,10 @@ class CheckAndFix(object):
# use the simplest algorithm by selecting the first entry in the list
# fare to assume that the article is published first.
record
[
u
"773"
]
=
record
[
u
"773"
][
0
]
record
[
"773"
]
=
record
[
"773"
][
0
]
# treat year and submitted date
for
k
in
(
u
"260"
,
u
"269"
):
for
k
in
(
"260"
,
"269"
):
if
k
in
record
and
isinstance
(
record
[
k
],
list
):
record
[
k
]
=
record
[
k
][
0
]
...
...
@@ -428,10 +427,10 @@ class CheckAndFix(object):
return
# cds.cern.ch
if
not
(
u
"111"
in
record
and
"d"
in
record
[
u
"111"
]):
if
not
(
"111"
in
record
and
"d"
in
record
[
"111"
]):
raise
CheckException
(
MSG_NO_CONF_DATE
)
value
=
record
[
u
"111"
][
"d"
]
value
=
record
[
"111"
][
"d"
]
m
=
REG_CONF_DATES
.
match
(
value
)
if
not
m
:
...
...
@@ -441,10 +440,10 @@ class CheckAndFix(object):
m2
=
REG_CONF_DATES_2
.
match
(
value
)
if
m1
:
record
[
u
"111"
][
"d"
]
=
"%s-%s %s %s"
%
m1
.
groups
()
record
[
"111"
][
"d"
]
=
"%s-%s %s %s"
%
m1
.
groups
()
elif
m2
:
record
[
u
"111"
][
"d"
]
=
"%s %s - %s %s %s"
%
m2
.
groups
()
record
[
"111"
][
"d"
]
=
"%s %s - %s %s %s"
%
m2
.
groups
()
else
:
raise
CheckException
(
MSG_WELL_FORMED_CONF_DATES
)
...
...
@@ -511,11 +510,11 @@ class CheckAndFix(object):
return
# standard case
if
isinstance
(
record
[
u
"773"
],
dict
):
if
"p"
in
record
[
u
"773"
]
and
"v"
in
record
[
u
"773"
]:
if
isinstance
(
record
[
"773"
],
dict
):
if
"p"
in
record
[
"773"
]
and
"v"
in
record
[
"773"
]:
editor
=
record
[
u
"773"
][
"p"
]
volume
=
record
[
u
"773"
][
"v"
]
editor
=
record
[
"773"
][
"p"
]
volume
=
record
[
"773"
][
"v"
]
# add space after the dot Phys.Rev -> Phys. Rev
editor
=
re
.
sub
(
r
'\.([A-Z])'
,
r
'. \1'
,
editor
)
...
...
@@ -529,14 +528,14 @@ class CheckAndFix(object):
# remove stupid mistake
editor
=
CLEAN_SPACES
(
editor
)
record
[
u
"773"
][
"p"
]
=
editor
record
[
u
"773"
][
"v"
]
=
volume
record
[
"773"
][
"p"
]
=
editor
record
[
"773"
][
"v"
]
=
volume
# list case -- publication with erratum
elif
isinstance
(
record
[
u
"773"
],
list
):
elif
isinstance
(
record
[
"773"
],
list
):
editors
=
record
.
_get
(
u
"773"
,
'p'
,
force_list
=
True
)
volumes
=
record
.
_get
(
u
"773"
,
'v'
,
force_list
=
True
)
editors
=
record
.
_get
(
"773"
,
'p'
,
force_list
=
True
)
volumes
=
record
.
_get
(
"773"
,
'v'
,
force_list
=
True
)
if
len
(
editors
)
!=
len
(
volumes
):
raise
CheckException
(
MSG_WELL_FORMED_EDITOR
)
...
...
@@ -553,8 +552,8 @@ class CheckAndFix(object):
editor
=
CLEAN_SPACES
(
editor
)
record
[
u
"773"
][
i
][
"p"
]
=
editor
record
[
u
"773"
][
i
][
"v"
]
=
volume
record
[
"773"
][
i
][
"p"
]
=
editor
record
[
"773"
][
i
][
"v"
]
=
volume
def
format_universities
(
self
,
record
):
"""Format the name of the university for PhD:
...
...
@@ -577,39 +576,39 @@ class CheckAndFix(object):
year
=
REG_YEAR
.
search
(
record
.
these_defense
()).
group
(
1
)
if
int
(
year
)
<
2012
:
university
=
u
"Université de la Méditerrannée Aix-Marseille II"
university
=
"Université de la Méditerrannée Aix-Marseille II"
else
:
university
=
u
"Aix Marseille Université"
university
=
"Aix Marseille Université"
if
u
'
502
'
in
record
and
"b"
in
record
[
u
'
502
'
]:
if
isinstance
(
record
[
u
'
502
'
][
'b'
],
unicode
):
if
"Marseille"
in
record
[
u
'
502
'
][
'b'
]:
record
[
u
'
502
'
][
'b'
]
=
university
if
"
502
"
in
record
and
"b"
in
record
[
"
502
"
]:
if
isinstance
(
record
[
"
502
"
][
"b"
],
str
):
if
"Marseille"
in
record
[
"
502
"
][
"b"
]:
record
[
"
502
"
][
"b"
]
=
university
elif
isinstance
(
record
[
u
'
502
'
][
'b'
],
list
):
for
i
in
x
range
(
len
(
record
[
u
'
502
'
][
'b'
])):
if
"Marseille"
in
record
[
u
'
502
'
][
'b'
][
i
]:
record
[
u
'
502
'
][
'b'
][
i
]
=
university
elif
isinstance
(
record
[
"
502
"
][
"b"
],
list
):
for
i
in
range
(
len
(
record
[
"
502
"
][
"b"
])):
if
"Marseille"
in
record
[
"
502
"
][
"b"
][
i
]:
record
[
"
502
"
][
"b"
][
i
]
=
university
# Other: replace U. by University
else
:
university
=
current
.
T
(
UNIVERSITY
)
.
decode
(
"utf8"
)
university
=
current
.
T
(
UNIVERSITY
)
if
u
'
502
'
in
record
and
"b"
in
record
[
u
'
502
'
]:
if
isinstance
(
record
[
u
'
502
'
][
'b'
],
unicode
):
value
=
record
[
u
'
502
'
][
'b'
]
if
"
502
"
in
record
and
"b"
in
record
[
"
502
"
]:
if
isinstance
(
record
[
"
502
"
][
"b"
],
str
):
value
=
record
[
"
502
"
][
"b"
]
if
"U."
in
value
:
value
=
value
.
replace
(
'U.'
,
university
)
record
[
u
'
502
'
][
'b'
]
=
value
record
[
"
502
"
][
"b"
]
=
value
elif
isinstance
(
record
[
u
'
502
'
][
'b'
],
list
):
for
i
in
x
range
(
len
(
record
[
u
'
502
'
][
'b'
])):
value
=
record
[
u
'
502
'
][
'b'
][
i
]
elif
isinstance
(
record
[
"
502
"
][
"b"
],
list
):
for
i
in
range
(
len
(
record
[
"
502
"
][
"b"
])):
value
=
record
[
"
502
"
][
"b"
][
i
]
if
"U."
in
value
:
value
=
value
.
replace
(
'U.'
,
university
)
record
[
u
'
502
'
][
'b'
][
i
]
=
value
record
[
"
502
"
][
"b"
][
i
]
=
value
def
get_my_authors
(
self
,
record
,
sep
=
u
", "
,
sort
=
False
):
def
get_my_authors
(
self
,
record
,
sep
=
", "
,
sort
=
False
):
"""Get authors of my institutes signing the record.
The information is append to the Record object via the attribute
``my_authors``.
...
...
@@ -721,12 +720,12 @@ class CheckAndFix(object):
record
.
reformat_authors
(
fmt_rescue
)
if
sort
:
authors
=
(
record
[
u
"700"
][[
"last_name"
,
"fmt_name"
]]
authors
=
(
record
[
"700"
][[
"last_name"
,
"fmt_name"
]]
.
sort_values
(
by
=
"last_name"
)
.
fmt_name
)
else
:
authors
=
(
record
[
u
"700"
].
fmt_name
authors
=
(
record
[
"700"
].
fmt_name
.
sort_index
())
# go back to the origin formatting
...
...
@@ -741,7 +740,7 @@ class CheckAndFix(object):
# cache the result for a latter use
self
.
_my_authors
[
record
.
id
()]
=
list
(
intersection
)
return
u
""
return
""
def
paper_reference
(
self
,
record
):
"""Check that editor, page, volume and paper year are defined
...
...
@@ -762,8 +761,8 @@ class CheckAndFix(object):
return
# list of reference (paper with erratum)
refs
=
record
[
u
"773"
]
if
not
isinstance
(
record
[
u
"773"
],
list
):
refs
=
record
[
"773"
]
if
not
isinstance
(
record
[
"773"
],
list
):