Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
7dc22382
Commit
7dc22382
authored
Jun 29, 2017
by
LE GAC Renaud
Browse files
Update checkAndFix to add a debug mode.
parent
294ee8a8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
66 additions
and
20 deletions
+66
-20
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+1
-1
modules/harvest_tools/checkandfix.py
modules/harvest_tools/checkandfix.py
+65
-19
No files found.
modules/harvest_tools/automaton.py
View file @
7dc22382
...
...
@@ -129,7 +129,7 @@ class Automaton(object):
if
not
id_category
:
raise
ToolException
(
MSG_NO_CAT
)
self
.
check
=
CheckAndFix
()
self
.
check
=
CheckAndFix
(
debug
)
self
.
collection_logs
=
[]
self
.
controller
=
automaton
self
.
db
=
db
...
...
modules/harvest_tools/checkandfix.py
View file @
7dc22382
...
...
@@ -91,13 +91,17 @@ UNIVERSITY = "University"
class
CheckAndFix
(
object
):
"""A collection of tools to check and repair the content
of the Marc12 record.
"""A collection of tools to check and repair the content of record.
Args:
debug (bool):
activate the debug mode.
"""
def
__init__
(
self
):
def
__init__
(
self
,
debug
=
False
):
self
.
db
=
current
.
db
self
.
dbg
=
debug
self
.
reg_institute
=
self
.
_get_reg_institute
()
# private cache for my_author rescue list
...
...
@@ -339,8 +343,7 @@ class CheckAndFix(object):
return
val
@
staticmethod
def
authors
(
record
):
def
authors
(
self
,
record
):
"""Check that author fields are defined.
Args:
...
...
@@ -352,6 +355,8 @@ class CheckAndFix(object):
when there are no authors.
"""
if
self
.
dbg
:
print
"
\t\t
Check authors"
if
not
record
.
is_authors
():
raise
CheckException
(
MSG_NO_AUTHOR
)
...
...
@@ -370,6 +375,9 @@ class CheckAndFix(object):
nor entered as a synonym.
"""
if
self
.
dbg
:
print
"
\t\t
Check collaboration"
val
=
record
.
collaboration
()
if
not
val
:
return
...
...
@@ -393,6 +401,9 @@ class CheckAndFix(object):
the country is not defined nor entered as a synonym.
"""
if
self
.
dbg
:
print
"
\t\t
Check country"
if
not
isinstance
(
record
,
RecordConf
):
return
...
...
@@ -416,6 +427,9 @@ class CheckAndFix(object):
dates are not found.
"""
if
self
.
dbg
:
print
"
\t\t
Check conference date"
# conference information is available, i.e. proceeding
if
not
isinstance
(
record
,
RecordConf
):
return
...
...
@@ -465,6 +479,9 @@ class CheckAndFix(object):
the bad OAI.
"""
if
self
.
dbg
:
print
"
\t\t
Check is bad oai used"
value
=
record
.
oai
()
match
=
REG_OAI
.
match
(
value
)
...
...
@@ -478,8 +495,7 @@ class CheckAndFix(object):
return
False
@
staticmethod
def
is_oai
(
record
):
def
is_oai
(
self
,
record
):
"""Raise exception when the OAI is not defined.
Args:
...
...
@@ -489,6 +505,9 @@ class CheckAndFix(object):
ToolException:
OAI is not defined
"""
if
self
.
dbg
:
print
"
\t\t
Check is oai"
# field / subfield depends on the store
test
=
(
u
"oai"
in
record
and
u
"value"
in
record
[
u
"oai"
])
or
\
(
u
"FIXME_OAI"
in
record
and
u
"id"
in
record
[
u
"FIXME_OAI"
])
...
...
@@ -496,8 +515,7 @@ class CheckAndFix(object):
if
not
test
:
raise
ToolException
(
MSG_NO_OAI
)
@
staticmethod
def
format_authors
(
record
,
fmt
=
"Last, First"
):
def
format_authors
(
self
,
record
,
fmt
=
"Last, First"
):
"""Format the author names.
Args:
...
...
@@ -508,10 +526,12 @@ class CheckAndFix(object):
"Last, First" and "Last F."
"""
if
self
.
dbg
:
print
"
\t\t
Format authors"
record
.
reformat_authors
(
fmt
)
@
staticmethod
def
format_editor
(
record
):
def
format_editor
(
self
,
record
):
"""Format the editor abbreviation.
The encoding depends on the store::
...
...
@@ -529,6 +549,9 @@ class CheckAndFix(object):
when the editor is not well formed.
"""
if
self
.
dbg
:
print
"
\t\t
Format editor"
if
not
record
.
is_published
():
return
...
...
@@ -562,6 +585,9 @@ class CheckAndFix(object):
record describing a thesis.
"""
if
self
.
dbg
:
print
"
\t\t
Format university"
# protection
if
not
isinstance
(
record
,
RecordThesis
):
return
...
...
@@ -611,6 +637,9 @@ class CheckAndFix(object):
when the list is empty
"""
if
self
.
dbg
:
print
"
\t\t
Get my authors"
# might have been computed when affiliation is checked
rec_id
=
record
.
id
()
if
rec_id
in
self
.
_my_authors
:
...
...
@@ -628,8 +657,7 @@ class CheckAndFix(object):
record
.
my_authors
=
value
@
staticmethod
def
is_conference
(
record
):
def
is_conference
(
self
,
record
):
"""Check that the record describes a conference talk / proceeding.
Args:
...
...
@@ -641,11 +669,13 @@ class CheckAndFix(object):
the record is not associated to a conference.
"""
if
self
.
dbg
:
print
"
\t\t
Is conference"
if
not
isinstance
(
record
,
RecordConf
):
raise
CheckException
(
MSG_NO_CONF
)
@
staticmethod
def
is_thesis
(
record
):
def
is_thesis
(
self
,
record
):
"""Check that the record describes a thesis.
Args:
...
...
@@ -657,6 +687,9 @@ class CheckAndFix(object):
the record does not describe a thesis.
"""
if
self
.
dbg
:
print
"
\t\t
Is thesis"
if
not
isinstance
(
record
,
RecordThesis
):
raise
CheckException
(
MSG_NO_THESIS
)
...
...
@@ -701,6 +734,9 @@ class CheckAndFix(object):
and the author is null.
"""
if
self
.
dbg
:
print
"
\t\t
Check my affiliation"
value
=
record
.
find_affiliation
(
self
.
reg_institute
)
if
len
(
value
)
>
0
:
return
value
...
...
@@ -738,8 +774,7 @@ class CheckAndFix(object):
return
""
@
staticmethod
def
paper_reference
(
record
):
def
paper_reference
(
self
,
record
):
"""Check that editor, page, volume and paper year are defined
for a published paper. Repair it from doi when possible.
...
...
@@ -752,6 +787,9 @@ class CheckAndFix(object):
when the paper reference is not well formed.
"""
if
self
.
dbg
:
print
"
\t\t
Check paper reference"
if
record
.
is_published
():
return
...
...
@@ -811,6 +849,9 @@ class CheckAndFix(object):
when the publisher is not defined nor entered as a synonym.
"""
if
self
.
dbg
:
print
"
\t\t
Check publisher"
val
=
record
.
paper_editor
()
if
len
(
val
)
==
0
:
return
...
...
@@ -840,6 +881,9 @@ class CheckAndFix(object):
than one date are found.
"""
if
self
.
dbg
:
print
"
\t\t
Check submitted"
date
=
record
.
submitted
()
# recover missing date using conference, preprint, thesis information
...
...
@@ -876,8 +920,7 @@ class CheckAndFix(object):
else
:
record
[
u
"prepublication"
]
=
{
u
"date"
:
date
}
@
staticmethod
def
temporary_record
(
record
):
def
temporary_record
(
self
,
record
):
"""Some records are marked temporary.
Args:
...
...
@@ -887,6 +930,9 @@ class CheckAndFix(object):
CheckException: when the record is marked temporary
"""
if
self
.
dbg
:
print
"
\t\t
Check temporary record"
# INSPIREHEP
# Can be found by using the XML syntax:
# http://inspirehep.net/search?500__a="*Temporary record*"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment