Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
7dc22382
Commit
7dc22382
authored
Jun 29, 2017
by
LE GAC Renaud
Browse files
Update checkAndFix to add a debug mode.
parent
294ee8a8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
66 additions
and
20 deletions
+66
-20
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+1
-1
modules/harvest_tools/checkandfix.py
modules/harvest_tools/checkandfix.py
+65
-19
No files found.
modules/harvest_tools/automaton.py
View file @
7dc22382
...
...
@@ -129,7 +129,7 @@ class Automaton(object):
if
not
id_category
:
raise
ToolException
(
MSG_NO_CAT
)
self
.
check
=
CheckAndFix
()
self
.
check
=
CheckAndFix
(
debug
)
self
.
collection_logs
=
[]
self
.
controller
=
automaton
self
.
db
=
db
...
...
modules/harvest_tools/checkandfix.py
View file @
7dc22382
...
...
@@ -91,13 +91,17 @@ UNIVERSITY = "University"
class
CheckAndFix
(
object
):
"""A collection of tools to check and repair the content
of the Marc12 record.
"""A collection of tools to check and repair the content of a record.
Args:
debug (bool):
activate the debug mode.
"""
def
__init__
(
self
):
def
__init__
(
self
,
debug
=
False
):
self
.
db
=
current
.
db
self
.
dbg
=
debug
self
.
reg_institute
=
self
.
_get_reg_institute
()
# private cache for my_author rescue list
...
...
@@ -339,8 +343,7 @@ class CheckAndFix(object):
return
val
@
staticmethod
def
authors
(
record
):
def
authors
(
self
,
record
):
"""Check that author fields are defined.
Args:
...
...
@@ -352,6 +355,8 @@ class CheckAndFix(object):
when there are no authors.
"""
if
self
.
dbg
:
print
"
\t\t
Check authors"
if
not
record
.
is_authors
():
raise
CheckException
(
MSG_NO_AUTHOR
)
...
...
@@ -370,6 +375,9 @@ class CheckAndFix(object):
nor entered as a synonym.
"""
if
self
.
dbg
:
print
"
\t\t
Check collaboration"
val
=
record
.
collaboration
()
if
not
val
:
return
...
...
@@ -393,6 +401,9 @@ class CheckAndFix(object):
the country is not defined nor entered as a synonym.
"""
if
self
.
dbg
:
print
"
\t\t
Check country"
if
not
isinstance
(
record
,
RecordConf
):
return
...
...
@@ -416,6 +427,9 @@ class CheckAndFix(object):
dates are not found.
"""
if
self
.
dbg
:
print
"
\t\t
Check conference date"
# conference information is available, i.e. proceedings
if
not
isinstance
(
record
,
RecordConf
):
return
...
...
@@ -465,6 +479,9 @@ class CheckAndFix(object):
the bad OAI.
"""
if
self
.
dbg
:
print
"
\t\t
Check is bad oai used"
value
=
record
.
oai
()
match
=
REG_OAI
.
match
(
value
)
...
...
@@ -478,8 +495,7 @@ class CheckAndFix(object):
return
False
@
staticmethod
def
is_oai
(
record
):
def
is_oai
(
self
,
record
):
"""Raise exception when the OAI is not defined.
Args:
...
...
@@ -489,6 +505,9 @@ class CheckAndFix(object):
ToolException:
OAI is not defined
"""
if
self
.
dbg
:
print
"
\t\t
Check is oai"
# field / subfield depends on the store
test
=
(
u
"oai"
in
record
and
u
"value"
in
record
[
u
"oai"
])
or
\
(
u
"FIXME_OAI"
in
record
and
u
"id"
in
record
[
u
"FIXME_OAI"
])
...
...
@@ -496,8 +515,7 @@ class CheckAndFix(object):
if
not
test
:
raise
ToolException
(
MSG_NO_OAI
)
@
staticmethod
def
format_authors
(
record
,
fmt
=
"Last, First"
):
def
format_authors
(
self
,
record
,
fmt
=
"Last, First"
):
"""Format the author names.
Args:
...
...
@@ -508,10 +526,12 @@ class CheckAndFix(object):
"Last, First" and "Last F."
"""
if
self
.
dbg
:
print
"
\t\t
Format authors"
record
.
reformat_authors
(
fmt
)
@
staticmethod
def
format_editor
(
record
):
def
format_editor
(
self
,
record
):
"""Format the editor abbreviation.
The encoding depends on the store::
...
...
@@ -529,6 +549,9 @@ class CheckAndFix(object):
when the editor is not well formed.
"""
if
self
.
dbg
:
print
"
\t\t
Format editor"
if
not
record
.
is_published
():
return
...
...
@@ -562,6 +585,9 @@ class CheckAndFix(object):
record describing a thesis.
"""
if
self
.
dbg
:
print
"
\t\t
Format university"
# protection
if
not
isinstance
(
record
,
RecordThesis
):
return
...
...
@@ -611,6 +637,9 @@ class CheckAndFix(object):
when the list is empty
"""
if
self
.
dbg
:
print
"
\t\t
Get my authors"
# might have been computed when affiliation is checked
rec_id
=
record
.
id
()
if
rec_id
in
self
.
_my_authors
:
...
...
@@ -628,8 +657,7 @@ class CheckAndFix(object):
record
.
my_authors
=
value
@
staticmethod
def
is_conference
(
record
):
def
is_conference
(
self
,
record
):
"""Check that the record describes a conference talk / proceeding.
Args:
...
...
@@ -641,11 +669,13 @@ class CheckAndFix(object):
the record is not associated to a conference.
"""
if
self
.
dbg
:
print
"
\t\t
Is conference"
if
not
isinstance
(
record
,
RecordConf
):
raise
CheckException
(
MSG_NO_CONF
)
@
staticmethod
def
is_thesis
(
record
):
def
is_thesis
(
self
,
record
):
"""Check that the record describes a thesis.
Args:
...
...
@@ -657,6 +687,9 @@ class CheckAndFix(object):
the record does not describe a thesis.
"""
if
self
.
dbg
:
print
"
\t\t
Is thesis"
if
not
isinstance
(
record
,
RecordThesis
):
raise
CheckException
(
MSG_NO_THESIS
)
...
...
@@ -701,6 +734,9 @@ class CheckAndFix(object):
and the author is null.
"""
if
self
.
dbg
:
print
"
\t\t
Check my affiliation"
value
=
record
.
find_affiliation
(
self
.
reg_institute
)
if
len
(
value
)
>
0
:
return
value
...
...
@@ -738,8 +774,7 @@ class CheckAndFix(object):
return
""
@
staticmethod
def
paper_reference
(
record
):
def
paper_reference
(
self
,
record
):
"""Check that editor, page, volume and paper year are defined
for a published paper. Repair it from doi when possible.
...
...
@@ -752,6 +787,9 @@ class CheckAndFix(object):
when the paper reference is not well formed.
"""
if
self
.
dbg
:
print
"
\t\t
Check paper reference"
if
record
.
is_published
():
return
...
...
@@ -811,6 +849,9 @@ class CheckAndFix(object):
when the publisher is not defined nor entered as a synonym.
"""
if
self
.
dbg
:
print
"
\t\t
Check publisher"
val
=
record
.
paper_editor
()
if
len
(
val
)
==
0
:
return
...
...
@@ -840,6 +881,9 @@ class CheckAndFix(object):
than one date are found.
"""
if
self
.
dbg
:
print
"
\t\t
Check submitted"
date
=
record
.
submitted
()
# recover missing date using conference, preprint, thesis information
...
...
@@ -876,8 +920,7 @@ class CheckAndFix(object):
else
:
record
[
u
"prepublication"
]
=
{
u
"date"
:
date
}
@
staticmethod
def
temporary_record
(
record
):
def
temporary_record
(
self
,
record
):
"""Some records are marked temporary.
Args:
...
...
@@ -887,6 +930,9 @@ class CheckAndFix(object):
CheckException: when the record is marked temporary
"""
if
self
.
dbg
:
print
"
\t\t
Check temporary record"
# INSPIREHEP
# Can be found by using the XML syntax:
# http://inspirehep.net/search?500__a="*Temporary record*"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment