Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
b4c683d8
Commit
b4c683d8
authored
Jan 29, 2020
by
LE GAC Renaud
Browse files
Update CheckAndFix to use logger.
parent
0da73491
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
21 additions
and
35 deletions
+21
-35
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+1
-1
modules/harvest_tools/checkandfix.py
modules/harvest_tools/checkandfix.py
+20
-34
No files found.
modules/harvest_tools/automaton.py
View file @
b4c683d8
...
...
@@ -586,7 +586,7 @@ class Automaton(object):
# check that the record is well formed
# repair non-conformity as far as possible
if
not
self
.
check_record
(
record
):
logger
.
debug
(
f
"
{
T4
}
rejected
{
logs
[
-
1
].
txt
}
"
)
logger
.
debug
(
f
"
{
T4
}{
logs
[
-
1
].
txt
}
"
)
return
txt
=
(
"(dry run)"
if
self
.
dry_run
else
""
)
...
...
modules/harvest_tools/checkandfix.py
View file @
b4c683d8
""" harvest_tools.checkandfix
"""
import
logging
import
numpy
as
np
import
re
...
...
@@ -69,6 +70,7 @@ REG_WELL_FORMED_CONF_DATES_1 = \
REG_WELL_FORMED_CONF_DATES_2
=
\
re
.
compile
(
"\d{1,2} [A-Z][a-z]{2} - \d{1,2} [A-Z][a-z]{2} \d{4}"
)
T6
=
" "
*
6
UNIVERSITY
=
"University"
...
...
@@ -84,6 +86,7 @@ class CheckAndFix(object):
self
.
db
=
current
.
db
self
.
dbg
=
debug
self
.
logger
=
logging
.
getLogger
(
"web2py.app.limbra"
)
self
.
reg_institute
=
self
.
_get_reg_institute
()
# private cache for my_author rescue list
...
...
@@ -340,8 +343,7 @@ class CheckAndFix(object):
when there are no authors.
"""
if
self
.
dbg
:
print
(
"
\t\t
Check authors"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check authors"
)
if
not
record
.
is_authors
():
raise
CheckException
(
MSG_NO_AUTHOR
)
...
...
@@ -361,8 +363,7 @@ class CheckAndFix(object):
* the collaboration is unknown (neither collaboration nor synonym)
* more than one synonym found.
"""
if
self
.
dbg
:
print
(
"
\t\t
Check collaboration"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check collaboration"
)
val
=
record
.
collaboration
()
if
not
val
:
...
...
@@ -404,8 +405,7 @@ class CheckAndFix(object):
* more than one synonym found.
"""
if
self
.
dbg
:
print
(
"
\t\t
Check country"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check country"
)
if
not
isinstance
(
record
,
RecordConf
):
return
...
...
@@ -451,8 +451,7 @@ class CheckAndFix(object):
dates are not found.
"""
if
self
.
dbg
:
print
(
"
\t\t
Check conference date"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check conference date"
)
# conference information is available, i.e. proceeding
if
not
isinstance
(
record
,
RecordConf
):
...
...
@@ -503,8 +502,7 @@ class CheckAndFix(object):
the bad OAI.
"""
if
self
.
dbg
:
print
(
"
\t\t
Check is bad oai used"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check is bad oai used"
)
value
=
record
.
oai
()
match
=
REG_OAI
.
match
(
value
)
...
...
@@ -529,8 +527,7 @@ class CheckAndFix(object):
bool:
``True`` when the OAI is not defined in the record.
"""
if
self
.
dbg
:
print
(
"
\t\t
Check is oai"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check is oai"
)
# field / subfield depends on the store
test
=
(
"oai"
in
record
and
"value"
in
record
[
"oai"
])
or
\
...
...
@@ -551,8 +548,7 @@ class CheckAndFix(object):
``Last, First`` and ``Last F.``
"""
if
self
.
dbg
:
print
(
"
\t\t
Format authors"
)
self
.
logger
.
debug
(
f
"
{
T6
}
format authors"
)
record
.
reformat_authors
(
fmt
)
...
...
@@ -574,8 +570,7 @@ class CheckAndFix(object):
when the editor is not well formed.
"""
if
self
.
dbg
:
print
(
"
\t\t
Format editor"
)
self
.
logger
.
debug
(
f
"
{
T6
}
format editor"
)
if
not
record
.
is_published
():
return
...
...
@@ -610,8 +605,7 @@ class CheckAndFix(object):
record describing a thesis.
"""
if
self
.
dbg
:
print
(
"
\t\t
Format university"
)
self
.
logger
.
debug
(
f
"
{
T6
}
format university"
)
# protection
if
not
isinstance
(
record
,
RecordThesis
):
...
...
@@ -675,8 +669,7 @@ class CheckAndFix(object):
the list is empty
"""
if
self
.
dbg
:
print
(
"
\t\t
Get my authors"
)
self
.
logger
.
debug
(
f
"
{
T6
}
get my authors"
)
# might have been computed when affiliation is checked
rec_id
=
record
.
id
()
...
...
@@ -707,8 +700,7 @@ class CheckAndFix(object):
the record is not associated to a conference.
"""
if
self
.
dbg
:
print
(
"
\t\t
Is conference"
)
self
.
logger
.
debug
(
f
"
{
T6
}
is conference"
)
if
not
isinstance
(
record
,
RecordConf
):
raise
CheckException
(
MSG_NO_CONF
)
...
...
@@ -728,8 +720,7 @@ class CheckAndFix(object):
the record does not describe a thesis.
"""
if
self
.
dbg
:
print
(
"
\t\t
Is thesis"
)
self
.
logger
.
debug
(
f
"
{
T6
}
is thesis"
)
if
not
isinstance
(
record
,
RecordThesis
):
raise
CheckException
(
MSG_NO_THESIS
)
...
...
@@ -775,8 +766,7 @@ class CheckAndFix(object):
and the author is null.
"""
if
self
.
dbg
:
print
(
"
\t\t
Check my affiliation"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check my affiliation"
)
value
=
record
.
find_affiliation
(
self
.
reg_institute
)
if
len
(
value
)
>
0
:
...
...
@@ -828,8 +818,7 @@ class CheckAndFix(object):
the paper reference is not well formed.
"""
if
self
.
dbg
:
print
(
"
\t\t
Check paper reference"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check paper reference"
)
if
record
.
is_published
():
return
...
...
@@ -891,8 +880,7 @@ class CheckAndFix(object):
* more than one synonym found.
"""
if
self
.
dbg
:
print
(
"
\t\t
Check publisher"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check publisher"
)
val
=
record
.
paper_editor
()
if
len
(
val
)
==
0
:
...
...
@@ -931,8 +919,7 @@ class CheckAndFix(object):
* more than one date is found.
"""
if
self
.
dbg
:
print
(
"
\t\t
Check submitted"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check submitted"
)
date
=
record
.
submitted
()
...
...
@@ -982,8 +969,7 @@ class CheckAndFix(object):
the record is marked temporary
"""
if
self
.
dbg
:
print
(
"
\t\t
Check temporary record"
)
self
.
logger
.
debug
(
f
"
{
T6
}
check temporary record"
)
# INSPIREHEP
# Can be found by using the XML syntax:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment