Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
054e743b
Commit
054e743b
authored
Jan 29, 2020
by
LE GAC Renaud
Browse files
Update automaton to use logger.
parent
3fadc41a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
27 deletions
+28
-27
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+28
-27
No files found.
modules/harvest_tools/automaton.py
View file @
054e743b
""" harvest_tools.automaton
"""
import
logging
import
re
import
traceback
...
...
@@ -31,6 +32,9 @@ OAI = "oai:%s:%i"
# required for "Hal Hidden"
REG_COLLECTION
=
re
.
compile
(
r
"cc([A-Za-z ]+)(and|$)"
)
T2
=
" "
*
2
T4
=
" "
*
4
class
Automaton
(
object
):
"""Base class to search and process publications:
...
...
@@ -138,6 +142,7 @@ class Automaton(object):
self
.
id_team
=
id_team
self
.
id_project
=
id_project
self
.
logs
=
[]
self
.
logger
=
logging
.
getLogger
(
"web2py.app.limbra"
)
self
.
store
=
None
self
.
year_start
=
year_start
self
.
year_end
=
year_end
...
...
@@ -286,8 +291,9 @@ class Automaton(object):
self
.
logs
[
-
1
].
idle
(
MSG_IN_DB
,
publication
.
year
)
if
self
.
dbg
:
print
(
"
\t
record already in db:"
,
rec_id
,
"->"
,
publication
.
id
)
logger
=
self
.
logger
logger
.
debug
(
""
)
logger
.
debug
(
f
"
{
T2
}
record
{
rec_id
}
in db with id
{
publication
.
id
}
"
)
return
publication
.
id
...
...
@@ -387,8 +393,7 @@ class Automaton(object):
corrected.
"""
if
self
.
dbg
:
print
(
"
\t
check record automaton"
)
self
.
logger
.
debug
(
f
"
{
T4
}
check record (automaton)"
)
try
:
# fix record with a missing OAI
...
...
@@ -442,8 +447,7 @@ class Automaton(object):
modified zero otherwise.
"""
if
self
.
dbg
:
print
(
"get existing record by fields"
)
self
.
logger
(
"get existing record by fields..."
)
# alias
db
=
self
.
db
...
...
@@ -504,8 +508,8 @@ class Automaton(object):
in order to understand what happens.
"""
if
self
.
dbg
:
print
(
"
\n
process collection
"
,
collection
)
logger
=
self
.
logger
logger
.
debug
(
f
"
process collection
{
collection
}
"
)
# alias
collection_logs
=
self
.
collection_logs
...
...
@@ -536,12 +540,10 @@ class Automaton(object):
collection_logs
[
-
1
].
found
=
len
(
rec_ids
)
if
len
(
rec_ids
)
==
0
:
if
self
.
dbg
:
print
(
"
\t
No records found in %s"
%
collection
)
logger
.
debug
(
f
"no records found in
{
collection
}
"
)
return
if
self
.
dbg
:
print
(
"
\t
%i records found in %s"
%
(
len
(
rec_ids
),
collection
))
logger
.
debug
(
f
"
{
len
(
rec_ids
)
}
records found in
{
collection
}
"
)
# remove from the list the identifiers already registered in the database
# and log them
...
...
@@ -563,8 +565,8 @@ class Automaton(object):
record provided by the store.
"""
if
self
.
dbg
:
print
(
"
\t
process
ing
record
"
,
recjson
[
"
recid
"
],
"
(process_recjson)"
)
logger
=
self
.
logger
logger
.
debug
(
f
"
{
T4
}
process record
{
recjson
[
'
recid
'
]
}
(process_recjson)"
)
collection_logs
=
self
.
collection_logs
harvester
=
self
.
harvester
...
...
@@ -573,8 +575,7 @@ class Automaton(object):
# instantiate the record
record
=
build_record
(
recjson
)
if
self
.
dbg
:
print
(
f
"
\t
{
record
.
title
()
}
"
)
logger
.
debug
(
f
"
{
T4
}{
record
.
title
()[:
72
]
}
"
)
# start the log for the record
logs
.
append
(
Msg
(
harvester
=
harvester
,
...
...
@@ -585,21 +586,20 @@ class Automaton(object):
# check that the record is well formed
# repair non-conformity as far as possible
if
not
self
.
check_record
(
record
):
if
self
.
dbg
:
print
(
"
\t
record rejected"
,
logs
[
-
1
].
txt
)
logger
.
debug
(
f
"
{
T4
}
rejected
{
logs
[
-
1
].
txt
}
"
)
return
if
self
.
dbg
:
print
(
"
\t
insert record in the database"
)
txt
=
(
"(dry run)"
if
self
.
dry_run
else
""
)
logger
.
debug
(
f
"
{
T4
}
insert record in the database
{
txt
}
"
)
# insert the record in the database
self
.
insert_record
(
record
)
if
self
.
dbg
:
if
logger
.
getEffectiveLevel
()
==
logging
.
DEBUG
:
log
=
logs
[
-
1
]
action
=
log
.
action
action
=
(
action
.
upper
()
if
isinstance
(
action
,
str
)
else
action
)
print
(
"
\t
log:
"
,
action
,
log
.
txt
)
logger
.
debug
(
f
"
{
T4
}
log:
{
action
}
{
log
.
txt
}
"
)
def
process_recid
(
self
,
rec_id
):
"""Process the publication identified by its record identifier:
...
...
@@ -621,8 +621,9 @@ class Automaton(object):
identifier of the publication in the store.
"""
if
self
.
dbg
:
print
(
"
\n
get record"
,
rec_id
,
"(process_recid)"
)
logger
=
self
.
logger
logger
.
debug
(
""
)
logger
.
debug
(
f
"
{
T2
}
get record
{
rec_id
}
(process_recid)"
)
collection_logs
=
self
.
collection_logs
harvester
=
self
.
harvester
...
...
@@ -633,7 +634,7 @@ class Automaton(object):
self
.
process_recjson
(
recjson
)
except
Exception
as
e
:
print
(
traceback
.
format_exc
()
)
logger
.
debug
(
f
"
{
T2
}{
str
(
e
)
}
"
)
url
=
OAI_URL
%
(
harvester
.
host
,
rec_id
)
logs
.
append
(
Msg
(
harvester
=
harvester
,
collection
=
collection_logs
[
-
1
].
title
,
...
...
@@ -661,8 +662,8 @@ class Automaton(object):
Collections are separated by a comma.
"""
if
self
.
dbg
:
print
(
"process URL search
"
)
self
.
logger
.
debug
(
""
)
self
.
logger
.
debug
(
f
"process URL search --
{
host
}
--
{
collections
}
"
)
# extend harvester for logs
self
.
harvester
.
host
=
host
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment