Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
d9027694
Commit
d9027694
authored
Jul 05, 2017
by
LE GAC Renaud
Browse files
Update Automaton to add the method process_recjson.
parent
4414c211
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
48 additions
and
27 deletions
+48
-27
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+48
-27
No files found.
modules/harvest_tools/automaton.py
View file @
d9027694
...
...
@@ -551,49 +551,30 @@ class Automaton(object):
# process the remaining identifiers
[
self
.
process_recid
(
rec_id
)
for
rec_id
in
rec_ids
]
def
process_rec
id
(
self
,
rec
_id
):
"""Process the publication:
def
process_rec
json
(
self
,
rec
json
):
"""Process the publication
provided as a JSON record
:
* get the publication data from the store using its identifier
* instantiate the record (RecordPubli, RecordConf, RecordThesis)
* process OAI data
* check the record
* insert new record in the database
Note:
* Designed to never stop even when exceptions are raised
* Have a look at the collection_logs and logs in order to
understand what happened.
Args:
rec
_id (in
t):
identifier of the publication in
the store.
rec
json (dic
t):
record provided by
the store.
"""
if
self
.
dbg
:
print
(
"
\n
processing record"
,
rec_id
)
print
(
"
\n
processing record"
)
collection_logs
=
self
.
collection_logs
harvester
=
self
.
harvester
logs
=
self
.
logs
# instantiate the record
try
:
recjson
=
self
.
store
.
get_record
(
rec_id
)
record
=
build_record
(
recjson
)
if
self
.
dbg
:
print
(
"
\t
"
,
record
.
title
())
record
=
build_record
(
recjson
)
except
Exception
as
e
:
print
(
traceback
.
format_exc
())
url
=
OAI_URL
%
(
harvester
.
host
,
rec_id
)
logs
.
append
(
Msg
(
harvester
=
harvester
,
collection
=
collection_logs
[
-
1
].
title
,
record_id
=
rec_id
,
title
=
url
))
logs
[
-
1
].
reject
(
e
)
return
if
self
.
dbg
:
print
(
"
\t
"
,
record
.
title
().
encode
(
"utf-8"
))
# start the log for the record
logs
.
append
(
Msg
(
harvester
=
harvester
,
...
...
@@ -620,6 +601,46 @@ class Automaton(object):
action
=
(
action
.
upper
()
if
isinstance
(
action
,
str
)
else
action
)
print
(
"
\t
log:"
,
action
,
log
.
txt
)
def process_recid(self, rec_id):
    """Fetch one publication from the store and hand it to process_recjson.

    The JSON record is retrieved with ``self.store.get_record(rec_id)`` and
    passed to ``self.process_recjson``. Instantiating, checking and
    inserting the record presumably happen in ``process_recjson``
    (not visible here -- to be confirmed).

    Note:
        * Any ``Exception`` raised by either call is caught, so this
          method does not propagate it: the traceback is printed, a
          ``Msg`` whose title is the OAI URL of the record is appended
          to ``self.logs`` and the exception is passed to ``reject`` on
          the last log entry.
        * Inspect ``collection_logs`` and ``logs`` afterwards to find out
          what went wrong.

    Args:
        rec_id (int):
            identifier of the publication in the store.

    """
    if self.dbg:
        print("\nprocessing record", rec_id)

    # read up front, outside the try, so a missing attribute is not
    # mistaken for a record failure
    collection_logs, harvester, logs = (
        self.collection_logs,
        self.harvester,
        self.logs,
    )

    try:
        self.process_recjson(self.store.get_record(rec_id))

    except Exception as err:
        print(traceback.format_exc())

        # the log entry carries the record URL as its title
        url = OAI_URL % (harvester.host, rec_id)
        failure = Msg(harvester=harvester,
                      collection=collection_logs[-1].title,
                      record_id=rec_id,
                      title=url)

        logs.append(failure)
        logs[-1].reject(err)
def
process_url
(
self
,
host
,
collections
):
"""Retrieve JSON objects from the invenio store and
insert corresponding records in the database.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment