Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
cb34d638
Commit
cb34d638
authored
Jul 05, 2016
by
LE GAC Renaud
Browse files
Update Automaton to build the cc keyword when running with inspirehep.
parent
956e8dff
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
0 deletions
+13
-0
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+13
-0
No files found.
modules/harvest_tools/automaton.py
View file @
cb34d638
...
...
@@ -28,6 +28,10 @@ MSG_NSERT_FAIL = "Fail to insert the new record in the database."
MSG_NO_OAI
=
"Reject no OAI identifier"
MSG_WELL_FORM_OAI
=
"Reject OAI is not well formed"
# search collection when using inspirehep
# required for "Hal Hidden"
REG_COLLECTION
=
re
.
compile
(
r
"cc([A-Za-z ]+)(and|$)"
)
class
Automaton
(
object
):
"""Base class to search and process publications:
...
...
@@ -299,6 +303,15 @@ class Automaton(object):
sf
=
'year'
,
# sort by date
so
=
'd'
)
# descending order
# handle the cc keyword (true inspirehep collection)
match
=
REG_COLLECTION
.
search
(
query
)
if
match
:
dic
[
"cc"
]
=
match
.
group
(
1
).
strip
()
dic
[
"p"
]
=
REG_COLLECTION
.
sub
(
""
,
query
).
strip
()
dic
[
"p"
]
=
dic
[
"p"
].
replace
(
" "
,
" "
)
if
dic
[
"p"
]
==
"find"
:
del
dic
[
"p"
]
# CERN INVENIO store
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment