Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
f61c7f28
Commit
f61c7f28
authored
Apr 30, 2015
by
MEESSEN Christophe
Browse files
fix some remaining spaces
parent
869c3d54
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
45 additions
and
45 deletions
+45
-45
modules/harvest_tools.py
modules/harvest_tools.py
+45
-45
No files found.
modules/harvest_tools.py
View file @
f61c7f28
...
...
@@ -175,50 +175,50 @@ def get_harvester_tool(controller):
return
Tool
def
build_harvester_tool
(
db
,
id_team
,
id_project
,
controller
,
id_category
,
def
build_harvester_tool
(
db
,
id_team
,
id_project
,
controller
,
id_category
,
year_start
=
None
,
year_end
=
None
,
dry_run
=
True
,
debug
=
False
):
"""
Harvest tool factory function; returns None if no factory exists for the controller.
@type db: gluon.dal.DAL
@param db:
@type id_team: int
@param id_team: Identifier of the team in the db
@type id_project: int
@param id_project: Identifier of the project in the db
@type controller: unicode
@param controller: Type of publication (e.g. 'article', 'proceedings', ...)
@type id_category: int
@param id_category: Identifier of the category of publication (e.g. ACL, ACTI, ...)
@type year_start: int
@keyword year_start: Start year of search (e.g. 2014)
@type year_end: int
@keyword year_end: End year of search (e.g. 2015)
@type dry_run: boolean
@keyword dry_run: True if no record is to be written to the db
@type debug: bool
@param debug: activate the debug mode
"""
tool_class
=
get_harvester_tool
(
controller
)
if
tool_class
is
None
:
return
None
return
tool_class
(
db
,
id_team
,
id_project
,
controller
,
id_category
,
return
tool_class
(
db
,
id_team
,
id_project
,
controller
,
id_category
,
year_start
,
year_end
,
dry_run
,
debug
)
def
learn_my_authors
(
db
,
authors
=
None
,
id_project
=
None
,
id_team
=
None
,
def
learn_my_authors
(
db
,
authors
=
None
,
id_project
=
None
,
id_team
=
None
,
year
=
None
):
"""Train the rescue list of the authors of my institute,
stored in the database, using the list C{authors} provided in argument.
...
...
@@ -447,12 +447,12 @@ class PublicationsTool(object):
are defined by the current request.
"""
def
__init__
(
self
,
db
,
id_team
,
id_project
,
controller
,
id_category
,
def
__init__
(
self
,
db
,
id_team
,
id_project
,
controller
,
id_category
,
year_start
=
None
,
year_end
=
None
,
dry_run
=
True
,
debug
=
False
):
"""
@note see C{build_harvester_tool} factory function building C{PublicationsTools}
@type db: gluon.dal.DAL
...
...
@@ -469,19 +469,19 @@ class PublicationsTool(object):
@type id_category: int
@param id_category: Identifier of the category of publication (e.g. ACL, ACTI, ...)
@type year_start: int
@keyword year_start: Start year of search (e.g. 2014)
@type year_end: int
@keyword year_end: End year of search (e.g. 2015)
@type dry_run: boolean
@keyword dry_run: True if no record is to be written to the db
@type debug: bool
@param debug: activate the debug mode
"""
self
.
collection_logs
=
[]
self
.
db
=
db
...
...
@@ -497,9 +497,9 @@ class PublicationsTool(object):
self
.
check
=
CheckAndFix
()
self
.
marc12
=
Marc12
()
# check parameters
# protection team, project and/or category have to be defined
# protection team, project and/or category have to be defined
if
not
self
.
id_team
:
raise
ToolException
(
MSG_NO_TEAM
)
...
...
@@ -509,7 +509,7 @@ class PublicationsTool(object):
if
not
self
.
id_category
:
raise
ToolException
(
MSG_NO_CAT
)
# Construct harvester Storage needed for the log
# Construct harvester Storage needed for the log
self
.
harvester
=
Storage
(
id_teams
=
self
.
id_team
,
id_projects
=
self
.
id_project
,
controller
=
self
.
controller
,
...
...
@@ -535,7 +535,7 @@ class PublicationsTool(object):
L{invenio_tools.InvenioStore.get_ids}.
"""
# INSPIREHEP store
if
collection
.
startswith
(
'find'
):
...
...
@@ -549,12 +549,12 @@ class PublicationsTool(object):
elif
self
.
year_start
and
self
.
year_end
:
query
+=
" and date > %s and date < %s "
\
%
(
self
.
year_start
-
1
,
self
.
year_end
+
1
)
%
(
self
.
year_start
-
1
,
self
.
year_end
+
1
)
di
=
dict
(
p
=
query
,
# query à la spires
rg
=
1000
,
# maximum number of records returned
sf
=
'year'
,
# sort by date
so
=
'd'
)
# descending order
di
=
dict
(
p
=
query
,
# query à la spires
rg
=
1000
,
# maximum number of records returned
sf
=
'year'
,
# sort by date
so
=
'd'
)
# descending order
# CERN INVENIO store
else
:
...
...
@@ -567,7 +567,7 @@ class PublicationsTool(object):
elif
self
.
year_start
and
self
.
year_end
:
li
=
[]
for
year
in
range
(
self
.
year_start
,
self
.
year_end
+
1
):
for
year
in
range
(
self
.
year_start
,
self
.
year_end
+
1
):
li
.
append
(
str
(
year
))
rex
=
'|'
.
join
(
li
)
...
...
@@ -802,7 +802,7 @@ class PublicationsTool(object):
"""
return
0
def
process_url
(
self
,
host
,
collections
):
"""Retrieve the MARC XML string and launch its decoding.
...
...
@@ -811,25 +811,25 @@ class PublicationsTool(object):
@type host: unicode
@keyword host: Web host name to query for publication
@type collections: unicode
@keyword collections: Request string to send to the host to get the publications
@rtype: int
@return: one when the record is inserted / updated in the database
@rtype: int
@return: one when the record is inserted / updated in the database
zero otherwise.
"""
if
self
.
dbg
:
print
"process URL search"
self
.
host
=
host
self
.
collections
=
collections
marc12xmls
=
[]
# extend harvester for logs
self
.
harvester
.
host
=
host
self
.
harvester
.
collections
=
collections
self
.
harvester
.
collections
=
collections
store
=
InvenioStore
(
self
.
host
)
...
...
@@ -952,7 +952,7 @@ class PublicationsTool(object):
return
dict
(
collection_logs
=
self
.
collection_logs
,
controller
=
self
.
controller
,
logs
=
self
.
logs
)
logs
=
self
.
logs
)
def
__call__
(
self
,
xml
):
...
...
@@ -970,12 +970,12 @@ class PublicationsTool(object):
@type xml: unicode
@keyword xml: marc12 xml encoding of the publication record
"""
if
self
.
dbg
:
print
"start processing"
,
self
.
__class__
.
__name__
print
"decode request"
if
self
.
dbg
:
print
"get harvest parameters"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment