Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
68597c28
Commit
68597c28
authored
Sep 29, 2015
by
LE GAC Renaud
Browse files
Apply PEP-8 rules.
parent
4d708b04
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
230 additions
and
187 deletions
+230
-187
modules/callbacks.py
modules/callbacks.py
+25
-23
modules/check_tools.py
modules/check_tools.py
+49
-35
modules/countries.py
modules/countries.py
+4
-4
modules/filters.py
modules/filters.py
+22
-21
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+2
-2
modules/harvest_tools/base.py
modules/harvest_tools/base.py
+3
-3
modules/harvest_tools/msgcollection.py
modules/harvest_tools/msgcollection.py
+0
-2
modules/invenio_tools/base.py
modules/invenio_tools/base.py
+7
-6
modules/invenio_tools/checkandfix.py
modules/invenio_tools/checkandfix.py
+18
-18
modules/invenio_tools/exception.py
modules/invenio_tools/exception.py
+20
-5
modules/invenio_tools/inveniostore.py
modules/invenio_tools/inveniostore.py
+4
-4
modules/invenio_tools/iterrecord.py
modules/invenio_tools/iterrecord.py
+14
-5
modules/invenio_tools/record.py
modules/invenio_tools/record.py
+3
-3
modules/invenio_tools/recordconf.py
modules/invenio_tools/recordconf.py
+4
-3
modules/invenio_tools/recordpubli.py
modules/invenio_tools/recordpubli.py
+3
-3
modules/list_postprocessing.py
modules/list_postprocessing.py
+40
-41
modules/regex.py
modules/regex.py
+2
-1
modules/reporting_tools.py
modules/reporting_tools.py
+10
-8
No files found.
modules/callbacks.py
View file @
68597c28
...
@@ -13,6 +13,10 @@ from plugin_dbui import (CALLBACK_ERRORS,
...
@@ -13,6 +13,10 @@ from plugin_dbui import (CALLBACK_ERRORS,
get_where_query
)
get_where_query
)
MSG_DUPLICATE
=
\
"Can't delete this record since several publications refer to it."
def
INHIBIT_CASCADE_DELETE
(
set_records
):
def
INHIBIT_CASCADE_DELETE
(
set_records
):
"""Inhibit the delete when publications use the reference field.
"""Inhibit the delete when publications use the reference field.
...
@@ -51,8 +55,7 @@ def INHIBIT_CASCADE_DELETE(set_records):
...
@@ -51,8 +55,7 @@ def INHIBIT_CASCADE_DELETE(set_records):
query
=
(
query
)
&
(
set_records
.
query
)
query
=
(
query
)
&
(
set_records
.
query
)
if
db
(
query
).
count
():
if
db
(
query
).
count
():
field
.
_table
[
CALLBACK_ERRORS
]
=
\
field
.
_table
[
CALLBACK_ERRORS
]
=
T
(
MSG_DUPLICATE
)
T
(
"Can't delete this record since several publications refer to it."
)
return
True
return
True
return
False
return
False
...
@@ -80,12 +83,12 @@ def INHIBIT_DUPLICATE_PUBLICATION(publication):
...
@@ -80,12 +83,12 @@ def INHIBIT_DUPLICATE_PUBLICATION(publication):
if
ids
:
if
ids
:
db
.
publications
[
CALLBACK_ERRORS
]
=
[
db
.
publications
[
CALLBACK_ERRORS
]
=
[
T
(
"Can't insert the article."
),
T
(
"Can't insert the article."
),
T
(
"An article already exists with the same:"
),
T
(
"An article already exists with the same:"
),
T
(
"• title, publisher, volume and pages"
),
T
(
"• title, publisher, volume and pages"
),
T
(
"• or publisher, volume and pages"
),
T
(
"• or publisher, volume and pages"
),
T
(
"• or publisher and title."
),
T
(
"• or publisher and title."
),
T
(
"See publication id(s) %s"
)
%
', '
.
join
(
ids
)]
T
(
"See publication id(s) %s"
)
%
', '
.
join
(
ids
)]
return
True
return
True
...
@@ -99,12 +102,12 @@ def INHIBIT_DUPLICATE_PUBLICATION(publication):
...
@@ -99,12 +102,12 @@ def INHIBIT_DUPLICATE_PUBLICATION(publication):
if
ids
:
if
ids
:
db
.
publications
[
CALLBACK_ERRORS
]
=
[
db
.
publications
[
CALLBACK_ERRORS
]
=
[
T
(
"Can't insert the talk/proceeding."
),
T
(
"Can't insert the talk/proceeding."
),
T
(
"A talk/proceeding already exists with the same:"
),
T
(
"A talk/proceeding already exists with the same:"
),
T
(
"• title, conference title, date and town"
),
T
(
"• title, conference title, date and town"
),
T
(
"• or title, conference date and town"
),
T
(
"• or title, conference date and town"
),
T
(
"• or title, conference title and town"
),
T
(
"• or title, conference title and town"
),
T
(
"See publication id(s) %s"
)
%
', '
.
join
(
ids
)]
T
(
"See publication id(s) %s"
)
%
', '
.
join
(
ids
)]
return
True
return
True
...
@@ -116,9 +119,9 @@ def INHIBIT_DUPLICATE_PUBLICATION(publication):
...
@@ -116,9 +119,9 @@ def INHIBIT_DUPLICATE_PUBLICATION(publication):
if
ids
:
if
ids
:
db
.
publications
[
CALLBACK_ERRORS
]
=
[
db
.
publications
[
CALLBACK_ERRORS
]
=
[
T
(
"Can't insert the report."
),
T
(
"Can't insert the report."
),
T
(
"A report already exists with the same title"
),
T
(
"A report already exists with the same title"
),
T
(
"See publication id(s) %s"
)
%
', '
.
join
(
ids
)]
T
(
"See publication id(s) %s"
)
%
', '
.
join
(
ids
)]
return
True
return
True
...
@@ -158,9 +161,9 @@ def INHIBIT_HARVESTER_ON_CATEGORY(harvester):
...
@@ -158,9 +161,9 @@ def INHIBIT_HARVESTER_ON_CATEGORY(harvester):
code
=
db
.
categories
[
id_category
].
code
code
=
db
.
categories
[
id_category
].
code
db
.
harvesters
[
CALLBACK_ERRORS
]
=
[
db
.
harvesters
[
CALLBACK_ERRORS
]
=
[
T
(
"Can't insert the harvester."
),
T
(
"Can't insert the harvester."
),
T
(
"Harvester already exists with the same automaton "
),
T
(
"Harvester already exists with the same automaton "
),
T
(
"but with different category: %s"
)
%
code
]
T
(
"but with different category: %s"
)
%
code
]
return
True
return
True
...
@@ -190,7 +193,7 @@ def INHIBIT_PUBLICATION_DELETE_ON_OK(s):
...
@@ -190,7 +193,7 @@ def INHIBIT_PUBLICATION_DELETE_ON_OK(s):
id_rec
=
s
.
query
.
second
id_rec
=
s
.
query
.
second
if
db
.
publications
[
id_rec
].
id_status
==
id_ok
:
if
db
.
publications
[
id_rec
].
id_status
==
id_ok
:
db
.
publications
[
CALLBACK_ERRORS
]
=
\
db
.
publications
[
CALLBACK_ERRORS
]
=
\
T
(
"Can't delete a publication marked OK."
)
T
(
"Can't delete a publication marked OK."
)
return
True
return
True
return
False
return
False
...
@@ -221,8 +224,7 @@ def INHIBIT_PUBLICATION_UPDATE_ON_OK(s, f):
...
@@ -221,8 +224,7 @@ def INHIBIT_PUBLICATION_UPDATE_ON_OK(s, f):
if
db
.
publications
[
id_rec
].
id_status
==
id_ok
:
if
db
.
publications
[
id_rec
].
id_status
==
id_ok
:
db
.
publications
[
CALLBACK_ERRORS
]
=
\
db
.
publications
[
CALLBACK_ERRORS
]
=
\
T
(
"Can't updated a publication marked OK."
)
T
(
"Can't updated a publication marked OK."
)
return
True
return
True
return
False
return
False
modules/check_tools.py
View file @
68597c28
...
@@ -78,7 +78,8 @@ def check_publication(row):
...
@@ -78,7 +78,8 @@ def check_publication(row):
# publication URL
# publication URL
if
row
.
publications
.
publication_url
:
if
row
.
publications
.
publication_url
:
if
'pdf'
not
in
row
.
publications
.
publication_url
:
if
'pdf'
not
in
row
.
publications
.
publication_url
:
text
=
T
(
"Check that the publication URL corresponds to a pdf file."
)
text
=
\
T
(
"Check that the publication URL corresponds to a pdf file."
)
li
.
append
(
text
)
li
.
append
(
text
)
# latex syntax
# latex syntax
...
@@ -224,30 +225,34 @@ def duplicate_article(publication):
...
@@ -224,30 +225,34 @@ def duplicate_article(publication):
ids
=
[]
ids
=
[]
db
=
current
.
globalenv
[
'db'
]
db
=
current
.
globalenv
[
'db'
]
qcat
=
(
db
.
categories
.
code
==
'ACL'
)
|
(
db
.
categories
.
code
==
'ACLN'
)
categories
=
db
.
categories
publications
=
db
.
pulications
qmain
=
get_where_query
(
db
.
publications
)
qcat
=
(
categories
.
code
==
'ACL'
)
|
(
categories
.
code
==
'ACLN'
)
qpub
=
publications
.
id_publishers
==
publication
[
'id_publishers'
]
qmain
=
get_where_query
(
publications
)
qmain
=
((
qmain
)
&
(
qcat
))
qmain
=
((
qmain
)
&
(
qcat
))
qmain
=
((
qmain
)
&
(
db
.
publications
.
id_teams
==
publication
[
'id_teams'
]))
qmain
=
((
qmain
)
&
(
publications
.
id_teams
==
publication
[
'id_teams'
]))
qmain
=
((
qmain
)
&
(
db
.
publications
.
id_publishers
==
publication
[
'id_publishers'
]
))
qmain
=
((
qmain
)
&
(
qpub
))
if
'id'
in
publication
and
publication
[
'id'
]:
if
'id'
in
publication
and
publication
[
'id'
]:
qmain
=
((
qmain
)
&
(
db
.
publications
.
id
!=
publication
[
'id'
]))
qmain
=
((
qmain
)
&
(
publications
.
id
!=
publication
[
'id'
]))
# title, publishers, volume and pages
# title, publishers, volume and pages
query
=
((
qmain
)
&
(
db
.
publications
.
title
==
publication
[
'title'
]))
query
=
((
qmain
)
&
(
publications
.
title
==
publication
[
'title'
]))
query
=
((
query
)
&
(
db
.
publications
.
volume
==
publication
[
'volume'
]))
query
=
((
query
)
&
(
publications
.
volume
==
publication
[
'volume'
]))
query
=
((
query
)
&
(
db
.
publications
.
pages
==
publication
[
'pages'
]))
query
=
((
query
)
&
(
publications
.
pages
==
publication
[
'pages'
]))
extend_ids
(
db
,
query
,
ids
)
extend_ids
(
db
,
query
,
ids
)
# publisher, volume, pages and year
# publisher, volume, pages and year
query
=
((
qmain
)
&
(
db
.
publications
.
volume
==
publication
[
'volume'
]))
query
=
((
qmain
)
&
(
publications
.
volume
==
publication
[
'volume'
]))
query
=
((
query
)
&
(
db
.
publications
.
pages
==
publication
[
'pages'
]))
query
=
((
query
)
&
(
publications
.
pages
==
publication
[
'pages'
]))
query
=
((
query
)
&
(
db
.
publications
.
year
==
publication
[
'year'
]))
query
=
((
query
)
&
(
publications
.
year
==
publication
[
'year'
]))
extend_ids
(
db
,
query
,
ids
)
extend_ids
(
db
,
query
,
ids
)
# publisher and title
# publisher and title
query
=
((
qmain
)
&
(
db
.
publications
.
title
==
publication
[
'title'
]))
query
=
((
qmain
)
&
(
publications
.
title
==
publication
[
'title'
]))
extend_ids
(
db
,
query
,
ids
)
extend_ids
(
db
,
query
,
ids
)
return
ids
return
ids
...
@@ -272,32 +277,39 @@ def duplicate_conference(publication):
...
@@ -272,32 +277,39 @@ def duplicate_conference(publication):
ids
=
[]
ids
=
[]
db
=
current
.
globalenv
[
'db'
]
db
=
current
.
globalenv
[
'db'
]
qcat
=
(
db
.
categories
.
code
==
'ACTI'
)
|
\
categories
=
db
.
categories
(
db
.
categories
.
code
==
'ACTN'
)
|
\
publications
=
db
.
publications
(
db
.
categories
.
code
==
'COM'
)
qcat
=
(
categories
.
code
==
'ACTI'
)
|
\
(
categories
.
code
==
'ACTN'
)
|
\
(
categories
.
code
==
'COM'
)
qmain
=
get_where_query
(
db
.
publications
)
qmain
=
get_where_query
(
publications
)
qmain
=
((
qmain
)
&
(
qcat
))
qmain
=
((
qmain
)
&
(
qcat
))
qmain
=
((
qmain
)
&
(
db
.
publications
.
id_teams
==
publication
[
'id_teams'
]))
qmain
=
((
qmain
)
&
(
publications
.
id_teams
==
publication
[
'id_teams'
]))
qmain
=
((
qmain
)
&
(
db
.
publications
.
title
==
publication
[
'title'
]))
qmain
=
((
qmain
)
&
(
publications
.
title
==
publication
[
'title'
]))
if
'id'
in
publication
and
publication
[
'id'
]:
if
'id'
in
publication
and
publication
[
'id'
]:
qmain
=
((
qmain
)
&
(
db
.
publications
.
id
!=
publication
[
'id'
]))
qmain
=
((
qmain
)
&
(
publications
.
id
!=
publication
[
'id'
]))
# title, conference title, conference date and conference town
# title, conference title, conference date and conference town
query
=
((
qmain
)
&
(
db
.
publications
.
conference_title
==
publication
[
'conference_title'
]))
qtitle
=
publications
.
conference_title
==
publication
[
'conference_title'
]
query
=
((
query
)
&
(
db
.
publications
.
conference_dates
==
publication
[
'conference_dates'
]))
qdates
=
publications
.
conference_dates
==
publication
[
'conference_dates'
]
query
=
((
query
)
&
(
db
.
publications
.
conference_town
==
publication
[
'conference_town'
]))
qtown
=
publications
.
conference_town
==
publication
[
'conference_town'
]
query
=
((
qmain
)
&
(
qtitle
))
query
=
((
query
)
&
(
qdates
))
query
=
((
query
)
&
(
qtown
))
extend_ids
(
db
,
query
,
ids
)
extend_ids
(
db
,
query
,
ids
)
# title, conference date and conference town
# title, conference date and conference town
query
=
((
query
)
&
(
db
.
publications
.
conference_dates
==
publication
[
'conference_dates'
]
))
query
=
((
query
)
&
(
qdates
))
query
=
((
query
)
&
(
db
.
publications
.
conference_town
==
publication
[
'conference_
town
'
]
))
query
=
((
query
)
&
(
q
town
))
extend_ids
(
db
,
query
,
ids
)
extend_ids
(
db
,
query
,
ids
)
# title, conference title and conference town
# title, conference title and conference town
query
=
((
qmain
)
&
(
db
.
publications
.
conference_title
==
publication
[
'conference_title'
]
))
query
=
((
qmain
)
&
(
qtitle
))
query
=
((
query
)
&
(
db
.
publications
.
conference_town
==
publication
[
'conference_
town
'
]
))
query
=
((
query
)
&
(
q
town
))
extend_ids
(
db
,
query
,
ids
)
extend_ids
(
db
,
query
,
ids
)
return
ids
return
ids
...
@@ -316,13 +328,15 @@ def duplicate_origin(publication):
...
@@ -316,13 +328,15 @@ def duplicate_origin(publication):
ids
=
[]
ids
=
[]
db
=
current
.
globalenv
[
'db'
]
db
=
current
.
globalenv
[
'db'
]
publications
=
db
.
publications
# protection against empty origin field
# protection against empty origin field
if
not
publication
[
'origin'
]:
if
not
publication
[
'origin'
]:
return
ids
return
ids
# look for publication with the same origin field
# look for publication with the same origin field
query
=
db
.
publications
.
id
!=
publication
[
'id'
]
query
=
publications
.
id
!=
publication
[
'id'
]
query
=
((
query
)
&
(
db
.
publications
.
origin
==
publication
[
'origin'
]))
query
=
((
query
)
&
(
publications
.
origin
==
publication
[
'origin'
]))
set_records
=
db
(
query
)
set_records
=
db
(
query
)
if
set_records
.
count
():
if
set_records
.
count
():
...
@@ -349,15 +363,15 @@ def duplicate_report(publication):
...
@@ -349,15 +363,15 @@ def duplicate_report(publication):
ids
=
[]
ids
=
[]
db
=
current
.
globalenv
[
'db'
]
db
=
current
.
globalenv
[
'db'
]
qcat
=
db
.
categories
.
code
==
'AP'
publications
=
db
.
publications
qmain
=
get_where_query
(
db
.
publications
)
qmain
=
get_where_query
(
publications
)
qmain
=
((
qmain
)
&
(
qcat
))
qmain
=
((
qmain
)
&
(
db
.
categories
.
code
==
'AP'
))
qmain
=
((
qmain
)
&
(
db
.
publications
.
id_teams
==
publication
[
'id_teams'
]))
qmain
=
((
qmain
)
&
(
publications
.
id_teams
==
publication
[
'id_teams'
]))
qmain
=
((
qmain
)
&
(
db
.
publications
.
title
==
publication
[
'title'
]))
qmain
=
((
qmain
)
&
(
publications
.
title
==
publication
[
'title'
]))
if
'id'
in
publication
and
publication
[
'id'
]:
if
'id'
in
publication
and
publication
[
'id'
]:
qmain
=
((
qmain
)
&
(
db
.
publications
.
id
!=
publication
[
'id'
]))
qmain
=
((
qmain
)
&
(
publications
.
id
!=
publication
[
'id'
]))
extend_ids
(
db
,
qmain
,
ids
)
extend_ids
(
db
,
qmain
,
ids
)
...
...
modules/countries.py
View file @
68597c28
...
@@ -2,13 +2,13 @@
...
@@ -2,13 +2,13 @@
""" countries
""" countries
List of countries extracted from the geographical database www.geonames.org:
List of countries extracted from the geographical database www.geonames.org:
1. get the file C{countryInfo.txt}
1. get the file C{countryInfo.txt}
from U{http://download.geonames.org/export/dump/}
from U{http://download.geonames.org/export/dump/}
2. open the file with libreoffice calc
2. open the file with libreoffice calc
and remove all columns but the country names
and remove all columns but the country names
Extracted in Nov. 2014
Extracted in Nov. 2014
"""
"""
COUNTRIES
=
[
'Andorra'
,
COUNTRIES
=
[
'Andorra'
,
...
...
modules/filters.py
View file @
68597c28
...
@@ -4,77 +4,78 @@
...
@@ -4,77 +4,78 @@
"""
"""
def
CLEAN_COLLABORATION
(
value
):
def
CLEAN_COLLABORATION
(
value
):
"""Correct stupid mistakes on the collaboration field.
"""Correct stupid mistakes on the collaboration field.
- No leading and trailing spaces
- No leading and trailing spaces
- No duplicate entries
- No duplicate entries
- Remove entries starting with C{on behalf}
- Remove entries starting with C{on behalf}
- Collaboration names always start with a capital letter
- Collaboration names always start with a capital letter
@type value: str
@type value: str
@param value: string where collaborations are separated by comma
@param value: string where collaborations are separated by comma
@rtype: str
@rtype: str
"""
"""
li
=
[]
li
=
[]
for
el
in
value
.
split
(
','
):
for
el
in
value
.
split
(
','
):
# Fix to remove space at the beginning and at the end
# Fix to remove space at the beginning and at the end
el
=
el
.
strip
()
el
=
el
.
strip
()
# Fix "XXX collaboration" as "XXX Collaboration"
# Fix "XXX collaboration" as "XXX Collaboration"
el
=
el
.
replace
(
'collaboration'
,
'Collaboration'
)
el
=
el
.
replace
(
'collaboration'
,
'Collaboration'
)
el
=
el
.
replace
(
'consortium'
,
'Consortium'
)
el
=
el
.
replace
(
'consortium'
,
'Consortium'
)
el
=
el
.
replace
(
'group'
,
'Group'
)
el
=
el
.
replace
(
'group'
,
'Group'
)
# Fix to avoid duplicate entries
# Fix to avoid duplicate entries
if
el
in
li
:
if
el
in
li
:
continue
continue
# Fix to remove 'on behalf of the LHCb Collaboration'
# Fix to remove 'on behalf of the LHCb Collaboration'
if
el
.
startswith
(
'on behalf'
):
if
el
.
startswith
(
'on behalf'
):
continue
continue
li
.
append
(
el
)
li
.
append
(
el
)
return
', '
.
join
(
li
)
return
', '
.
join
(
li
)
def
CLEAN_REVIEW
(
value
):
def
CLEAN_REVIEW
(
value
):
"""Correct stupid mistakes on the paper_editor field.
"""Correct stupid mistakes on the paper_editor field.
- Remove dot and comma
- Remove dot and comma
- No leading and trailing spaces
- No leading and trailing spaces
@type value: str
@type value: str
@param value: review abbreviation
@param value: review abbreviation
@rtype: str
@rtype: str
"""
"""
# Fix to remove dot and comma
# Fix to remove dot and comma
value
=
value
.
replace
(
"."
,
""
).
replace
(
","
,
""
)
value
=
value
.
replace
(
"."
,
""
).
replace
(
","
,
""
)
# Fix to have only one space between words
# Fix to have only one space between words
value
=
' '
.
join
(
value
.
split
())
value
=
' '
.
join
(
value
.
split
())
return
value
return
value
def
CLEAN_THESIS_DEFENSE
(
value
):
def
CLEAN_THESIS_DEFENSE
(
value
):
"""Correct stupid mistakes on the thesis_defense field.
"""Correct stupid mistakes on the thesis_defense field.
- Remove prefix like C{Presented}, C{on}, ...
- Remove prefix like C{Presented}, C{on}, ...
@type value: str
@type value: str
@param value: string with the defense date
@param value: string with the defense date
@rtype: str
@rtype: str
"""
"""
value
=
value
.
replace
(
'Presented '
,
''
)
value
=
value
.
replace
(
'Presented '
,
''
)
value
=
value
.
replace
(
'presented '
,
''
)
value
=
value
.
replace
(
'presented '
,
''
)
value
=
value
.
replace
(
'on '
,
''
)
value
=
value
.
replace
(
'on '
,
''
)
return
value
return
value
\ No newline at end of file
modules/harvest_tools/automaton.py
View file @
68597c28
...
@@ -159,7 +159,7 @@ class Automaton(object):
...
@@ -159,7 +159,7 @@ class Automaton(object):
db
=
self
.
db
db
=
self
.
db
try
:
try
:
rec_id
=
db
.
publications
.
insert
(
**
fields
)
rec_id
=
db
.
publications
.
insert
(
**
fields
)
if
rec_id
:
if
rec_id
:
return
1
return
1
...
@@ -358,7 +358,7 @@ class Automaton(object):
...
@@ -358,7 +358,7 @@ class Automaton(object):
# fix origin field
# fix origin field
ok
=
db
.
publications
[
rec_id
].
origin
and
\
ok
=
db
.
publications
[
rec_id
].
origin
and
\
db
.
publications
[
rec_id
].
origin
==
oai_url
db
.
publications
[
rec_id
].
origin
==
oai_url
if
not
ok
:
if
not
ok
:
if
not
self
.
dry_run
:
if
not
self
.
dry_run
:
db
.
publications
[
rec_id
]
=
dict
(
origin
=
oai_url
)
db
.
publications
[
rec_id
]
=
dict
(
origin
=
oai_url
)
...
...
modules/harvest_tools/base.py
View file @
68597c28
...
@@ -128,7 +128,7 @@ def learn_my_authors(db,
...
@@ -128,7 +128,7 @@ def learn_my_authors(db,
for
elem
in
diff
:
for
elem
in
diff
:
if
isinstance
(
elem
,
unicode
):
if
isinstance
(
elem
,
unicode
):
elem
=
elem
.
encode
(
'utf8'
)
elem
=
elem
.
encode
(
'utf8'
)
family_name
=
elem
[
elem
.
rfind
(
'. '
)
+
2
:]
# extract family name
family_name
=
elem
[
elem
.
rfind
(
'. '
)
+
2
:]
if
family_name
not
in
row
.
authors
:
if
family_name
not
in
row
.
authors
:
elems
.
append
(
elem
)
elems
.
append
(
elem
)
...
@@ -138,5 +138,5 @@ def learn_my_authors(db,
...
@@ -138,5 +138,5 @@ def learn_my_authors(db,
db
.
my_authors
[
row
.
id
]
=
dict
(
authors
=
', '
.
join
(
database_authors
))
db
.
my_authors
[
row
.
id
]
=
dict
(
authors
=
', '
.
join
(
database_authors
))
class
ToolException
(
Exception
):
pass
class
ToolException
(
Exception
):
pass
modules/harvest_tools/msgcollection.py
View file @
68597c28
...
@@ -28,5 +28,3 @@ class MsgCollection(Storage):
...
@@ -28,5 +28,3 @@ class MsgCollection(Storage):
"""
"""
return
self
.
url
.
replace
(
"of=id"
,
"of=hb"
)
return
self
.
url
.
replace
(
"of=id"
,
"of=hb"
)
modules/invenio_tools/base.py
View file @
68597c28
...
@@ -66,9 +66,11 @@ def is_institute(record):
...
@@ -66,9 +66,11 @@ def is_institute(record):
bool: true when the MARC record describes an institute
bool: true when the MARC record describes an institute
"""
"""
# u'980': [{'b': [u'CK90', u'HEP200', u'PDGLIST', u'PPF', u'TOP500', u'WEB']},
# u'980': [
# {'a': u'INSTITUTION'},
# {'b': [u'CK90', u'HEP200', u'PDGLIST', u'PPF', u'TOP500', u'WEB']},
# {'a': u'CORE'}]}
# {'a': u'INSTITUTION'},
# {'a': u'CORE'}
# ]
if
u
"980"
in
record
:
if
u
"980"
in
record
:
if
isinstance
(
record
[
u
"980"
],
list
):
if
isinstance
(
record
[
u
"980"
],
list
):
...
@@ -77,9 +79,8 @@ def is_institute(record):
...
@@ -77,9 +79,8 @@ def is_institute(record):
if
k
==
"a"
and
v
==
u
"INSTITUTION"
:
if
k
==
"a"
and
v
==
u
"INSTITUTION"
:
return
True
return
True
elif
isinstance
(
record
[
u
"980"
],
dict
)
and
\
elif
isinstance
(
record
[
u
"980"
],
dict
)
and
"a"
in
record
[
u
"980"
]
and
\
"a"
in
record
[
u
"980"
]
and
\
record
[
u
"980"
][
"a"
]
==
u
"INSTITUTION"
:
record
[
u
"980"
][
"a"
]
==
u
"INSTITUTION"
:
return
True
return
True
return
False
return
False
...
...
modules/invenio_tools/checkandfix.py
View file @
68597c28
...
@@ -35,21 +35,21 @@ _ref1 = r"(?P<p>[A-Za-z\. ]+) +(?P<v>\d+),? +(?P<c>[\d-]+) +\((?P<y>[\d]+)\)"
...
@@ -35,21 +35,21 @@ _ref1 = r"(?P<p>[A-Za-z\. ]+) +(?P<v>\d+),? +(?P<c>[\d-]+) +\((?P<y>[\d]+)\)"
_ref2
=
r
"(?P<p>[A-Za-z\. ]+) +\((?P<y>\d+)\) +(?P<v>[\d]+):(?P<c>[\d-]+)"
_ref2
=
r
"(?P<p>[A-Za-z\. ]+) +\((?P<y>\d+)\) +(?P<v>[\d]+):(?P<c>[\d-]+)"
DECODE_REF
=
[
re
.
compile
(
_ref1
),
re
.
compile
(
_ref2
)]
DECODE_REF
=
[
re
.
compile
(
_ref1
),
re
.
compile
(
_ref2
)]
MONTHS
=
{
u
'Jan'
:
'01'
,
MONTHS
=
{
u
'Jan'
:
'01'
,
u
'Feb'
:
'02'
,
u
'Feb'
:
'02'
,
u
'Fev'
:
'02'
,
u
'Fev'
:
'02'
,
u
'Mar'
:
'03'
,
u
'Mar'
:
'03'
,
u
'Apr'
:
'04'
,
u
'Apr'
:
'04'
,
u
'Avr'
:
'04'
,