Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
076d20c6
Commit
076d20c6
authored
Sep 25, 2015
by
LE GAC Renaud
Browse files
Move the function recover_oai in the CheckAndFix class.
parent
df4943f4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
37 additions
and
38 deletions
+37
-38
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+4
-11
modules/harvest_tools/base.py
modules/harvest_tools/base.py
+0
-27
modules/invenio_tools/checkandfix.py
modules/invenio_tools/checkandfix.py
+33
-0
No files found.
modules/harvest_tools/automaton.py
View file @
076d20c6
...
...
@@ -6,7 +6,7 @@ import re
import
traceback
from
base
import
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
recover_oai
,
ToolException
from
base
import
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
ToolException
from
gluon.storage
import
Storage
from
invenio_tools
import
(
CheckAndFix
,
InvenioStore
,
...
...
@@ -280,6 +280,8 @@ class Automaton(object):
print
"check record"
try
:
self
.
check
.
recover_oai
(
record
,
self
.
harvester
.
host
)
if
self
.
check
.
is_bad_oai_used
(
record
):
self
.
logs
[
-
1
].
idle
(
MSG_IN_DB
,
record
.
year
())
return
False
...
...
@@ -528,22 +530,13 @@ class Automaton(object):
record_id
=
record
.
id
(),
title
=
record
.
title
()))
# the OAI is not defined -- recover it
oai
=
record
.
oai
()
if
oai
is
None
:
recover_oai
(
record
,
self
.
harvester
.
host
)
# the OAI is not well formed -- recover it
if
not
REG_OAI
.
match
(
oai
):
recover_oai
(
record
,
self
.
harvester
.
host
)
# check that the record is well formed
# repair non-conformity as far as possible
if
not
self
.
check_record
(
record
):
continue
if
self
.
dbg
:
print
"
start loading
in the database"
print
"
insert record
in the database"
# insert the record in the database
self
.
insert_record
(
record
)
...
...
modules/harvest_tools/base.py
View file @
076d20c6
...
...
@@ -14,10 +14,6 @@ MSG_FIX_ORIGIN = "Fixed the origin field"
MSG_IN_DB
=
"Already in the database"
MSG_LOAD
=
"Load in the database"
MSG_INVALID_HOST
=
"Invalid host"
OAI_INVENIO
=
"oai:%s:%s"
def
family_name_fr
(
full_name
):
"""Extract the family name when the full name is encoded as C{J. Doe}.
...
...
@@ -145,28 +141,5 @@ def learn_my_authors(db,
db
.
my_authors
[
row
.
id
]
=
dict
(
authors
=
', '
.
join
(
database_authors
))
def recover_oai(record, host):
    """Write a rebuilt OAI identifier into the record.

    The identifier is C{OAI_INVENIO % (host, record.id())}. The field and
    subfield receiving it are selected by the host.

    @type record: Record
    @param record: the record to update in place. It has to support
    C{in}, item access / assignment and the method C{id()}.

    @type host: unicode
    @param host: either C{cds.cern.ch} or C{inspirehep.net}

    @raise ValueError: when the host is not one of the two above.

    """
    # (host, field, subfield) for each store handled by this helper
    locations = (("cds.cern.ch", u"0248", "a"),
                 ("inspirehep.net", u"909CO", "o"))

    for known_host, tag, code in locations:
        if host == known_host:
            break
    else:
        # the scan ended without a match
        raise ValueError(MSG_INVALID_HOST)

    # create the field container on first use
    if tag not in record:
        record[tag] = dict()

    identifier = OAI_INVENIO % (host, record.id())
    record[tag][code] = identifier
class ToolException(Exception):
    """Exception subclass declared in the harvest_tools base module.

    It adds neither attributes nor methods to C{Exception}.

    """
modules/invenio_tools/checkandfix.py
View file @
076d20c6
...
...
@@ -51,6 +51,8 @@ MONTHS = {u'Jan':'01',
u
'Nov'
:
'11'
,
u
'Dec'
:
'12'
}
MSG_INVALID_HOST
=
"Invalid host"
MSG_NO_AUTHOR
=
"Reject no author(s)"
MSG_NO_COUNTRY
=
"Reject invalid country"
MSG_NO_CONF_DATE
=
"Reject no conference date"
...
...
@@ -71,6 +73,8 @@ MSG_WELL_FORMED_DATE = "Reject submission date is not well formed"
MSG_WELL_FORMED_EDITOR
=
"Reject editor is not well formed"
OAI_INVENIO
=
"oai:%s:%s"
REG_COLLABORATION
=
re
.
compile
(
regex
.
REG_COLLABORATION
)
REG_CONF_DATES_1
=
re
.
compile
(
"(\d+) *-? *(\d+) *([A-Z][a-z]{2}) *(\d{4})"
)
...
...
@@ -740,6 +744,35 @@ class CheckAndFix(object):
self
.
_repair_paper_reference
(
record
)
return
def recover_oai(self, record, host):
    """Rebuild the OAI identifier of the record when it is missing
    or when it does not match C{REG_OAI}.

    Nothing is changed when C{record.oai()} returns a value matching
    C{REG_OAI}. Otherwise the identifier
    C{OAI_INVENIO % (host, record.id())} is written in the field and
    subfield selected by the host.

    @type record: Record
    @param record: the record to check and, if needed, update in place.

    @type host: unicode
    @param host: either cds.cern.ch or inspirehep.net

    @raise ValueError: when the host is not one of the two above and
    the identifier has to be rebuilt.

    """
    current = record.oai()
    has_to_recover = current is None or not REG_OAI.match(current)
    if not has_to_recover:
        return

    # (host, field, subfield) for each store handled by this method
    locations = (("cds.cern.ch", u"0248", "a"),
                 ("inspirehep.net", u"909CO", "o"))

    for known_host, tag, code in locations:
        if host == known_host:
            break
    else:
        # the scan ended without a match
        raise ValueError(MSG_INVALID_HOST)

    # create the field container on first use
    if tag not in record:
        record[tag] = dict()

    identifier = OAI_INVENIO % (host, record.id())
    record[tag][code] = identifier
def
submitted
(
self
,
record
):
"""Standardize the submitted date as YYYY-MM or YYYY-MM-DD.
Look for alternative when it is not defined.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment