Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
076d20c6
Commit
076d20c6
authored
Sep 25, 2015
by
LE GAC Renaud
Browse files
Move the function recover_oai into the CheckAndFix class.
parent
df4943f4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
37 additions
and
38 deletions
+37
-38
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+4
-11
modules/harvest_tools/base.py
modules/harvest_tools/base.py
+0
-27
modules/invenio_tools/checkandfix.py
modules/invenio_tools/checkandfix.py
+33
-0
No files found.
modules/harvest_tools/automaton.py
View file @
076d20c6
...
...
@@ -6,7 +6,7 @@ import re
import
traceback
from
base
import
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
recover_oai
,
ToolException
from
base
import
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
ToolException
from
gluon.storage
import
Storage
from
invenio_tools
import
(
CheckAndFix
,
InvenioStore
,
...
...
@@ -280,6 +280,8 @@ class Automaton(object):
print
"check record"
try
:
self
.
check
.
recover_oai
(
record
,
self
.
harvester
.
host
)
if
self
.
check
.
is_bad_oai_used
(
record
):
self
.
logs
[
-
1
].
idle
(
MSG_IN_DB
,
record
.
year
())
return
False
...
...
@@ -528,22 +530,13 @@ class Automaton(object):
record_id
=
record
.
id
(),
title
=
record
.
title
()))
# the OAI is not defined -- recover it
oai
=
record
.
oai
()
if
oai
is
None
:
recover_oai
(
record
,
self
.
harvester
.
host
)
# the OAI is not well formed -- recover it
if
not
REG_OAI
.
match
(
oai
):
recover_oai
(
record
,
self
.
harvester
.
host
)
# check that the record is well formed
# repair non-conformity as far as possible
if
not
self
.
check_record
(
record
):
continue
if
self
.
dbg
:
print
"
start loading
in the database"
print
"
insert record
in the database"
# insert the record in the database
self
.
insert_record
(
record
)
...
...
modules/harvest_tools/base.py
View file @
076d20c6
...
...
@@ -14,10 +14,6 @@ MSG_FIX_ORIGIN = "Fixed the origin field"
MSG_IN_DB
=
"Already in the database"
MSG_LOAD
=
"Load in the database"
MSG_INVALID_HOST
=
"Invalid host"
OAI_INVENIO
=
"oai:%s:%s"
def
family_name_fr
(
full_name
):
"""Extract the family name when the full name is encoded as C{J. Doe}.
...
...
@@ -145,28 +141,5 @@ def learn_my_authors(db,
db
.
my_authors
[
row
.
id
]
=
dict
(
authors
=
', '
.
join
(
database_authors
))
def recover_oai(record, host):
    """Store a rebuilt OAI identifier in the record.

    The identifier is produced by formatting C{OAI_INVENIO} with the host
    name and the value returned by C{record.id()}. It is written
    unconditionally: any value already stored in the target subfield
    is replaced.

    @type record: Record
    @param record: the record to update; it is modified in place.

    @type host: unicode
    @param host: name of the store the record comes from, either
    C{cds.cern.ch} or C{inspirehep.net}. It selects the field and the
    subfield receiving the identifier.

    @raise ValueError: when the host is not one of the two known stores.

    """
    # (host, field, subfield) triplets telling where each store
    # keeps its OAI identifier
    oai_locations = ((u"cds.cern.ch", u"0248", "a"),
                     (u"inspirehep.net", u"909CO", "o"))

    for known_host, tag, code in oai_locations:
        if host == known_host:
            break
    else:
        # the loop ended without a match
        raise ValueError(MSG_INVALID_HOST)

    # create the field on the fly when the record does not have it
    if tag not in record:
        record[tag] = dict()

    identifier = OAI_INVENIO % (host, record.id())
    record[tag][code] = identifier
class ToolException(Exception):
    """Exception declared by the harvest tools base module.

    It adds no attribute and no behaviour to the built-in C{Exception}.

    """
modules/invenio_tools/checkandfix.py
View file @
076d20c6
...
...
@@ -51,6 +51,8 @@ MONTHS = {u'Jan':'01',
u
'Nov'
:
'11'
,
u
'Dec'
:
'12'
}
MSG_INVALID_HOST
=
"Invalid host"
MSG_NO_AUTHOR
=
"Reject no author(s)"
MSG_NO_COUNTRY
=
"Reject invalid country"
MSG_NO_CONF_DATE
=
"Reject no conference date"
...
...
@@ -71,6 +73,8 @@ MSG_WELL_FORMED_DATE = "Reject submission date is not well formed"
MSG_WELL_FORMED_EDITOR
=
"Reject editor is not well formed"
OAI_INVENIO
=
"oai:%s:%s"
REG_COLLABORATION
=
re
.
compile
(
regex
.
REG_COLLABORATION
)
REG_CONF_DATES_1
=
re
.
compile
(
"(\d+) *-? *(\d+) *([A-Z][a-z]{2}) *(\d{4})"
)
...
...
@@ -740,6 +744,35 @@ class CheckAndFix(object):
self
.
_repair_paper_reference
(
record
)
return
def recover_oai(self, record, host):
    """Rebuild the OAI identifier of a record when it is missing or
    does not match C{REG_OAI}.

    Nothing is done when C{record.oai()} returns a value matching
    C{REG_OAI}; in that case the host is not even inspected.
    Otherwise the identifier is obtained by formatting C{OAI_INVENIO}
    with the host name and the value returned by C{record.id()}, and it
    is written in the field / subfield associated to the host.

    @type record: Record
    @param record: the record to repair; it is modified in place.

    @type host: unicode
    @param host: either cds.cern.ch or inspirehep.net

    @raise ValueError: when a repair is required and the host is not
    one of the two known stores.

    """
    current = record.oai()
    needs_repair = current is None or not REG_OAI.match(current)
    if not needs_repair:
        return

    # (host, field, subfield) triplets telling where each store
    # keeps its OAI identifier
    oai_locations = ((u"cds.cern.ch", u"0248", "a"),
                     (u"inspirehep.net", u"909CO", "o"))

    for known_host, tag, code in oai_locations:
        if host == known_host:
            break
    else:
        # the loop ended without a match
        raise ValueError(MSG_INVALID_HOST)

    # create the field on the fly when the record does not have it
    if tag not in record:
        record[tag] = dict()

    identifier = OAI_INVENIO % (host, record.id())
    record[tag][code] = identifier
def
submitted
(
self
,
record
):
"""Standardize the submitted date as YYYY-MM or YYYY-MM-DD.
Look for alternative when it is not defined.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment