Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
f65f458f
Commit
f65f458f
authored
Sep 20, 2015
by
LE GAC Renaud
Browse files
Improved method Record.oai.
parent
0d17cb9c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
47 additions
and
57 deletions
+47
-57
modules/invenio_tools/checkandfix.py
modules/invenio_tools/checkandfix.py
+9
-33
modules/invenio_tools/iterrecord.py
modules/invenio_tools/iterrecord.py
+1
-1
modules/invenio_tools/record.py
modules/invenio_tools/record.py
+34
-21
tests/harvester/Record/test_non_conformities.py
tests/harvester/Record/test_non_conformities.py
+3
-2
No files found.
modules/invenio_tools/checkandfix.py
View file @
f65f458f
...
...
@@ -525,57 +525,33 @@ class CheckAndFix(object):
@raise CheckException:
"""
# the location of the OAI information depends on the store
# CDS: (248, a) or INSPIREHEP: (909CO, o)
if
"0248"
in
record
:
field
,
subfield
=
"0248"
,
"a"
elif
"909CO"
in
record
:
field
,
subfield
=
"909CO"
,
"o"
else
:
# check that the OAI is defined
value
=
record
.
oai
()
if
not
value
:
raise
CheckException
(
MSG_NO_OAI
)
myid
=
record
.
id
()
# Clean OAI information.
# in some cases OAI is a list, e.g. when two records were entered
# for the same entry but one deleted.
# Select the OAI corresponding to the current ID.
if
isinstance
(
record
[
field
],
list
):
val
=
''
for
di
in
record
[
field
]:
if
di
[
subfield
].
endswith
(
myid
):
val
=
di
break
if
val
:
record
[
field
]
=
val
else
:
raise
CheckException
(
MSG_NO_OAI
)
# check that the OAI is well formed
m
=
REG_OAI
.
match
(
record
[
field
][
subfield
]
)
if
not
m
:
m
atch
=
REG_OAI
.
match
(
value
)
if
not
m
atch
:
raise
CheckException
(
MSG_WELL_FORMED_OAI
)
# The id in the OAI field might be different from the record id.
# In INVENIO there is a mechanism to redirect to the correct one
#
# The fix depends on the content of the database
if
m
.
group
(
2
)
!=
myid
:
if
m
atch
.
group
(
2
)
!=
myid
:
db
=
current
.
globalenv
[
'db'
]
# The record OAI is already used in the database. Do nothing
oai_url
=
OAI_URL
%
(
m
.
group
(
1
),
m
.
group
(
2
))
oai_url
=
OAI_URL
%
(
m
atch
.
group
(
1
),
m
atch
.
group
(
2
))
if
get_id
(
db
.
publications
,
origin
=
oai_url
):
return
# The OAI based on the record id is already used in the database.
# Modify the record OAI
oai_url
=
OAI_URL
%
(
m
.
group
(
1
),
myid
)
oai_url
=
OAI_URL
%
(
m
atch
.
group
(
1
),
myid
)
if
get_id
(
db
.
publications
,
origin
=
oai_url
):
record
[
field
][
subfield
]
=
OAI_INVENIO
%
(
m
.
group
(
1
),
myid
)
record
[
field
][
subfield
]
=
OAI_INVENIO
%
(
m
atch
.
group
(
1
),
myid
)
def
paper_reference
(
self
,
record
):
"""Check that editor, page, volume and paper year are defined
...
...
modules/invenio_tools/iterrecord.py
View file @
f65f458f
...
...
@@ -244,7 +244,7 @@ class IterRecord(object):
return
di
def
_is_not_xml
(
self
,
xml
):
"""C{True} when the C{xml} sting is well formed.
"""C{True} when the C{xml} st
r
ing is
not
well formed.
@type xml: unicode
@param xml:
...
...
modules/invenio_tools/record.py
View file @
f65f458f
...
...
@@ -6,6 +6,7 @@ import pprint
from
base
import
OAI_URL
,
REG_OAI
from
exception
import
RecordException
class
Record
(
dict
):
...
...
@@ -106,22 +107,10 @@ class Record(dict):
self
.
__host
=
u
''
return
self
.
__host
if
isinstance
(
val
,
unicode
):
match
=
REG_OAI
.
match
(
val
)
if
match
:
self
.
__host
=
match
.
group
(
1
)
return
self
.
__host
# NOTE: in a few cases we can have a list
# see cds.cern.ch/record/1513204
# [u'oai:cds.cern.ch:1513204', u'oai:cds.cern.ch:1512766']
if
isinstance
(
val
,
list
):
for
elt
in
val
:
if
elt
.
endswith
(
self
.
id
()):
match
=
REG_OAI
.
match
(
elt
)
if
match
:
self
.
__host
=
match
.
group
(
1
)
return
self
.
__host
match
=
REG_OAI
.
match
(
val
)
if
match
:
self
.
__host
=
match
.
group
(
1
)
return
self
.
__host
return
u
''
...
...
@@ -138,16 +127,35 @@ class Record(dict):
"""The Open Archive Initiative identifier
@rtype: unicode
@return: the pattern of the string is "oai:host:record_id"
@return: the pattern of the string is "oai:host:record_id".
It is an empty string when not defined
"""
# the location of the oai information depends on the store
# cds: (248, a), inspirehep: (909CO, o)
if
u
"0248"
in
self
:
return
self
.
_get
(
u
"0248"
,
"a"
)
field
,
subfield
=
u
"0248"
,
"a"
elif
u
"909CO"
in
self
:
return
self
.
_get
(
u
"909CO"
,
"o"
)
field
,
subfield
=
u
"909CO"
,
"o"
else
:
return
u
""
value
=
self
.
_get
(
field
,
subfield
)
# in some cases OAI is a list, e.g. when two records were entered
# for the same entry but one deleted.
# Select the OAI corresponding to the current ID.
if
isinstance
(
value
,
list
):
myid
=
self
.
id
()
for
el
in
value
:
if
el
.
endswith
(
myid
):
return
el
return
u
""
return
value
def
oai_url
(
self
):
"""The Open Archive Initiative URL
...
...
@@ -157,10 +165,15 @@ class Record(dict):
"http://inspirehep.net/record/123456" or an empty string
when it is not defined
"""
val
=
self
.
oai
()
m
=
REG_OAI
.
match
(
val
)
return
OAI_URL
%
(
m
.
group
(
1
),
m
.
group
(
2
))
match
=
REG_OAI
.
match
(
val
)
if
match
:
return
OAI_URL
%
(
match
.
group
(
1
),
match
.
group
(
2
))
return
u
""
def
sysno
(
self
):
return
self
.
_get
(
u
"970"
,
"a"
)
tests/harvester/Record/test_non_conformities.py
View file @
f65f458f
...
...
@@ -8,9 +8,10 @@ Protection are add in the record method to correct them
from
invenio_tools
import
load_record
def
test_protection_host
():
def
test_protection_oai
():
"""['oai:cds.cern.ch:1513204', 'oai:cds.cern.ch:1512766']"""
record
=
load_record
(
'cds.cern.ch'
,
1513204
)
assert
record
.
oai
()
==
[
'oai:cds.cern.ch:1513204'
,
'oai:cds.cern.ch:1512766'
]
assert
record
.
oai
()
==
'oai:cds.cern.ch:1513204'
assert
record
.
host
()
==
"cds.cern.ch"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment