Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
76a3ae99
Commit
76a3ae99
authored
Jun 23, 2017
by
LE GAC Renaud
Browse files
Migrate RecordInst.
parent
d94fe829
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
275 additions
and
135 deletions
+275
-135
modules/invenio_tools/factory.py
modules/invenio_tools/factory.py
+44
-2
modules/invenio_tools/recordinst.py
modules/invenio_tools/recordinst.py
+63
-28
tests/basis/test_02_record_factory.py
tests/basis/test_02_record_factory.py
+132
-105
tests/basis/test_07_RecordInst.py
tests/basis/test_07_RecordInst.py
+36
-0
No files found.
modules/invenio_tools/factory.py
View file @
76a3ae99
...
...
@@ -2,6 +2,8 @@
""" invenio_tools.factory
"""
import
requests
from
base
import
(
is_conference
,
is_institute
,
is_thesis
,
...
...
@@ -20,6 +22,44 @@ from recordpubli import RecordPubli
from
recordthesis
import
RecordThesis
def _parse_marc_subfields(txt):
    """Split a MARC text line into a dictionary of subfields.

    Args:
        txt (str): a line like ``000recid 110__ $$aXXX$$bYYY$$tZZZ``.

    Returns:
        dict: the subfield code (one character) mapped to its value.
            It is empty when the line contains no ``$$`` separator.

    """
    # what precedes the first "$$" is the record id and the field tag
    chunks = txt.split("$$")[1:]
    return {chunk[0]: chunk[1:] for chunk in chunks if chunk}


def add_affiliation_keys(recjson):
    """Add the affiliation keys to the record describing an institute:

        * The XML record contains the affiliation keys used by
          inspirehep.net. They are located in the field 110__u and
          110__t (future).

        * The JSON record does not contain this information.

        * This tool adds the affiliation keys to the JSON record,
          in place. They are located:

            +----------------+-------------------------------+
            | field (limbra) | subfield                      |
            +----------------+-------------------------------+
            | corporate_note | identifier, future_identifier |
            +----------------+-------------------------------+

          A key which is not found in the field 110 is set to an
          empty string.

    Args:
        recjson (dict): record data (MarcJSON).
            The key ``recid`` is used to query inspirehep.net.

    Raises:
        requests.HTTPError: when the server replies with an error status.

    """
    url = "http://inspirehep.net/record/%i" % recjson[u"recid"]
    rep = requests.get(url, params={"ot": "110", "of": "txt"})

    # do not parse an error page as if it was the field 110
    rep.raise_for_status()

    # decode the string: '000recid 110__ $$aXXX$$bYYY$$tZZZ\n'
    txt = rep.content.replace("\n", "")
    di = _parse_marc_subfields(txt)

    # 110__t is the future key, it is not necessarily defined
    recjson[u"corporate_note"] = {
        u"identifier": di.get("u", u""),
        u"future_identifier": di.get("t", u"")}
def
add_conference_data
(
recjson
):
"""Add the conference data to the recjson.
...
...
@@ -83,6 +123,7 @@ def add_conference_data(recjson):
# get the data
if
conf_id
is
not
None
:
conf_id
=
(
conf_id
if
isinstance
(
conf_id
,
int
)
else
int
(
conf_id
))
confjson
=
get_conference_data
(
host
,
conf_id
=
conf_id
)
else
:
...
...
@@ -117,6 +158,7 @@ def build_record(recjson):
upcast_record
=
RecordConf
(
recjson
)
elif
is_institute
(
recjson
):
add_affiliation_keys
(
recjson
)
upcast_record
=
RecordInst
(
recjson
)
elif
is_thesis
(
recjson
):
...
...
@@ -135,7 +177,7 @@ def get_conference_data(host, conf_id=None, key=None):
host (unicode):
possible values are ``cds.cern.ch`` or ``inspirehep.net``.
conf_id (
unicode
):
conf_id (
int
):
the conference identifier in the store.
This is the preferred way.
...
...
@@ -159,7 +201,7 @@ def get_conference_data(host, conf_id=None, key=None):
#
if
conf_id
is
not
None
:
recjson
=
cds
.
get_record
(
conf_id
)
if
str
(
recjson
[
"recid"
]
)
!=
conf_id
:
if
recjson
[
"recid"
]
!=
conf_id
:
raise
CdsException
(
MSG_INV_CONF
)
return
recjson
...
...
modules/invenio_tools/recordinst.py
View file @
76a3ae99
""" invenio_tools.recordinst
"""
from .base import is_institute, REG_OAI
from .exception import RecordException
from .record import Record
MSG_INVALID_ARG
=
"Invalid argument record"
...
...
@@ -12,70 +18,99 @@ MSG_INVALID_RECORD = "Invalid record, it is not describing an institute"
class
RecordInst
(
Record
):
"""The MARC record describing an institute.
The relation between methods and MARC fields are the following::
------------------------+-------------+
| | INSPIREHEP |
------------------------+-------------+
| institute identifier | 110 u |
| future institute id | 110 t |
| name | 110 b |
| type of record | 980 a |
------------------------+-------------+
"""The record describing an institute.
Fields are::
+-----------------------------+----------------------------------+
| field (inspirehep) | subfield |
+-----------------------------+----------------------------------+
| FIXME_OAI | id, set |
| administrative_history | |
| authority_institution | institution |
| cataloguer_info | creation_date, modification_date |
| collection | primary, secondary |
| corporate_name | name, subordinate_unit |
| creation_date | |
| files | |
| filetypes | |
| number_of_citations | |
| number_of_comments | |
| number_of_reviews | |
| persistent_identifiers_keys | |
| recid | |
| source_of_description | note |
| system_control_number | institute, value |
| url | |
| version_id | |
+-----------------------------+----------------------------------+
One field is added by limbra:
+-----------------------------+----------------------------------+
| field (limbra) | subfield |
+-----------------------------+----------------------------------+
| corporate_note | identifier, future_identifier, |
| | name |
+-----------------------------+----------------------------------+
Args:
record (Record):
recjson (dict):
institute data (MarcJSON)
"""
def
__init__
(
self
,
rec
ord
):
def
__init__
(
self
,
rec
json
):
if
not
isinstance
(
record
,
Record
):
raise
RecordException
(
MSG_INVALID_ARG
)
host
=
REG_OAI
.
match
(
recjson
[
u
"FIXME_OAI"
][
u
"id"
]).
group
(
1
)
if
host
!=
"inspirehep.net"
:
raise
RecordException
(
MSG_INVALID_HOST
)
if
not
is_institute
(
rec
ord
):
if
not
is_institute
(
rec
json
):
raise
RecordException
(
MSG_INVALID_RECORD
)
if
record
.
host
()
!=
"inspirehep.net"
:
raise
RecordException
(
MSG_INVALID_HOST
)
Record
.
__init__
(
self
,
record
)
Record
.
__init__
(
self
,
recjson
)
def
future_identifier
(
self
):
"""Future identifier of the institute.
Returns:
str: the future inspirehep identifier or an empty string
unicode:
the future inspirehep identifier or an empty string
if the identifier is not defined.
"""
return
self
.
_get
(
"110"
,
"t
"
)
return
self
.
_get
(
u
"corporate_note"
,
u
"future_identifier
"
)
def
identifier
(
self
):
"""Identifier of the institute.
Returns:
str: the current inspirehep identifier (2015) or an empty
unicode:
the current inspirehep identifier (2015) or an empty
string if it is not defined.
"""
return
self
.
_get
(
"110"
,
"u
"
)
return
self
.
_get
(
u
"corporate_note"
,
u
"identifier
"
)
def
name
(
self
):
""" Name of the institute.
Returns:
str: the name of the institute or an empty string if
it is not defined.
unicode:
- the name of the institute.
- an empty string when it is not defined.
"""
return
self
.
_get
(
"110"
,
"a"
)
value
=
self
.
_get
(
u
"corporate_name"
,
u
"subordinate_unit"
)
if
isinstance
(
value
,
list
)
and
len
(
value
)
==
1
:
return
value
[
0
]
return
u
""
def
rex
(
self
):
""" Regular expression to search authors affiliate to the institute.
Returns:
str: the regular expression to search author affiliate
unicode:
the regular expression to search author affiliate
to the institute in the store ``cds.cern.ch`` or
``inspirehep.net``.
...
...
tests/basis/test_02_record_factory.py
View file @
76a3ae99
...
...
@@ -10,7 +10,8 @@ from invenio_tools.base import (is_conference,
is_institute
,
is_thesis
)
from
invenio_tools.factory
import
(
add_conference_data
,
from
invenio_tools.factory
import
(
add_affiliation_keys
,
add_conference_data
,
build_record
,
get_conference_data
)
...
...
@@ -24,104 +25,7 @@ from invenio_tools.recordthesis import RecordThesis
# ............................................................................
#
# Section to test introspection and instantiation
#
def test_conference_cds():
    """Record 1411352 of cds.cern.ch is a conference built as RecordConf."""
    data = InvenioStore("cds.cern.ch").get_record(1411352)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
def test_conference_inspirehep():
    """Record 1276938 of inspirehep.net is a conference built as RecordConf."""
    data = InvenioStore("inspirehep.net").get_record(1276938)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
def test_institute():
    """CPPM: record 902989 of inspirehep.net is built as RecordInst."""
    data = InvenioStore("inspirehep.net").get_record(902989)

    # introspection
    assert not is_conference(data)
    assert is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordInst)
def test_publi_cds():
    """Precision luminosity measurements at LHCb (cds.cern.ch, 1951625).

    The record matches none of the specialised predicates and is built
    as a RecordPubli.

    """
    data = InvenioStore("cds.cern.ch").get_record(1951625)

    # introspection
    assert not is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordPubli)
def test_publi_inspirehep():
    """Precision luminosity measurements at LHCb (inspirehep.net, 1319638).

    The record matches none of the specialised predicates and is built
    as a RecordPubli.

    """
    data = InvenioStore("inspirehep.net").get_record(1319638)

    # introspection
    assert not is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordPubli)
def test_talk_cds():
    """Record 2239092 of cds.cern.ch is a conference built as RecordConf."""
    data = InvenioStore("cds.cern.ch").get_record(2239092)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
def test_thesis_cds():
    """Record 1632177 of cds.cern.ch is a thesis built as RecordThesis."""
    data = InvenioStore("cds.cern.ch").get_record(1632177)

    # introspection
    assert not is_conference(data)
    assert not is_institute(data)
    assert is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordThesis)
# ............................................................................
#
# Section to test tool to get and add conference data
# Conference proceeding and talk
#
def
test_get_conference_data
():
""" check the different approach to get the conference data
...
...
@@ -196,10 +100,13 @@ def test_add_conference_data():
add_conference_data
(
recjson
)
assert
"meeting_name"
in
recjson
assert
"meeting"
in
recjson
assert
"meeting
_note
"
in
recjson
assert
recjson
[
"meeting_name"
][
0
][
"coference_code"
]
==
"rome20101206"
assert
recjson
[
"meeting"
][
"url"
]
==
"http://www.roma1.infn.it/discrete10"
assert
recjson
[
"meeting"
][
"recid"
]
==
1181092
assert
recjson
[
"meeting_note"
][
"url"
]
==
\
"http://www.roma1.infn.it/discrete10"
assert
recjson
[
"meeting_note"
][
"recid"
]
==
1181092
# ........................................................................
#
...
...
@@ -210,7 +117,127 @@ def test_add_conference_data():
add_conference_data
(
recjson
)
assert
"meeting_name"
in
recjson
assert
"meeting"
in
recjson
assert
"meeting
_note
"
in
recjson
assert
recjson
[
"meeting_name"
][
0
][
"coference_code"
]
==
"C10-12-06"
assert
recjson
[
"meeting"
][
"url"
]
==
"http://www.roma1.infn.it/discrete10"
assert
recjson
[
"meeting"
][
"recid"
]
==
980401
assert
recjson
[
"meeting_note"
][
"url"
]
==
\
"http://www.roma1.infn.it/discrete10"
assert
recjson
[
"meeting_note"
][
"recid"
]
==
980401
def test_conference_cds():
    """Record 1411352 of cds.cern.ch is a conference built as RecordConf."""
    data = InvenioStore("cds.cern.ch").get_record(1411352)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
def test_conference_inspirehep():
    """Record 1276938 of inspirehep.net is a conference built as RecordConf."""
    data = InvenioStore("inspirehep.net").get_record(1276938)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
def test_talk_cds():
    """Record 2239092 of cds.cern.ch is a conference built as RecordConf."""
    data = InvenioStore("cds.cern.ch").get_record(2239092)

    # introspection
    assert is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordConf)
# ............................................................................
#
# Institute
#
def test_add_affiliation_keys():
    """CPPM: the affiliation keys are added to the JSON record.

    The record 902989 is fetched from inspirehep.net and completed by
    ``add_affiliation_keys`` with the field ``corporate_note``.

    """
    store = InvenioStore("inspirehep.net")
    recjson = store.get_record(902989)
    add_affiliation_keys(recjson)

    assert u"corporate_note" in recjson

    note = recjson[u"corporate_note"]
    assert note[u"identifier"] == "Marseille, CPPM"

    # the factory writes the key "future_identifier" (with an "e"),
    # which is also the key read by RecordInst.future_identifier
    assert note[u"future_identifier"] == "CPPM, Marseille"
def test_institute():
    """CPPM: record 902989 of inspirehep.net is built as RecordInst."""
    data = InvenioStore("inspirehep.net").get_record(902989)

    # introspection
    assert not is_conference(data)
    assert is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordInst)
# ............................................................................
#
# Article, ...
#
def test_publi_cds():
    """Precision luminosity measurements at LHCb (cds.cern.ch, 1951625).

    The record matches none of the specialised predicates and is built
    as a RecordPubli.

    """
    data = InvenioStore("cds.cern.ch").get_record(1951625)

    # introspection
    assert not is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordPubli)
def test_publi_inspirehep():
    """Precision luminosity measurements at LHCb (inspirehep.net, 1319638).

    The record matches none of the specialised predicates and is built
    as a RecordPubli.

    """
    data = InvenioStore("inspirehep.net").get_record(1319638)

    # introspection
    assert not is_conference(data)
    assert not is_institute(data)
    assert not is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordPubli)
# ............................................................................
#
# Thesis
#
def test_thesis_cds():
    """Record 1632177 of cds.cern.ch is a thesis built as RecordThesis."""
    data = InvenioStore("cds.cern.ch").get_record(1632177)

    # introspection
    assert not is_conference(data)
    assert not is_institute(data)
    assert is_thesis(data)

    # instantiation
    built = build_record(data)
    assert isinstance(built, RecordThesis)
tests/basis/test_07_RecordInst.py
0 → 100644
View file @
76a3ae99
# -*- coding: utf-8 -*-
"""test_07_RecordInst
Test specific methods of the RecordInst class for CPPM
"""
import
pytest
from
invenio_tools
import
load_record
@pytest.fixture(scope="module")
def record():
    """Load the record 902989 from inspirehep.net, once for the module."""
    loaded = load_record("inspirehep.net", 902989)
    return loaded
def test_future_identifer(record):
    """Check the future identifier of the institute."""
    found = record.future_identifier()
    assert found == u'CPPM, Marseille'
def test_id(record):
    """Check the record identifier."""
    found = record.id()
    assert found == 902989
def test_identifier(record):
    """Check the current identifier of the institute."""
    found = record.identifier()
    assert found == u'Marseille, CPPM'
def test_name(record):
    """Check the name of the institute."""
    found = record.name()
    assert found == u'Centre de Physique des Particules de Marseille (CPPM)'
def test_rex(record):
    """Check the regular expression used to search the affiliated authors."""
    found = record.rex()
    assert found == (
        r"Marseille, CPPM|CPPM, Marseille|"
        "Centre de Physique des Particules de Marseille (CPPM)")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment