Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
76a3ae99
Commit
76a3ae99
authored
Jun 23, 2017
by
LE GAC Renaud
Browse files
Migrate RecordInst.
parent
d94fe829
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
275 additions
and
135 deletions
+275
-135
modules/invenio_tools/factory.py
modules/invenio_tools/factory.py
+44
-2
modules/invenio_tools/recordinst.py
modules/invenio_tools/recordinst.py
+63
-28
tests/basis/test_02_record_factory.py
tests/basis/test_02_record_factory.py
+132
-105
tests/basis/test_07_RecordInst.py
tests/basis/test_07_RecordInst.py
+36
-0
No files found.
modules/invenio_tools/factory.py
View file @
76a3ae99
...
...
@@ -2,6 +2,8 @@
""" invenio_tools.factory
"""
import
requests
from
base
import
(
is_conference
,
is_institute
,
is_thesis
,
...
...
@@ -20,6 +22,44 @@ from recordpubli import RecordPubli
from
recordthesis
import
RecordThesis
def add_affiliation_keys(recjson):
    """Add the affiliation keys to the record describing an institute.

        * The XML record contains the affiliation keys used by inspirehep.net.
          They are located in the field 110__u and 110__t (future).
        * The JSON record does not contain this information.
        * This tool adds the affiliation keys to the JSON record.
          They are located:

            +----------------+-------------------------------+
            | field (limbra) | subfield                      |
            +----------------+-------------------------------+
            | corporate_note | identifier, future_identifier |
            +----------------+-------------------------------+

    A subfield which is missing in the reply of the store is set to an
    empty string instead of raising a ``KeyError``.

    Args:
        recjson (dict):
            record data (MarcJSON). It has to contain the key ``recid``.
            The dictionary is modified in place.

    """
    url = "http://inspirehep.net/record/%i" % recjson[u"recid"]
    rep = requests.get(url, params={"ot": "110", "of": "txt"})

    # decode the string: '000recid 110__ $$aXXX$$bYYY$$tZZZ\n'
    # NOTE(review): rep.content is a byte string; this replace call assumes
    # Python 2 string semantics -- confirm before running under Python 3.
    txt = rep.content.replace("\n", "")

    # without any '$' there is no subfield to decode; slicing from the -1
    # returned by find() would wrongly interpret the last character
    start = txt.find("$")
    chunks = txt[start:].split("$$") if start >= 0 else []

    # the first character of a chunk is the subfield code, the rest its value
    di = {el[0:1]: el[1:] for el in chunks if el}

    recjson[u"corporate_note"] = {
        u"identifier": di.get("u", u""),
        u"future_identifier": di.get("t", u""),
    }
def
add_conference_data
(
recjson
):
"""Add the conference data to the recjson.
...
...
@@ -83,6 +123,7 @@ def add_conference_data(recjson):
# get the data
if
conf_id
is
not
None
:
conf_id
=
(
conf_id
if
isinstance
(
conf_id
,
int
)
else
int
(
conf_id
))
confjson
=
get_conference_data
(
host
,
conf_id
=
conf_id
)
else
:
...
...
@@ -117,6 +158,7 @@ def build_record(recjson):
upcast_record
=
RecordConf
(
recjson
)
elif
is_institute
(
recjson
):
add_affiliation_keys
(
recjson
)
upcast_record
=
RecordInst
(
recjson
)
elif
is_thesis
(
recjson
):
...
...
@@ -135,7 +177,7 @@ def get_conference_data(host, conf_id=None, key=None):
host (unicode):
possible values are ``cds.cern.ch`` or ``inspirehep.net``.
conf_id (
unicode
):
conf_id (
int
):
the conference identifier in the store.
This is the preferred way.
...
...
@@ -159,7 +201,7 @@ def get_conference_data(host, conf_id=None, key=None):
#
if
conf_id
is
not
None
:
recjson
=
cds
.
get_record
(
conf_id
)
if
str
(
recjson
[
"recid"
]
)
!=
conf_id
:
if
recjson
[
"recid"
]
!=
conf_id
:
raise
CdsException
(
MSG_INV_CONF
)
return
recjson
...
...
modules/invenio_tools/recordinst.py
View file @
76a3ae99
""" invenio_tools.recordinst
"""
# REG_OAI is used by RecordInst.__init__ to extract the host from the
# OAI identifier of the record.
from .base import is_institute, REG_OAI
from .exception import RecordException
from .record import Record
MSG_INVALID_ARG
=
"Invalid argument record"
...
...
@@ -12,70 +18,99 @@ MSG_INVALID_RECORD = "Invalid record, it is not describing an institute"
class
RecordInst
(
Record
):
"""The MARC record describing an institute.
The relation between methods and MARC fields are the following::
------------------------+-------------+
| | INSPIREHEP |
------------------------+-------------+
| institute identifier | 110 u |
| future institute id | 110 t |
| name | 110 b |
| type of record | 980 a |
------------------------+-------------+
"""The record describing an institute.
Fields are::
+-----------------------------+----------------------------------+
| field (inspirehep) | subfield |
+-----------------------------+----------------------------------+
| FIXME_OAI | id, set |
| administrative_history | |
| authority_institution | institution |
| cataloguer_info | creation_date, modification_date |
| collection | primary, secondary |
| corporate_name | name, subordinate_unit |
| creation_date | |
| files | |
| filetypes | |
| number_of_citations | |
| number_of_comments | |
| number_of_reviews | |
| persistent_identifiers_keys | |
| recid | |
| source_of_description | note |
| system_control_number | institute, value |
| url | |
| version_id | |
+-----------------------------+----------------------------------+
One field is added by limbra:
+-----------------------------+----------------------------------+
| field (limbra) | subfield |
+-----------------------------+----------------------------------+
| corporate_note | identifier, future_identifier, |
| | name |
+-----------------------------+----------------------------------+
Args:
record (Record):
recjson (dict):
institute data (MarcJSON)
"""
def
__init__
(
self
,
rec
ord
):
def
__init__
(
self
,
rec
json
):
if
not
isinstance
(
record
,
Record
):
raise
RecordException
(
MSG_INVALID_ARG
)
host
=
REG_OAI
.
match
(
recjson
[
u
"FIXME_OAI"
][
u
"id"
]).
group
(
1
)
if
host
!=
"inspirehep.net"
:
raise
RecordException
(
MSG_INVALID_HOST
)
if
not
is_institute
(
rec
ord
):
if
not
is_institute
(
rec
json
):
raise
RecordException
(
MSG_INVALID_RECORD
)
if
record
.
host
()
!=
"inspirehep.net"
:
raise
RecordException
(
MSG_INVALID_HOST
)
Record
.
__init__
(
self
,
record
)
Record
.
__init__
(
self
,
recjson
)
def
future_identifier
(
self
):
"""Future identifier of the institute.
Returns:
str: the future inspirehep identifier or an empty string
unicode:
the future inspirehep identifier or an empty string
if the identifier is not defined.
"""
return
self
.
_get
(
"110"
,
"t
"
)
return
self
.
_get
(
u
"corporate_note"
,
u
"future_identifier
"
)
def
identifier
(
self
):
"""Identifier of the institute.
Returns:
str: the current inspirehep identifier (2015) or an empty
unicode:
the current inspirehep identifier (2015) or an empty
string if it is not defined.
"""
return
self
.
_get
(
"110"
,
"u
"
)
return
self
.
_get
(
u
"corporate_note"
,
u
"identifier
"
)
def
name
(
self
):
""" Name of the institute.
Returns:
str: the name of the institute or an empty string if
it is not defined.
unicode:
- the name of the institute.
- an empty string when it is not defined.
"""
return
self
.
_get
(
"110"
,
"a"
)
value
=
self
.
_get
(
u
"corporate_name"
,
u
"subordinate_unit"
)
if
isinstance
(
value
,
list
)
and
len
(
value
)
==
1
:
return
value
[
0
]
return
u
""
def
rex
(
self
):
""" Regular expression to search authors affiliate to the institute.
Returns:
str: the regular expression to search author affiliate
unicode:
the regular expression to search author affiliate
to the institute in the store ``cds.cern.ch`` or
``inspirehep.net``.
...
...
tests/basis/test_02_record_factory.py
View file @
76a3ae99
...
...
@@ -10,7 +10,8 @@ from invenio_tools.base import (is_conference,
is_institute
,
is_thesis
)
from
invenio_tools.factory
import
(
add_conference_data
,
from
invenio_tools.factory
import
(
add_affiliation_keys
,
add_conference_data
,
build_record
,
get_conference_data
)
...
...
@@ -24,104 +25,7 @@ from invenio_tools.recordthesis import RecordThesis
# ............................................................................
#
# Section to test introspection and instantiation
#
def
test_conference_cds
():
store
=
InvenioStore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1411352
)
assert
is_conference
(
recjson
)
assert
not
is_institute
(
recjson
)
assert
not
is_thesis
(
recjson
)
record
=
build_record
(
recjson
)
assert
isinstance
(
record
,
RecordConf
)
def
test_conference_inspirehep
():
store
=
InvenioStore
(
"inspirehep.net"
)
recjson
=
store
.
get_record
(
1276938
)
assert
is_conference
(
recjson
)
assert
not
is_institute
(
recjson
)
assert
not
is_thesis
(
recjson
)
record
=
build_record
(
recjson
)
assert
isinstance
(
record
,
RecordConf
)
def
test_institute
():
"""CPPM"""
store
=
InvenioStore
(
"inspirehep.net"
)
recjson
=
store
.
get_record
(
902989
)
assert
not
is_conference
(
recjson
)
assert
is_institute
(
recjson
)
assert
not
is_thesis
(
recjson
)
record
=
build_record
(
recjson
)
assert
isinstance
(
record
,
RecordInst
)
def
test_publi_cds
():
"""Precision luminosity measurements at LHCb"""
store
=
InvenioStore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1951625
)
assert
not
is_conference
(
recjson
)
assert
not
is_institute
(
recjson
)
assert
not
is_thesis
(
recjson
)
record
=
build_record
(
recjson
)
assert
isinstance
(
record
,
RecordPubli
)
def
test_publi_inspirehep
():
"""Precision luminosity measurements at LHCb"""
store
=
InvenioStore
(
"inspirehep.net"
)
recjson
=
store
.
get_record
(
1319638
)
assert
not
is_conference
(
recjson
)
assert
not
is_institute
(
recjson
)
assert
not
is_thesis
(
recjson
)
record
=
build_record
(
recjson
)
assert
isinstance
(
record
,
RecordPubli
)
def
test_talk_cds
():
store
=
InvenioStore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
2239092
)
assert
is_conference
(
recjson
)
assert
not
is_institute
(
recjson
)
assert
not
is_thesis
(
recjson
)
record
=
build_record
(
recjson
)
assert
isinstance
(
record
,
RecordConf
)
def
test_thesis_cds
():
store
=
InvenioStore
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1632177
)
assert
not
is_conference
(
recjson
)
assert
not
is_institute
(
recjson
)
assert
is_thesis
(
recjson
)
record
=
build_record
(
recjson
)
assert
isinstance
(
record
,
RecordThesis
)
# ............................................................................
#
# Section to test tool to get and add conference data
# Conference proceeding and talk
#
def
test_get_conference_data
():
""" check the different approach to get the conference data
...
...
@@ -196,10 +100,13 @@ def test_add_conference_data():
add_conference_data
(
recjson
)
assert
"meeting_name"
in
recjson
assert
"meeting"
in
recjson
assert
"meeting
_note
"
in
recjson
assert
recjson
[
"meeting_name"
][
0
][
"coference_code"
]
==
"rome20101206"
assert
recjson
[
"meeting"
][
"url"
]
==
"http://www.roma1.infn.it/discrete10"
assert
recjson
[
"meeting"
][
"recid"
]
==
1181092
assert
recjson
[
"meeting_note"
][
"url"
]
==
\
"http://www.roma1.infn.it/discrete10"
assert
recjson
[
"meeting_note"
][
"recid"
]
==
1181092
# ........................................................................
#
...
...
@@ -210,7 +117,127 @@ def test_add_conference_data():
add_conference_data
(
recjson
)
assert
"meeting_name"
in
recjson
assert
"meeting"
in
recjson
assert
"meeting
_note
"
in
recjson
assert
recjson
[
"meeting_name"
][
0
][
"coference_code"
]
==
"C10-12-06"
assert
recjson
[
"meeting"
][
"url"
]
==
"http://www.roma1.infn.it/discrete10"
assert
recjson
[
"meeting"
][
"recid"
]
==
980401
assert
recjson
[
"meeting_note"
][
"url"
]
==
\
"http://www.roma1.infn.it/discrete10"
assert
recjson
[
"meeting_note"
][
"recid"
]
==
980401
def test_conference_cds():
    """A conference record from cds.cern.ch is upcast to RecordConf."""
    recjson = InvenioStore("cds.cern.ch").get_record(1411352)

    # introspection: only the conference predicate matches
    assert is_conference(recjson)
    assert not is_institute(recjson)
    assert not is_thesis(recjson)

    # instantiation through the factory
    upcast = build_record(recjson)
    assert isinstance(upcast, RecordConf)
def test_conference_inspirehep():
    """A conference record from inspirehep.net is upcast to RecordConf."""
    recjson = InvenioStore("inspirehep.net").get_record(1276938)

    # introspection: only the conference predicate matches
    assert is_conference(recjson)
    assert not is_institute(recjson)
    assert not is_thesis(recjson)

    # instantiation through the factory
    upcast = build_record(recjson)
    assert isinstance(upcast, RecordConf)
def test_talk_cds():
    """A talk record from cds.cern.ch is handled as a conference record."""
    recjson = InvenioStore("cds.cern.ch").get_record(2239092)

    # introspection: only the conference predicate matches
    assert is_conference(recjson)
    assert not is_institute(recjson)
    assert not is_thesis(recjson)

    # instantiation through the factory
    upcast = build_record(recjson)
    assert isinstance(upcast, RecordConf)
# ............................................................................
#
# Institute
#
def test_add_affiliation_keys():
    """CPPM: the affiliation keys are added under ``corporate_note``."""
    store = InvenioStore("inspirehep.net")
    recjson = store.get_record(902989)
    add_affiliation_keys(recjson)

    assert u"corporate_note" in recjson

    note = recjson[u"corporate_note"]
    assert note[u"identifier"] == "Marseille, CPPM"
    # add_affiliation_keys stores the value under "future_identifier"
    assert note[u"future_identifier"] == "CPPM, Marseille"
def test_institute():
    """CPPM: an institute record is upcast to RecordInst."""
    recjson = InvenioStore("inspirehep.net").get_record(902989)

    # introspection: only the institute predicate matches
    assert not is_conference(recjson)
    assert is_institute(recjson)
    assert not is_thesis(recjson)

    # instantiation through the factory
    upcast = build_record(recjson)
    assert isinstance(upcast, RecordInst)
# ............................................................................
#
# Article, ...
#
def test_publi_cds():
    """Precision luminosity measurements at LHCb (cds.cern.ch)"""
    recjson = InvenioStore("cds.cern.ch").get_record(1951625)

    # introspection: none of the specialised predicates matches
    assert not is_conference(recjson)
    assert not is_institute(recjson)
    assert not is_thesis(recjson)

    # instantiation through the factory
    upcast = build_record(recjson)
    assert isinstance(upcast, RecordPubli)
def test_publi_inspirehep():
    """Precision luminosity measurements at LHCb (inspirehep.net)"""
    recjson = InvenioStore("inspirehep.net").get_record(1319638)

    # introspection: none of the specialised predicates matches
    assert not is_conference(recjson)
    assert not is_institute(recjson)
    assert not is_thesis(recjson)

    # instantiation through the factory
    upcast = build_record(recjson)
    assert isinstance(upcast, RecordPubli)
# ............................................................................
#
# Thesis
#
def test_thesis_cds():
    """A thesis record from cds.cern.ch is upcast to RecordThesis."""
    recjson = InvenioStore("cds.cern.ch").get_record(1632177)

    # introspection: only the thesis predicate matches
    assert not is_conference(recjson)
    assert not is_institute(recjson)
    assert is_thesis(recjson)

    # instantiation through the factory
    upcast = build_record(recjson)
    assert isinstance(upcast, RecordThesis)
tests/basis/test_07_RecordInst.py
0 → 100644
View file @
76a3ae99
# -*- coding: utf-8 -*-
"""test_07_RecordInst
Test specific methods of the RecordInst class for CPPM
"""
import
pytest
from
invenio_tools
import
load_record
@pytest.fixture(scope="module")
def record():
    """Load the institute record used by every test of this module."""
    host, rec_id = "inspirehep.net", 902989
    return load_record(host, rec_id)
def test_future_identifer(record):
    """Check the future identifier of the institute."""
    found = record.future_identifier()
    assert found == u'CPPM, Marseille'
def test_id(record):
    """Check the record identifier."""
    found = record.id()
    assert found == 902989
def test_identifier(record):
    """Check the current identifier of the institute."""
    found = record.identifier()
    assert found == u'Marseille, CPPM'
def test_name(record):
    """Check the name of the institute."""
    found = record.name()
    assert found == u'Centre de Physique des Particules de Marseille (CPPM)'
def test_rex(record):
    """Check the regular expression built for the institute."""
    found = record.rex()
    assert found == (
        r"Marseille, CPPM|CPPM, Marseille|"
        "Centre de Physique des Particules de Marseille (CPPM)")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment