Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
b81c3c96
Commit
b81c3c96
authored
Sep 17, 2016
by
LE GAC Renaud
Browse files
Add the authors modules containing a collection of iterators.
parent
b5d7b17b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
260 additions
and
0 deletions
+260
-0
modules/invenio_tools/__init__.py
modules/invenio_tools/__init__.py
+6
-0
modules/invenio_tools/authors.py
modules/invenio_tools/authors.py
+184
-0
tests/invenio_tools/author_iterators/test_acl_cds1951625.py
tests/invenio_tools/author_iterators/test_acl_cds1951625.py
+70
-0
No files found.
modules/invenio_tools/__init__.py
View file @
b81c3c96
...
...
@@ -26,6 +26,12 @@ from exception import (CdsException,
RecordException
,
XmlException
)
from
authors
import
(
iter_author_affiliations
,
iter_author_affiliation_keys
,
iter_author_fields
,
iter_author_items
,
iter_author_names
)
from
inveniostore
import
InvenioStore
from
iterrecord
import
IterRecord
,
REG_INT
from
marc12
import
Marc12
...
...
modules/invenio_tools/authors.py
0 → 100644
View file @
b81c3c96
# -*- coding: utf-8 -*-
""" invenio_tools.authors

A collection of iterators over the author fields of a MARC record.

"""
from
base
import
REG_AUTHOR
from
itertools
import
chain
,
imap
,
izip_longest
def to_list(x):
    """Wrap an object in a list unless it is already a list.

    Args:
        x: any object

    Returns:
        list: ``x`` itself when it is a ``list`` instance, otherwise
            the one-element list ``[x]``.

    """
    if isinstance(x, list):
        return x

    return [x]
def affiliation_keys(field):
    """Build the affiliation key(s) of an author field.

    Note:
        A key is the concatenation of one "u" value with the "v" value
        found at the same position. Both entries are normalised to lists
        and paired with ``izip_longest``; the shorter side is padded
        with an empty string. An author can have several affiliations,
        hence several keys.

    Args:
        field (dict): author field

    Returns:
        itertools.imap: iterator over the unicode keys. An empty tuple is
            returned instead when the field has no "u" key.

    """
    if "u" in field:
        u_values = to_list(field["u"])
        v_values = to_list(field["v"]) if "v" in field else []

        # (u, v) pairs, the missing side being replaced by ""
        pairs = izip_longest(u_values, v_values, fillvalue="")

        def join_pair(pair):
            return u"%s%s" % pair

        return imap(join_pair, pairs)

    return ()
def author_item(field):
    """Build the author item for an author field.

    The item pairs the content of the "a" key (the author name) with
    the result of ``affiliation_keys`` for the same field.

    Args:
        field (dict): author field

    Returns:
        tuple: (author name, affiliation keys iterator). A pair of empty
            unicode strings is returned when the field has no "a" key.

    """
    if "a" in field:
        name = field["a"]
        keys = affiliation_keys(field)
        return (name, keys)

    return (u"", u"")
def author_name(field):
    """Return the author name stored in the "a" key of an author field.

    Note:
        The name is usually a string but it can be a list,
        e.g inspirehep.net/138663:
        [u'Zuniga, J.', u'(the A.N.T.ARES. Collaboration)']
        In that case the first element matching ``REG_AUTHOR`` is
        selected.

    Args:
        field (dict): author field

    Returns:
        str: the author name, or an empty unicode string when the field
            has no "a" key. When the name is a list and none of its
            elements matches ``REG_AUTHOR``, the list is returned as is.

    """
    if "a" not in field:
        return u""

    value = field["a"]
    if not isinstance(value, list):
        return value

    # first element looking like an author name, the raw list otherwise
    matching = (elt for elt in value if REG_AUTHOR.match(elt))
    return next(matching, value)
def iter_author_fields(record):
    """Iterate over the author fields of a record.

    The authors are spread over the 100 and 700 fields. The function
    handles the following cases:

        * the first author is defined in 100 but not in 700;
        * the first author is defined in 100 and repeated in 700
          (it is then returned only once);
        * only one of the 100 or 700 field exists;
        * no author field at all.

    Args:
        record (Record): MARC record for the publication

    Returns:
        iterable: an ``itertools.chain`` when the 100 and 700 fields are
            combined, a list when a single field provides all the
            authors, an empty tuple when the record has no author
            field.

    """
    if u"100" in record and u"700" in record:
        u100, u700 = record[u"100"], record[u"700"]

        if isinstance(u100, dict) and "a" in u100:

            if isinstance(u700, list):
                # the first author is repeated at the head of the 700
                # list: the list is already complete. An empty list
                # cannot repeat anything and falls through to the chain.
                if u700 and "a" in u700[0] and u100["a"] == u700[0]["a"]:
                    return to_list(u700)

            elif isinstance(u700, dict):
                # 100 and 700 describe the same single author
                if "a" in u700 and u100["a"] == u700["a"]:
                    return to_list(u100)

        return chain(to_list(u100), to_list(u700))

    # Only one of the two fields exists. The fields have to be returned:
    # falling through would hand None to the callers, which map a
    # function over the result.
    if u"100" in record:
        return to_list(record[u"100"])

    if u"700" in record:
        # a lone 700 field can be a dict, not a list of dict
        return to_list(record[u"700"])

    return ()
def iter_author_affiliations(record):
    """Iterate over the affiliations of each author.

    Note:
        The affiliation is defined by the "u" key of the author field.
        One tuple is produced per author field since an author can have
        several affiliations. The tuple is empty when the field has no
        "u" key.

    Args:
        record (Record): MARC record for the publication

    Returns:
        itertools.imap

    """
    def affiliations(field):
        if "u" not in field:
            return ()

        value = field["u"]
        if isinstance(value, (list, tuple)):
            return tuple(value)

        return (value,)

    return imap(affiliations, iter_author_fields(record))
def iter_author_affiliation_keys(record):
    """Iterate over the affiliation keys of each author.

    Note:
        ``affiliation_keys`` is applied to every author field of the
        record. The keys are obtained by concatenating the "u" and "v"
        keys. An author can have several affiliations.

    Args:
        record (Record): MARC record for the publication

    Returns:
        itertools.imap

    """
    fields = iter_author_fields(record)
    return imap(affiliation_keys, fields)
def iter_author_items(record):
    """Iterate over the author items of a record.

    ``author_item`` is applied to every author field of the record.
    Each item is a tuple containing the author name and an iterator on
    the author affiliation keys.

    Args:
        record (Record): MARC record for the publication

    Returns:
        itertools.imap

    """
    fields = iter_author_fields(record)
    return imap(author_item, fields)
def iter_author_names(record):
    """Iterate over the author names of a record.

    ``author_name`` is applied to every author field of the record.

    Args:
        record (Record): MARC record for the publication

    Returns:
        itertools.imap

    """
    fields = iter_author_fields(record)
    return imap(author_name, fields)
tests/invenio_tools/author_iterators/test_acl_cds1951625.py
0 → 100644
View file @
b81c3c96
# -*- coding: utf-8 -*-
"""ARTICLE
http://cds.cern.ch/record/1951625.
Precision luminosity measurements at LHCb,
J. Instrum. 9 (2014) P12005
arXiv:1410.0149
704 authors
Note:
* The first author is not in the author list
* LHCb collaboration
* The publication year is a list (duplicate 773y)
* The submitted date is not formatted: 01 Oct 2014
"""
import
itertools
import
pytest
from
invenio_tools
import
(
iter_author_affiliation_keys
,
iter_author_fields
,
iter_author_items
,
iter_author_names
,
load_record
)
@pytest.fixture(scope="module")
def record():
    """Record 1951625 of cds.cern.ch, shared by the tests of the module."""
    host, rec_id = 'cds.cern.ch', 1951625
    return load_record(host, rec_id)
def test_iter_author_fields(record):
    """The author fields of the record come as an itertools.chain."""
    fields = iter_author_fields(record)
    assert type(fields) is itertools.chain
def test_iter_names(record):
    """The author names of the record come as an itertools.imap."""
    names = iter_author_names(record)
    assert type(names) is itertools.imap
def test_iter_affiliation_keys(record):
    """The affiliation keys of the record come as an itertools.imap."""
    keys = iter_author_affiliation_keys(record)
    assert type(keys) is itertools.imap
def test_names(record):
    """Check the number of author names and a few of them by position."""
    names = list(iter_author_names(record))
    assert len(names) == 704

    # (position in the author list, expected name)
    expected = ((0, "Aaij, Roel"),
                (1, "Adeva, Bernardo"),
                (344, "Le Gac, Renaud"),
                (-1, "Zvyagin, Alexander"))

    for index, name in expected:
        assert names[index] == name
def test_affiliations(record):
    """Check the affiliation keys of the first author and of one
    author having two affiliations.

    """
    # the keys of each author are exhausted into a tuple to compare them
    affiliations = list(map(tuple, iter_author_affiliation_keys(record)))
    assert len(affiliations) == 704

    first, two_institutes = affiliations[0], affiliations[-8]
    assert first == ("NIKHEF, Amsterdam",)
    assert two_institutes == (u"Heidelberg, Max Planck Inst.",
                              u"Lebedev Inst.")
def test_items(record):
    """Check the author items of the first author and of one author
    having two affiliations.

    """
    # the affiliation keys iterator of each item is exhausted into a
    # tuple in order to compare it with the expected values
    items = [(el[0], tuple(el[1])) for el in iter_author_items(record)]

    assert len(items) == 704
    assert items[0] == (u'Aaij, Roel', (u'NIKHEF, Amsterdam',))

    assert items[-8] == (u'Zavertyaev, Mikhail',
                         (u'Heidelberg, Max Planck Inst.',
                          u'Lebedev Inst.'))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment