Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit b81c3c96 authored by LE GAC Renaud
Browse files

Add the authors module containing a collection of iterators.

parent b5d7b17b
......@@ -26,6 +26,12 @@ from exception import (CdsException,
RecordException,
XmlException)
from authors import (iter_author_affiliations,
iter_author_affiliation_keys,
iter_author_fields,
iter_author_items,
iter_author_names)
from inveniostore import InvenioStore
from iterrecord import IterRecord, REG_INT
from marc12 import Marc12
......
# -*- coding: utf-8 -*-
""" invenio_tools.authors
"""
from base import REG_AUTHOR
from itertools import chain, imap, izip_longest
def to_list(x):
    """Return *x* unchanged when it is a list, otherwise wrap it in a list."""
    if isinstance(x, list):
        return x
    return [x]
def affiliation_keys(field):
    """Build the affiliation key(s) of an author field.

    Note:
        An affiliation key is the concatenation of the "u" value and of
        the "v" value found at the same position. When the two lists have
        different lengths, the shorter one is padded with empty strings.
        An author can have several affiliations, therefore several keys.

    Args:
        field (dict): author field

    Returns:
        itertools.imap: iterator over the affiliation keys, or an empty
            tuple when the field has no "u" key.

    """
    if "u" not in field:
        return ()

    institutes = to_list(field["u"])
    extras = to_list(field["v"]) if "v" in field else []

    # pair the u and v values position by position
    pairs = izip_longest(institutes, extras, fillvalue="")

    return imap(lambda pair: u"%s%s" % pair, pairs)
def author_item(field):
    """Build the author item from the author field.

    The item pairs the author name (the "a" value) with the iterator over
    the affiliation keys of that author.

    Args:
        field (dict): author field

    Returns:
        tuple: (author name, affiliation keys iterator). A pair of empty
            unicode strings is returned when the field has no "a" key.

    """
    if "a" in field:
        return (field["a"], affiliation_keys(field))

    return (u"", u"")
def author_name(field):
    """Return the author name stored in the author field.

    Note:
        In most cases the "a" value is a string, but it can be a list,
        e.g. inspirehep.net/138663:
        [u'Zuniga, J.', u'(the A.N.T.ARES. Collaboration)']

        For a list, the first element matching REG_AUTHOR is returned.
        When no element matches, the list itself is returned.

    Args:
        field (dict): author field

    Returns:
        str: the author name, or an empty unicode string when the field
            has no "a" key.

    """
    if "a" in field:
        candidate = field["a"]
    else:
        candidate = u""

    if not isinstance(candidate, list):
        return candidate

    for item in candidate:
        if REG_AUTHOR.match(item):
            return item

    return candidate
def iter_author_fields(record):
    """Iterate over the author fields of a record.

    The authors are spread over the 100 and 700 fields. Three layouts are
    handled:

    * 100 and 700 are both present. When the author of 100 is repeated as
      the first author of 700, the duplicate is dropped. Otherwise the
      content of 100 followed by the content of 700 is returned.
    * only 100 is present.
    * only 700 is present.

    Args:
        record (Record): MARC record for the publication

    Returns:
        iterator: an itertools.chain or a list of author fields, or an
            empty tuple when the record has neither 100 nor 700.

    """
    has_100, has_700 = u"100" in record, u"700" in record

    if has_100 and has_700:
        u100, u700 = record[u"100"], record[u"700"]

        if isinstance(u100, dict) and isinstance(u700, list):
            # the leading u700 test protects against an empty list
            if (u700 and "a" in u100 and "a" in u700[0]
                    and u100["a"] == u700[0]["a"]):
                return to_list(u700)

        elif isinstance(u100, dict) and isinstance(u700, dict):
            if "a" in u100 and "a" in u700 and u100["a"] == u700["a"]:
                return to_list(u100)

        return chain(to_list(u100), to_list(u700))

    # a single field can be a dict: always hand back a list of fields
    if has_100:
        return to_list(record[u"100"])

    if has_700:
        return to_list(record[u"700"])

    return ()
def iter_author_affiliations(record):
    """Iterate over the affiliations of each author.

    Note:
        The affiliation is defined by the "u" key. An author can have
        several affiliations, so each author yields a tuple. The tuple is
        empty when the author field has no "u" key.

    Args:
        record (Record): MARC record for the publication

    Returns:
        itertools.imap: one tuple of affiliations per author field

    """
    def as_tuple(value):
        if isinstance(value, (list, tuple)):
            return tuple(value)
        return (value,)

    def affiliations(field):
        if "u" in field:
            return as_tuple(field["u"])
        return ()

    return imap(affiliations, iter_author_fields(record))
def iter_author_affiliation_keys(record):
    """Iterate over the affiliation keys of each author.

    Note:
        The affiliation keys are built by affiliation_keys, which
        concatenates the "u" and "v" values. An author can have several
        affiliations, so each author yields an iterable of keys.

    Args:
        record (Record): MARC record for the publication

    Returns:
        itertools.imap: one iterable of affiliation keys per author field

    """
    fields = iter_author_fields(record)
    return imap(affiliation_keys, fields)
def iter_author_items(record):
    """Iterate over the author items.

    Each item is built by author_item: a tuple containing the author name
    and an iterator over the affiliation keys of that author.

    Args:
        record (Record): MARC record for the publication

    Returns:
        itertools.imap: one item per author field

    """
    fields = iter_author_fields(record)
    return imap(author_item, fields)
def iter_author_names(record):
    """Iterate over the author names.

    Each name is extracted from its author field by author_name.

    Args:
        record (Record): MARC record for the publication

    Returns:
        itertools.imap: one name per author field

    """
    fields = iter_author_fields(record)
    return imap(author_name, fields)
# -*- coding: utf-8 -*-
"""ARTICLE
http://cds.cern.ch/record/1951625.
Precision luminosity measurements at LHCb,
J. Instrum. 9 (2014) P12005
arXiv:1410.0149
704 authors
Note:
* The first author is not in the author list
* LHCb collaboration
* The publication year is a list (duplicate 773y)
* The submitted date is not formatted: 01 Oct 2014
"""
import itertools
import pytest
from invenio_tools import (iter_author_affiliation_keys,
iter_author_fields,
iter_author_items,
iter_author_names,
load_record)
@pytest.fixture(scope="module")
def record():
    """Load record 1951625 from cds.cern.ch once for the whole module."""
    host, rec_id = 'cds.cern.ch', 1951625
    return load_record(host, rec_id)
def test_iter_author_fields(record):
    """The author fields of this record come back as an itertools.chain."""
    # isinstance is the idiomatic type check, rather than comparing type()
    assert isinstance(iter_author_fields(record), itertools.chain)
def test_iter_names(record):
    """The author names are delivered through an itertools.imap."""
    # isinstance is the idiomatic type check, rather than comparing type()
    assert isinstance(iter_author_names(record), itertools.imap)
def test_iter_affiliation_keys(record):
    """The affiliation keys are delivered through an itertools.imap."""
    # isinstance is the idiomatic type check, rather than comparing type()
    assert isinstance(iter_author_affiliation_keys(record), itertools.imap)
def test_names(record):
    """Check the number of authors and the names at a few positions."""
    names = list(iter_author_names(record))

    assert len(names) == 704

    expected = ((0, "Aaij, Roel"),
                (1, "Adeva, Bernardo"),
                (344, "Le Gac, Renaud"),
                (-1, "Zvyagin, Alexander"))

    for index, name in expected:
        assert names[index] == name
def test_affiliations(record):
    """Check the affiliation keys of the first author and of the 8th from the end."""
    affiliations = []
    for keys in iter_author_affiliation_keys(record):
        affiliations.append(tuple(keys))

    assert len(affiliations) == 704

    first, eighth_from_end = affiliations[0], affiliations[-8]
    assert first == ("NIKHEF, Amsterdam",)
    assert eighth_from_end == (u"Heidelberg, Max Planck Inst.",
                               u"Lebedev Inst.")
def test_items(record):
    """Check the (name, affiliation keys) item built for each author.

    The affiliation keys iterator of each item is turned into a tuple so
    that it can be compared with the expected values.

    """
    items = [(name, tuple(keys)) for name, keys in iter_author_items(record)]

    assert len(items) == 704
    assert items[0] == (u'Aaij, Roel', (u'NIKHEF, Amsterdam',))
    assert items[-8] == (u'Zavertyaev, Mikhail',
                         (u'Heidelberg, Max Planck Inst.', u'Lebedev Inst.'))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment