Commit 10310a58 authored by LE GAC Renaud
Browse files

Update RecordPubli in order to use author iterators.

parent b81c3c96
......@@ -5,12 +5,16 @@
import re
from authors import (iter_author_affiliations,
iter_author_affiliation_keys,
iter_author_items,
iter_author_names)
from base import (ARXIV,
ARXIV_PDF,
REG_ARXIV_NUMBER,
REG_AUTHOR,
REG_YEAR)
from filters import CLEAN_COLLABORATION
from itertools import ifilter, imap
from plugin_dbui import as_list, CLEAN_SPACES
from record import Record
......@@ -79,52 +83,7 @@ class RecordPubli(Record):
list: the list is empty when authors are not defined.
"""
authors = []
first_author = self.first_author()
# a single author
if u"700" in self and isinstance(self[u"700"], dict):
if "a" in self[u"700"]:
authors.append(self[u"700"]["a"])
# a list of authors
elif u"700" in self and isinstance(self[u"700"], list):
for di in self[u"700"]:
if "a" in di:
author = di["a"]
# PROTECTION
# in most of the case the author is a string
# but it can be a list, e.g inspirehep.net/138663:
# [u'Zuniga, J.', u'(the A.N.T.ARES. Collaboration)']
if isinstance(author, unicode):
authors.append(author)
elif isinstance(author, list):
for elt in author:
if REG_AUTHOR.match(elt):
authors.append(elt)
break
# the first author is defined not the other one
elif first_author and isinstance(first_author, list):
authors.extend(first_author)
elif first_author:
authors.append(first_author)
# sometime the first author is missing
if first_author and len(authors) > 0:
if isinstance(first_author, list):
for i in xrange(len(first_author)):
if first_author[i] != authors[i]:
authors.insert(i, first_author[i])
elif first_author != authors[0]:
authors.insert(0, first_author)
return authors
return list(iter_author_names(self))
def collaboration(self):
"""The collaboration(s) signing the publication.
......@@ -155,41 +114,13 @@ class RecordPubli(Record):
- empty string when nothing is found.
"""
reg_affiliation = re.compile(pattern)
for field in (u"100", u"700"):
if field not in self:
continue
dictionaries = self[field]
if isinstance(dictionaries, dict):
dictionaries = [dictionaries]
for di in dictionaries:
if "u" not in di:
continue
key_u = di["u"]
if isinstance(key_u, unicode):
if "v" in di:
keys = ["%s%s" % (key_u, di["v"])]
else:
keys = [key_u]
regex = re.compile(pattern)
# affiliation can be list
# length for the v field can be less than u one
# see for example inspirehep.net 1426414
else:
keys = key_u
if "v" in di:
keys_v = (di["v"] if isinstance(di["v"], list) else [di["v"]])
for i in xrange(len(keys_v)):
keys[i] = "%s%s" % (keys[i], keys_v[i])
for iter_key in iter_author_affiliation_keys(self):
li = list(ifilter(regex.match, iter_key))
for affiliation in keys:
if reg_affiliation.match(affiliation):
return affiliation
if len(li) > 0:
return li[0]
return u""
......@@ -207,16 +138,11 @@ class RecordPubli(Record):
"""
li = []
regex = re.compile(pattern)
for author in self.authors_as_list():
if regex.search(author):
li.append(author)
return u', '.join(li)
return u", ".join(list(ifilter(regex.search, iter_author_names(self))))
def find_authors_by_affiliation(self, pattern, cmpFct=None):
"""Find authors belonging to a given institute(s) defined by a regular
expression.
expression. The search is performed on the affiliation keys.
Note:
Affiliation keys are obtained by concatenating the field 100u
......@@ -242,47 +168,15 @@ class RecordPubli(Record):
if not self.is_authors():
return u""
authors = []
# filter the list using affiliation key(s)
regex = re.compile(pattern)
faff = lambda x: len(list(ifilter(regex.search, x[1]))) > 0
# standard case
data_authors = (self[u"700"] if u"700" in self else [])
if isinstance(data_authors, dict):
data_authors = [data_authors]
# to cover the case in which the first author is not in self[u"700"]
data_first_author = (self[u"100"] if u"100" in self else [])
if isinstance(data_first_author, dict):
data_first_author = [data_first_author]
# scan
for dictionaries in (data_authors, data_first_author):
for di in dictionaries:
# one author without affiliation -- skip it
if 'u' not in di:
continue
keys = di['u']
if isinstance(keys, list):
if "v" in di:
keys_v = (di["v"] if isinstance(di["v"], list) else [di["v"]])
for i in xrange(len(keys_v)):
keys[i] = "%s%s" % (keys[i], keys_v[i])
keys = u", ".join(keys)
else:
if "v" in di:
keys = "%s%s" % (keys, di["v"])
# affiliation match
if regex.search(keys):
if "a" in di:
authors.append(di["a"])
# extract author name from the item
fname = lambda x: x[0]
authors = list(imap(fname, ifilter(faff, iter_author_items(self))))
# remove duplicate entries and sort
authors = list(set(authors))
# sort the list
if cmpFct:
authors.sort(key=cmpFct)
......@@ -297,89 +191,42 @@ class RecordPubli(Record):
- List of name when there is more than one.
"""
# standard case
value = self._get(u"100", "a")
if value:
# PROTECTION
# It happens that the first author is duplicate, remove it
if isinstance(value, list):
value = list(set(value))
if len(value) == 1:
return value[0]
return value
# sometime it is only defined in the authors list
if u"700" in self:
if isinstance(self[u"700"], dict) and "a" in self[u"700"]:
return self[u"700"]["a"]
elif isinstance(self[u"700"], list):
if "a" in self[u"700"][0]:
return self[u"700"][0]["a"]
return u""
return iter_author_names(self).next()
def first_author_institutes(self):
"""The institute(s) associated to the first author.
Note:
Search is performed via the affiliation key 100(700)u.
Returns:
unicode:
- names are separated by a comma.
- The string is empty when institutes are not defined.
"""
# standard case
# PROTECTION
# sometime the first author is duplicate -- remove duplicate
li = self._get(u"100", "u", force_list=True)
if li:
return u", ".join(set(li))
# sometime it is only defined in the authors list
if u"700" in self:
if isinstance(self[u"700"], dict) and "u" in self[u"700"]:
if isinstance(self[u"700"]["u"], unicode):
return self[u"700"]["u"]
elif isinstance(self[u"700"]["u"], list):
return u", ".join(self[u"700"]["u"])
elif isinstance(self[u"700"], list):
if "u" in self[u"700"][0]:
if isinstance(self[u"700"][0]["u"], unicode):
return self[u"700"][0]["u"]
elif isinstance(self[u"700"][0]["u"], list):
return u", ".join(self[u"700"][0]["u"])
return u""
return u", ".join(iter_author_affiliations(self).next())
def institutes(self):
"""The list of institutes signing the publication.
Note:
Name of institute are given by the affiliation key 100(700)u.
Returns:
list: the list is sorted in alphabetical order.
"""
li = []
myset = set()
# each entry can be a string or a list when the author has
# several affiliations
for field in (u"100", u"700"):
for el in self._get(field, "u", force_list=True):
if isinstance(el, list):
li.extend(el)
else:
li.append(el)
# remove duplicate entries
li = list(set(li))
for elt in iter_author_affiliations(self):
myset.update(elt)
# sort institute in alphabetic order
li.sort()
myli = list(myset)
myli.sort()
return li
return myli
def is_affiliations(self):
"""``True`` when affiliations are defined for authors.
......@@ -414,25 +261,8 @@ class RecordPubli(Record):
bool:
"""
if u"700" not in self and u"100" not in self:
return False
for field in (u"100", u"700"):
if field in self:
dictionaries = self[field]
if isinstance(dictionaries, dict):
dictionaries = [dictionaries]
for di in dictionaries:
if isinstance(di, dict):
if "u" in di:
continue
else:
return False
else:
return False
return True
func = lambda x: len(x) == 0
return len(list(ifilter(func, iter_author_affiliations(self)))) == 0
def is_authors(self):
"""``True`` when authors are defined.
......
......@@ -80,6 +80,14 @@ def test_id(record):
assert record.id() == "1951625"
def test_institutes(record):
    """Check the institute list returned by ``record.institutes()``.

    The test expects 89 entries, with the given first and last values.
    """
    found = record.institutes()

    assert len(found) == 89

    # check both ends of the list
    first, last = found[0], found[-1]
    assert first == u'AGH-UST, Cracow'
    assert last == u'Zurich U.'
def test_is_article(record):
assert isinstance(record, RecordPubli)
assert record.is_published() == True
......
......@@ -18,7 +18,8 @@ Note:
import itertools
import pytest
from invenio_tools import (iter_author_affiliation_keys,
from invenio_tools import (iter_author_affiliations,
iter_author_affiliation_keys,
iter_author_fields,
iter_author_items,
iter_author_names,
......@@ -30,35 +31,38 @@ def record():
return load_record('cds.cern.ch', 1951625)
def test_iter_author_fields(record):
assert type(iter_author_fields(record)) == itertools.chain
def test_iter_names(record):
assert type(iter_author_names(record)) == itertools.imap
def test_iter_affiliations(record):
assert type(iter_author_affiliations(record)) == itertools.imap
def test_iter_affiliation_keys(record):
assert type(iter_author_affiliation_keys(record)) == itertools.imap
def test_names(record):
names = list(iter_author_names(record))
def test_iter_author_fields(record):
assert type(iter_author_fields(record)) == itertools.chain
def test_iter_author_names(record):
assert type(iter_author_names(record)) == itertools.imap
assert len(names) == 704
assert names[0] == "Aaij, Roel"
assert names[1] == "Adeva, Bernardo"
assert names[344] == "Le Gac, Renaud"
assert names[-1] == "Zvyagin, Alexander"
def test_affiliations(record):
affiliations = [tuple(el) for el in iter_author_affiliation_keys(record)]
affiliations = list(iter_author_affiliations(record))
assert len(affiliations) == 704
assert affiliations[0] == ("NIKHEF, Amsterdam",)
assert affiliations[-8] == (u"Heidelberg, Max Planck Inst.", u"Lebedev Inst.")
def test_affiliation_keys(record):
    """Check the affiliation keys yielded for the record.

    Each yielded element is converted to a tuple so it can be compared
    with a literal.
    """
    per_author = list(map(tuple, iter_author_affiliation_keys(record)))

    assert len(per_author) == 704

    # spot-check the first entry and one near the end
    head = per_author[0]
    near_tail = per_author[-8]
    assert head == ("NIKHEF, Amsterdam",)
    assert near_tail == (u"Heidelberg, Max Planck Inst.", u"Lebedev Inst.")
def test_items(record):
items = [(el[0], tuple(el[1])) for el in iter_author_items(record)]
......@@ -68,3 +72,13 @@ def test_items(record):
assert len(items) == 704
assert items[0] == (u'Aaij, Roel', (u'NIKHEF, Amsterdam',))
assert items[-8] == (u'Zavertyaev, Mikhail', (u'Heidelberg, Max Planck Inst.', u'Lebedev Inst.'))
def test_names(record):
    """Check the author names yielded for the record.

    The iterator is materialised, then its length and a few positions
    are compared with the expected names.
    """
    authors = [name for name in iter_author_names(record)]

    assert len(authors) == 704

    # spot-check a few positions: (index, expected name)
    expected = (
        (0, "Aaij, Roel"),
        (1, "Adeva, Bernardo"),
        (344, "Le Gac, Renaud"),
        (-1, "Zvyagin, Alexander"),
    )
    for index, name in expected:
        assert authors[index] == name
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment