Commit 088f072c authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Major refactoring of the authors and affiliation methods.

- The non-conformity where the first author is in u100 but not in u700
  is now handled. It affects the method authors_as_list and all
  methods that find the authors' affiliations.
- Special treatment of the authors in RecordThesis, since the field
  u700 also contains the name of the thesis director.
parent 6b8fe4f7
...@@ -70,19 +70,16 @@ class RecordPubli(Record): ...@@ -70,19 +70,16 @@ class RecordPubli(Record):
""" """
authors = [] authors = []
first_author = self.first_author()
# NOTE # a single author
# the content of the 700 field depend on the record type.
# For thesis it also contains the name of the director
if u"700" in self and isinstance(self[u"700"], dict): if u"700" in self and isinstance(self[u"700"], dict):
if not ("e" in self[u"700"] and self[u"700"]["e"] == THESIS_DIR): if "a" in self[u"700"]:
authors.append(self[u"700"]["a"]) authors.append(self[u"700"]["a"])
# a list of authors
elif u"700" in self and isinstance(self[u"700"], list): elif u"700" in self and isinstance(self[u"700"], list):
for di in self[u"700"]: for di in self[u"700"]:
if "e" in di and di["e"] == THESIS_DIR:
continue
if "a" in di: if "a" in di:
author = di["a"] author = di["a"]
...@@ -91,7 +88,7 @@ class RecordPubli(Record): ...@@ -91,7 +88,7 @@ class RecordPubli(Record):
# but it can be a list, e.g inspirehep.net/138663: # but it can be a list, e.g inspirehep.net/138663:
# [u'Zuniga, J.', u'(the A.N.T.ARES. Collaboration)'] # [u'Zuniga, J.', u'(the A.N.T.ARES. Collaboration)']
if isinstance(author, unicode): if isinstance(author, unicode):
authors.append(di["a"]) authors.append(author)
elif isinstance(author, list): elif isinstance(author, list):
for elt in author: for elt in author:
...@@ -99,6 +96,14 @@ class RecordPubli(Record): ...@@ -99,6 +96,14 @@ class RecordPubli(Record):
authors.append(elt) authors.append(elt)
break break
# the first author is defined not the other one
elif first_author:
authors.append(first_author)
# sometime the first author is missing
if first_author != authors[0]:
authors.insert(0, first_author)
return authors return authors
def collaboration(self): def collaboration(self):
...@@ -147,54 +152,53 @@ class RecordPubli(Record): ...@@ -147,54 +152,53 @@ class RecordPubli(Record):
depending on whether the first argument is considered smaller than, depending on whether the first argument is considered smaller than,
equal to, or larger than the second one. equal to, or larger than the second one.
@rtype: unicode or None @rtype: unicode
@return: @return:
- Author names are separated by ", ". - Author names are separated by ", ".
- Author are sorted according to the function C{cmpFct}. - Author are sorted according to the function C{cmpFct}.
- C{None} when authors are not found. - Empty string when authors are not found.
""" """
# authors not defined # authors not defined
if (u"100" not in self) and (u"700" not in self): if not self.is_authors():
return None return u""
# compile the searching criteria authors = []
regex = re.compile(pattern) regex = re.compile(pattern)
# single author in the author list # standard case
if u"700" in self and isinstance(self[u"700"], dict): data_authors = (self[u"700"] if u"700" in self else [])
if "u" not in self[u"700"]: if isinstance(data_authors, dict):
return None data_authors = [data_authors]
s = self[u"700"]["u"]
if isinstance(s, list):
s = ', '.join(s)
if regex.search(s): # to cover the case in which the first author is not in self[u"700"]
if "a" in self[u"700"]: data_first_author = (self[u"100"] if u"100" in self else [])
return self[u"700"]["a"] if isinstance(data_first_author, dict):
data_first_author = [data_first_author]
return None # scan
for elements in (data_authors, data_first_author):
for di in elements:
# list of authors # no affiliation
elif u"700" in self and isinstance(self[u"700"], list):
authors = []
for di in self[u"700"]:
if 'u' not in di: if 'u' not in di:
return None return u""
s = di['u'] affiliations = di['u']
if isinstance(di['u'], list): if isinstance(affiliations, list):
s = ', '.join(di['u']) affiliations = u", ".join(affiliations)
if regex.search(s): # affiliation match
if regex.search(affiliations):
if "a" in di: if "a" in di:
authors.append(di['a']) authors.append(di["a"])
# remove duplicate entries and sort
authors = list(set(authors))
if cmpFct: if cmpFct:
authors.sort(key=cmpFct) authors.sort(key=cmpFct)
return u', '.join(authors) return u", ".join(authors)
def first_author(self): def first_author(self):
"""The name of the first author. """The name of the first author.
...@@ -204,7 +208,27 @@ class RecordPubli(Record): ...@@ -204,7 +208,27 @@ class RecordPubli(Record):
- Empty string when the first author is not defined. - Empty string when the first author is not defined.
""" """
return self._get(u"100", "a") # standard case
value = self._get(u"100", "a")
if value:
# PROTECTION
# It happens that the first author is duplicate, remove it
if isinstance(value, list):
value = ", ".join(set(value))
return value
# sometime it is only defined in the authors list
if u"700" in self:
if isinstance(self[u"700"], dict) and "a" in self[u"700"]:
return self[u"700"]["a"]
elif isinstance(self[u"700"], list):
if "a" in self[u"700"][0]:
return self[u"700"][0]["a"]
return u""
def first_author_institutes(self): def first_author_institutes(self):
"""The institute(s) associated to the first author. """The institute(s) associated to the first author.
...@@ -215,8 +239,30 @@ class RecordPubli(Record): ...@@ -215,8 +239,30 @@ class RecordPubli(Record):
- The string is empty when institutes are not defined. - The string is empty when institutes are not defined.
""" """
# standard case
# PROTECTION
# sometime the first author is duplicate -- remove duplicate
li = self._get(u"100", "u", force_list=True) li = self._get(u"100", "u", force_list=True)
return u', '.join(li) if li:
return u", ".join(set(li))
# sometime it is only defined in the authors list
if u"700" in self:
if isinstance(self[u"700"], dict) and "u" in self[u"700"]:
if isinstance(self[u"700"]["u"], unicode):
return self[u"700"]["u"]
elif isinstance(self[u"700"]["u"], list):
return u", ".join(self[u"700"]["u"])
elif isinstance(self[u"700"], list):
if "u" in self[u"700"][0]:
if isinstance(self[u"700"][0]["u"], unicode):
return self[u"700"][0]["u"]
elif isinstance(self[u"700"][0]["u"], list):
return u", ".join(self[u"700"][0]["u"])
return u""
def institutes(self): def institutes(self):
"""The list of institute signing the publication. """The list of institute signing the publication.
...@@ -230,7 +276,8 @@ class RecordPubli(Record): ...@@ -230,7 +276,8 @@ class RecordPubli(Record):
# each entry can be a string or a list when the author has # each entry can be a string or a list when the author has
# several affiliations # several affiliations
for el in self._get(u"700", "u", force_list=True): for field in (u"100", u"700"):
for el in self._get(field, "u", force_list=True):
if isinstance(el, list): if isinstance(el, list):
li.extend(el) li.extend(el)
else: else:
...@@ -276,26 +323,25 @@ class RecordPubli(Record): ...@@ -276,26 +323,25 @@ class RecordPubli(Record):
@return: @return:
""" """
if u"700" not in self: if u"700" not in self and u"100" not in self:
return False return False
# dict case for field in (u"100", u"700"):
if isinstance(self[u"700"], dict): if field in self:
return "u" in self[u"700"] dictionaries = self[field]
if isinstance(dictionaries, dict):
dictionaries = [dictionaries]
# list case for di in dictionaries:
elif isinstance(self[u"700"], list): if isinstance(di, dict):
for el in self[u"700"]: if "u" in di:
if isinstance(el, dict):
if "u" in el:
continue continue
else: else:
return False return False
else: else:
return False return False
return True
return False return True
def is_authors(self): def is_authors(self):
"""C{True} when authors are defined. """C{True} when authors are defined.
......
...@@ -20,6 +20,59 @@ class RecordThesis(RecordPubli): ...@@ -20,6 +20,59 @@ class RecordThesis(RecordPubli):
----------------------+---------+---------- ----------------------+---------+----------
""" """
def authors_as_list(self):
    """The list of author(s) signing the thesis.

    The field 700 of a thesis record also contains the name of the
    thesis director, flagged by the subfield C{e} equal to
    C{THESIS_DIR}. Those entries are not reported as authors.

    @rtype: list
    @return:
        - When the first author is defined, it is always the first
          element of the list.
        - The list is empty when authors are not defined.

    """
    authors = []
    first_author = self.first_author()

    # NOTE
    # the content of the 700 field depends on the record type.
    # For a thesis it also contains the name of the director.
    field = self[u"700"] if u"700" in self else None

    # a single entry
    if isinstance(field, dict):
        is_director = "e" in field and field["e"] == THESIS_DIR
        if not is_director and "a" in field:
            authors.append(field["a"])

    # a list of entries
    elif isinstance(field, list):
        for di in field:
            if "e" in di and di["e"] == THESIS_DIR:
                continue

            if "a" not in di:
                continue

            author = di["a"]

            # PROTECTION
            # in most of the case the author is a string
            # but it can be a list, e.g inspirehep.net/138663:
            # [u'Zuniga, J.', u'(the A.N.T.ARES. Collaboration)']
            if isinstance(author, unicode):
                authors.append(author)

            elif isinstance(author, list):
                # keep the first element looking like an author name
                for elt in author:
                    if REG_AUTHOR.match(elt):
                        authors.append(elt)
                        break

    # The first author is sometimes missing from the 700 field, or is
    # the only author defined. Put it at the head of the list, but only
    # when it is really defined: an empty name must not be reported
    # as an author, otherwise the list would be [u""] instead of [].
    if first_author and (not authors or authors[0] != first_author):
        authors.insert(0, first_author)

    return authors
def these_defense(self): def these_defense(self):
"""The defense date for a master/phd thesis. """The defense date for a master/phd thesis.
......
...@@ -31,15 +31,27 @@ def record(): ...@@ -31,15 +31,27 @@ def record():
return load_record('cds.cern.ch', 1951625) return load_record('cds.cern.ch', 1951625)
def test_affiliations(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
institutes = record.institutes()
assert institutes[0] == "AGH-UST, Cracow"
assert institutes[44] == "MIT"
assert institutes[-1] == "Zurich U."
def test_authors(record): def test_authors(record):
assert record.is_authors() == True assert record.is_authors() == True
authors = record.authors_as_list() authors = record.authors_as_list()
assert len(authors) == 703 assert len(authors) == 704
assert authors[0] == "Adeva, Bernardo" assert authors[0] == "Aaij, Roel"
assert authors[343] == "Le Gac, Renaud" assert authors[1] == "Adeva, Bernardo"
assert authors[344] == "Le Gac, Renaud"
assert authors[-1] == "Zvyagin, Alexander" assert authors[-1] == "Zvyagin, Alexander"
...@@ -51,7 +63,7 @@ def test_first_author(record): ...@@ -51,7 +63,7 @@ def test_first_author(record):
assert record.first_author() == "Aaij, Roel" assert record.first_author() == "Aaij, Roel"
def test_first_institutes(record): def test_first_author_institutes(record):
assert record.first_author_institutes() == "NIKHEF, Amsterdam" assert record.first_author_institutes() == "NIKHEF, Amsterdam"
...@@ -63,18 +75,6 @@ def test_id(record): ...@@ -63,18 +75,6 @@ def test_id(record):
assert record.id() == "1951625" assert record.id() == "1951625"
def test_institutes(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
institutes = record.institutes()
assert institutes[0] == "AGH-UST, Cracow"
assert institutes[44] == "MIT"
assert institutes[-1] == "Zurich U."
def test_is_article(record): def test_is_article(record):
assert isinstance(record, RecordPubli) assert isinstance(record, RecordPubli)
assert record.is_published() == True assert record.is_published() == True
......
...@@ -30,12 +30,24 @@ def record(): ...@@ -30,12 +30,24 @@ def record():
return load_record('inspirehep.net', 1319638) return load_record('inspirehep.net', 1319638)
def test_affiliations(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
institutes = record.institutes()
assert institutes[0] == "AGH-UST, Cracow"
assert institutes[44] == "MIT"
assert institutes[-1] == "Zurich U."
def test_authors(record): def test_authors(record):
authors = record.authors_as_list() authors = record.authors_as_list()
assert len(authors) == 703 assert len(authors) == 704
assert authors[0] == "Adeva, Bernardo" assert authors[0] == "Aaij, Roel"
assert authors[343] == "Le Gac, Renaud" assert authors[1] == "Adeva, Bernardo"
assert authors[344] == "Le Gac, Renaud"
assert authors[-1] == "Zvyagin, Alexander" assert authors[-1] == "Zvyagin, Alexander"
...@@ -58,16 +70,6 @@ def test_id(record): ...@@ -58,16 +70,6 @@ def test_id(record):
assert record.id() == "1319638" assert record.id() == "1319638"
def test_institutes(record):
institutes = record.institutes()
assert institutes[0] == "AGH-UST, Cracow"
assert institutes[44] == "MIT"
assert institutes[-1] == "Zurich U."
assert record.is_affiliation_for_all() == True
def test_is_article(record): def test_is_article(record):
assert isinstance(record, RecordPubli) assert isinstance(record, RecordPubli)
assert record.is_published() == True assert record.is_published() == True
......
...@@ -26,8 +26,14 @@ def record(): ...@@ -26,8 +26,14 @@ def record():
return load_record('cds.cern.ch', 1411352) return load_record('cds.cern.ch', 1411352)
def test_affiliations(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
assert record.institutes() == ['Marseille, CPPM']
def test_authors(record): def test_authors(record):
assert record.authors() == "" assert record.authors() == "Leroy, Olivier"
def test_collaboration(record): def test_collaboration(record):
...@@ -70,7 +76,7 @@ def test_first_author(record): ...@@ -70,7 +76,7 @@ def test_first_author(record):
assert record.first_author() == "Leroy, Olivier" assert record.first_author() == "Leroy, Olivier"
def test_first_institutes(record): def test_first_authors_institutes(record):
assert record.first_author_institutes() == "Marseille, CPPM" assert record.first_author_institutes() == "Marseille, CPPM"
...@@ -82,10 +88,6 @@ def test_id(record): ...@@ -82,10 +88,6 @@ def test_id(record):
assert record.id() == "1411352" assert record.id() == "1411352"
def test_institutes(record):
assert record.institutes() == []
assert record.is_affiliation_for_all() == False
def test_is_proceeding(record): def test_is_proceeding(record):
assert isinstance(record, RecordConf) assert isinstance(record, RecordConf)
assert record.is_published() == True assert record.is_published() == True
......
...@@ -28,8 +28,15 @@ def record(): ...@@ -28,8 +28,15 @@ def record():
return load_record('inspirehep.net', 1276938) return load_record('inspirehep.net', 1276938)
def test_affiliations(record):
assert record.is_affiliations() == False
assert record.is_affiliation_for_all() == False
assert record.institutes() == []
def test_authors(record): def test_authors(record):
assert record.authors() == "" assert record.is_authors() == True
assert record.authors() == "Hubaut, F."
def test_collaboration(record): def test_collaboration(record):
...@@ -84,13 +91,6 @@ def test_id(record): ...@@ -84,13 +91,6 @@ def test_id(record):
assert record.id() == "1276938" assert record.id() == "1276938"
def test_institutes(record):
institutes = record.institutes()
assert record.institutes() == []
assert record.is_affiliation_for_all() == False
def test_is_proceeding(record): def test_is_proceeding(record):
assert isinstance(record, RecordConf) assert isinstance(record, RecordConf)
assert record.is_published() == False assert record.is_published() == False
......
...@@ -24,8 +24,14 @@ def record(): ...@@ -24,8 +24,14 @@ def record():
return load_record('cds.cern.ch', 1550918) return load_record('cds.cern.ch', 1550918)
def test_affiliations(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
assert record.institutes() == [u'CPPM, Aix-Marseille Université, CNRS/IN2P3, Marseille, France']
def test_authors(record): def test_authors(record):
assert record.authors() == "" assert record.authors() == "Le Gac, R"
def test_collaboration(record):