Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 088f072c authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Major refactoring of the authors and affiliation methods.

- The non-conformity where the first author is in u100 but not in u700
  is now handled. It affects the method authors_as_list and all
  methods that find author affiliations.
- Special treatment of the authors in RecordThesis, since the field
  u700 also contains the name of the thesis director.
parent 6b8fe4f7
......@@ -70,19 +70,16 @@ class RecordPubli(Record):
"""
authors = []
first_author = self.first_author()
# NOTE
# the content of the 700 field depend on the record type.
# For thesis it also contains the name of the director
# a single author
if u"700" in self and isinstance(self[u"700"], dict):
if not ("e" in self[u"700"] and self[u"700"]["e"] == THESIS_DIR):
if "a" in self[u"700"]:
authors.append(self[u"700"]["a"])
# a list of authors
elif u"700" in self and isinstance(self[u"700"], list):
for di in self[u"700"]:
if "e" in di and di["e"] == THESIS_DIR:
continue
if "a" in di:
author = di["a"]
......@@ -91,7 +88,7 @@ class RecordPubli(Record):
# but it can be a list, e.g inspirehep.net/138663:
# [u'Zuniga, J.', u'(the A.N.T.ARES. Collaboration)']
if isinstance(author, unicode):
authors.append(di["a"])
authors.append(author)
elif isinstance(author, list):
for elt in author:
......@@ -99,6 +96,14 @@ class RecordPubli(Record):
authors.append(elt)
break
# the first author is defined not the other one
elif first_author:
authors.append(first_author)
# sometime the first author is missing
if first_author != authors[0]:
authors.insert(0, first_author)
return authors
def collaboration(self):
......@@ -147,54 +152,53 @@ class RecordPubli(Record):
depending on whether the first argument is considered smaller than,
equal to, or larger than the second one.
@rtype: unicode or None
@rtype: unicode
@return:
- Author names are separated by ", ".
- Author are sorted according to the function C{cmpFct}.
- C{None} when authors are not found.
- Empty string when authors are not found.
"""
# authors not defined
if (u"100" not in self) and (u"700" not in self):
return None
if not self.is_authors():
return u""
# compile the searching criteria
authors = []
regex = re.compile(pattern)
# single author in the author list
if u"700" in self and isinstance(self[u"700"], dict):
if "u" not in self[u"700"]:
return None
# standard case
data_authors = (self[u"700"] if u"700" in self else [])
if isinstance(data_authors, dict):
data_authors = [data_authors]
s = self[u"700"]["u"]
if isinstance(s, list):
s = ', '.join(s)
# to cover the case in which the first author is not in self[u"700"]
data_first_author = (self[u"100"] if u"100" in self else [])
if isinstance(data_first_author, dict):
data_first_author = [data_first_author]
if regex.search(s):
if "a" in self[u"700"]:
return self[u"700"]["a"]
# scan
for elements in (data_authors, data_first_author):
for di in elements:
return None
# list of authors
elif u"700" in self and isinstance(self[u"700"], list):
authors = []
for di in self[u"700"]:
# no affiliation
if 'u' not in di:
return None
return u""
s = di['u']
if isinstance(di['u'], list):
s = ', '.join(di['u'])
affiliations = di['u']
if isinstance(affiliations, list):
affiliations = u", ".join(affiliations)
if regex.search(s):
# affiliation match
if regex.search(affiliations):
if "a" in di:
authors.append(di['a'])
authors.append(di["a"])
if cmpFct:
authors.sort(key=cmpFct)
# remove duplicate entries and sort
authors = list(set(authors))
if cmpFct:
authors.sort(key=cmpFct)
return u', '.join(authors)
return u", ".join(authors)
def first_author(self):
"""The name of the first author.
......@@ -204,7 +208,27 @@ class RecordPubli(Record):
- Empty string when the first author is not defined.
"""
return self._get(u"100", "a")
# standard case
value = self._get(u"100", "a")
if value:
# PROTECTION
# It happens that the first author is duplicate, remove it
if isinstance(value, list):
value = ", ".join(set(value))
return value
# sometime it is only defined in the authors list
if u"700" in self:
if isinstance(self[u"700"], dict) and "a" in self[u"700"]:
return self[u"700"]["a"]
elif isinstance(self[u"700"], list):
if "a" in self[u"700"][0]:
return self[u"700"][0]["a"]
return u""
def first_author_institutes(self):
"""The institute(s) associated to the first author.
......@@ -215,8 +239,30 @@ class RecordPubli(Record):
- The string is empty when institutes are not defined.
"""
# standard case
# PROTECTION
# sometime the first author is duplicate -- remove duplicate
li = self._get(u"100", "u", force_list=True)
return u', '.join(li)
if li:
return u", ".join(set(li))
# sometime it is only defined in the authors list
if u"700" in self:
if isinstance(self[u"700"], dict) and "u" in self[u"700"]:
if isinstance(self[u"700"]["u"], unicode):
return self[u"700"]["u"]
elif isinstance(self[u"700"]["u"], list):
return u", ".join(self[u"700"]["u"])
elif isinstance(self[u"700"], list):
if "u" in self[u"700"][0]:
if isinstance(self[u"700"][0]["u"], unicode):
return self[u"700"][0]["u"]
elif isinstance(self[u"700"][0]["u"], list):
return u", ".join(self[u"700"][0]["u"])
return u""
def institutes(self):
"""The list of institute signing the publication.
......@@ -230,11 +276,12 @@ class RecordPubli(Record):
# each entry can be a string or a list when the author has
# several affiliations
for el in self._get(u"700", "u", force_list=True):
if isinstance(el, list):
li.extend(el)
else:
li.append(el)
for field in (u"100", u"700"):
for el in self._get(field, "u", force_list=True):
if isinstance(el, list):
li.extend(el)
else:
li.append(el)
# remove duplicate entries
li = list(set(li))
......@@ -276,26 +323,25 @@ class RecordPubli(Record):
@return:
"""
if u"700" not in self:
if u"700" not in self and u"100" not in self:
return False
# dict case
if isinstance(self[u"700"], dict):
return "u" in self[u"700"]
# list case
elif isinstance(self[u"700"], list):
for el in self[u"700"]:
if isinstance(el, dict):
if "u" in el:
continue
else:
return False
else:
return False
return True
for field in (u"100", u"700"):
if field in self:
dictionaries = self[field]
if isinstance(dictionaries, dict):
dictionaries = [dictionaries]
for di in dictionaries:
if isinstance(di, dict):
if "u" in di:
continue
else:
return False
else:
return False
return False
return True
def is_authors(self):
"""C{True} when authors are defined.
......
......@@ -20,6 +20,59 @@ class RecordThesis(RecordPubli):
----------------------+---------+----------
"""
def authors_as_list(self):
    """The list of author(s) signing the publication.

    Authors are read from the field 700. Entries whose subfield "e" is
    equal to C{THESIS_DIR} are skipped, since for a thesis the field 700
    also contains the name of the director. The value returned by
    C{first_author} is then placed at the head of the list when it is
    defined and not already the first element.

    @rtype: list
    @return:
        - The first author, when defined, is the first element.
        - The list is empty when authors are not defined.

    """
    authors = []
    field_700 = self[u"700"] if u"700" in self else None

    # NOTE
    # the content of the 700 field depends on the record type.
    # For a thesis it also contains the name of the director.

    # a single entry in the 700 field
    if isinstance(field_700, dict):
        is_director = "e" in field_700 and field_700["e"] == THESIS_DIR
        if not is_director and "a" in field_700:
            authors.append(field_700["a"])

    # several entries in the 700 field
    elif isinstance(field_700, list):
        for di in field_700:
            if "e" in di and di["e"] == THESIS_DIR:
                continue

            if "a" not in di:
                continue

            author = di["a"]

            # PROTECTION
            # in most of the cases the author is a string
            # but it can be a list, e.g. inspirehep.net/138663:
            # [u'Zuniga, J.', u'(the A.N.T.ARES. Collaboration)']
            if isinstance(author, unicode):
                authors.append(author)

            elif isinstance(author, list):
                # keep only the first element looking like an author name
                for elt in author:
                    if REG_AUTHOR.match(elt):
                        authors.append(elt)
                        break

    # Sometimes the first author is missing from the 700 field, or is
    # the only author defined. An undefined (empty) first author is never
    # inserted, so that the list stays empty when no author is defined.
    first_author = self.first_author()
    if first_author and (not authors or authors[0] != first_author):
        authors.insert(0, first_author)

    return authors
def these_defense(self):
"""The defense date for a master/phd thesis.
......
......@@ -31,15 +31,27 @@ def record():
return load_record('cds.cern.ch', 1951625)
def test_affiliations(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
institutes = record.institutes()
assert institutes[0] == "AGH-UST, Cracow"
assert institutes[44] == "MIT"
assert institutes[-1] == "Zurich U."
def test_authors(record):
assert record.is_authors() == True
authors = record.authors_as_list()
assert len(authors) == 703
assert authors[0] == "Adeva, Bernardo"
assert authors[343] == "Le Gac, Renaud"
assert len(authors) == 704
assert authors[0] == "Aaij, Roel"
assert authors[1] == "Adeva, Bernardo"
assert authors[344] == "Le Gac, Renaud"
assert authors[-1] == "Zvyagin, Alexander"
......@@ -51,7 +63,7 @@ def test_first_author(record):
assert record.first_author() == "Aaij, Roel"
def test_first_institutes(record):
def test_first_author_institutes(record):
assert record.first_author_institutes() == "NIKHEF, Amsterdam"
......@@ -63,18 +75,6 @@ def test_id(record):
assert record.id() == "1951625"
def test_institutes(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
institutes = record.institutes()
assert institutes[0] == "AGH-UST, Cracow"
assert institutes[44] == "MIT"
assert institutes[-1] == "Zurich U."
def test_is_article(record):
assert isinstance(record, RecordPubli)
assert record.is_published() == True
......
......@@ -30,12 +30,24 @@ def record():
return load_record('inspirehep.net', 1319638)
def test_affiliations(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
institutes = record.institutes()
assert institutes[0] == "AGH-UST, Cracow"
assert institutes[44] == "MIT"
assert institutes[-1] == "Zurich U."
def test_authors(record):
authors = record.authors_as_list()
assert len(authors) == 703
assert authors[0] == "Adeva, Bernardo"
assert authors[343] == "Le Gac, Renaud"
assert len(authors) == 704
assert authors[0] == "Aaij, Roel"
assert authors[1] == "Adeva, Bernardo"
assert authors[344] == "Le Gac, Renaud"
assert authors[-1] == "Zvyagin, Alexander"
......@@ -58,16 +70,6 @@ def test_id(record):
assert record.id() == "1319638"
def test_institutes(record):
institutes = record.institutes()
assert institutes[0] == "AGH-UST, Cracow"
assert institutes[44] == "MIT"
assert institutes[-1] == "Zurich U."
assert record.is_affiliation_for_all() == True
def test_is_article(record):
assert isinstance(record, RecordPubli)
assert record.is_published() == True
......
......@@ -26,8 +26,14 @@ def record():
return load_record('cds.cern.ch', 1411352)
def test_affiliations(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
assert record.institutes() == ['Marseille, CPPM']
def test_authors(record):
assert record.authors() == ""
assert record.authors() == "Leroy, Olivier"
def test_collaboration(record):
......@@ -70,7 +76,7 @@ def test_first_author(record):
assert record.first_author() == "Leroy, Olivier"
def test_first_institutes(record):
def test_first_authors_institutes(record):
assert record.first_author_institutes() == "Marseille, CPPM"
......@@ -82,10 +88,6 @@ def test_id(record):
assert record.id() == "1411352"
def test_institutes(record):
assert record.institutes() == []
assert record.is_affiliation_for_all() == False
def test_is_proceeding(record):
assert isinstance(record, RecordConf)
assert record.is_published() == True
......
......@@ -28,8 +28,15 @@ def record():
return load_record('inspirehep.net', 1276938)
def test_affiliations(record):
assert record.is_affiliations() == False
assert record.is_affiliation_for_all() == False
assert record.institutes() == []
def test_authors(record):
assert record.authors() == ""
assert record.is_authors() == True
assert record.authors() == "Hubaut, F."
def test_collaboration(record):
......@@ -84,13 +91,6 @@ def test_id(record):
assert record.id() == "1276938"
def test_institutes(record):
institutes = record.institutes()
assert record.institutes() == []
assert record.is_affiliation_for_all() == False
def test_is_proceeding(record):
assert isinstance(record, RecordConf)
assert record.is_published() == False
......
......@@ -24,8 +24,14 @@ def record():
return load_record('cds.cern.ch', 1550918)
def test_affiliations(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
assert record.institutes() == [u'CPPM, Aix-Marseille Université, CNRS/IN2P3, Marseille, France']
def test_authors(record):
assert record.authors() == ""
assert record.authors() == "Le Gac, R"
def test_collaboration(record):
......@@ -68,7 +74,7 @@ def test_first_author(record):
assert record.first_author() == "Le Gac, R"
def test_first_institutes(record):
def test_first_author_institutes(record):
assert record.first_author_institutes() == u"CPPM, Aix-Marseille Université, CNRS/IN2P3, Marseille, France"
......@@ -80,13 +86,6 @@ def test_id(record):
assert record.id() == "1550918"
def test_institutes(record):
institutes = record.institutes()
assert record.institutes() == []
assert record.is_affiliation_for_all() == False
def test_is_proceeding(record):
assert isinstance(record, RecordConf)
assert record.is_published() == False
......
......@@ -28,8 +28,18 @@ def record():
return load_record('cds.cern.ch', 1632177)
def test_affiliations(record):
assert record.is_affiliations() == True
assert record.is_affiliation_for_all() == True
institutes = record.institutes()
assert institutes[0] == "Marseille, CPPM"
assert institutes[1] == "Shandong U."
def test_authors(record):
assert record.authors() == ""
assert record.authors() == "Chen, Liming"
def test_collaboration(record):
......@@ -52,15 +62,6 @@ def test_id(record):
assert record.id() == "1632177"
def test_institutes(record):
institutes = record.institutes()
assert institutes[0] == "Marseille, CPPM"
assert institutes[1] == "Shandong U."
assert record.is_affiliation_for_all() == True
def test_is_phd(record):
assert isinstance(record, RecordThesis)
assert record.is_published() == False
......
......@@ -12,8 +12,8 @@ def test_protection_authors_as_list():
"""[u'Zuniga, J.', u'(the A.N.T.ARES. Collaboration)']"""
record = load_record('inspirehep.net', 1386663)
authors = record.authors_as_list()
assert len(authors) == 139
assert authors[128] == u"Zuniga, J."
assert len(authors) == 140
assert authors[129] == u"Zuniga, J."
def test_protection_oai():
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment