Commit 5db30cca authored by LE GAC Renaud
Browse files

Update to pass all invenio_tools tests.

parent c4bad634
......@@ -87,9 +87,6 @@ class InvenioStore(object):
raise CdsException(str(he))
r.encoding = "utf-8"
print()
print(200, r.text)
return r.text
def get_ids(self, **kwargs):
......
......@@ -134,7 +134,7 @@ class RecordConf(RecordPubli):
is not defined.
"""
li = self._get("8564", "", force_list=True)
li = self._get("8564", "u", force_list=True)
# protection
# from time to time this field contains the reference to the pdf file
......
......@@ -69,7 +69,7 @@ class RecordInst(Record):
it is not defined.
"""
return self._get("110", "b")
return self._get("110", "a")
def rex(self):
""" Regular expression to search for authors affiliated with the institute.
......
......@@ -151,6 +151,9 @@ class RecordPubli(Record):
# - "a": author name
# - "e": phd director (equal to "dir.")
# - "u": affiliation(s)
#
# NOTE
# 191119: PhD director moved to field 701
df = df.drop(columns.difference(["a", "e", "u"]), axis="columns")
# add columns first_name, last_name and fmt_name
......
......@@ -4,6 +4,7 @@
from .base import THESIS_DIR
from filters import CLEAN_THESIS_DEFENSE
from .recordpubli import RecordPubli
from pandas import DataFrame
def is_thesis_dir(x):
......@@ -39,20 +40,21 @@ class RecordThesis(RecordPubli):
list: the list is empty when authors are not defined.
"""
# for a thesis, the author field 700 contains names of author
# as well as directors. The latter have to be removed.
df = self["700"]
query = df.e != THESIS_DIR
# for a thesis, the author field 700 contains the names of the authors
# as well as the directors. The latter have to be removed.
if "e" in df.columns:
df = df[df.e != THESIS_DIR]
if sort:
li = (df.loc[query, ["last_name", "fmt_name"]]
li = (df[["last_name", "fmt_name"]]
.sort_values(by="last_name")
.fmt_name
.tolist())
else:
li = (df.loc[query].fmt_name
li = (df.fmt_name
.sort_index()
.tolist())
......@@ -100,9 +102,17 @@ class RecordThesis(RecordPubli):
# for a thesis, the author field 700 contains
# the names of the directors as well as the names of the authors
df = self["700"]
query = df.e == THESIS_DIR
df = df.loc[query]
if "e" in df.columns:
query = df.e == THESIS_DIR
df = df.loc[query]
# discovered in 2019 that the director's name moved to field 701
elif "701" in self:
df = DataFrame(self["701"])
if "e" in df.columns:
query = df.e == THESIS_DIR
df = (df.loc[query]
.assign(fmt_name=lambda x: x.a))
return (sep.join(df.fmt_name) if len(df) > 0 else "")
......
......@@ -21,16 +21,16 @@ def test_get_ids():
rec_ids = store.get_ids(**kwargs)
rec_ids.sort()
ref_2015_ids = [
ref_2019_ids = [
1750838, 1755550, 1951383, 1951424, 1955544, 1966993, 1967222,
1967422, 1968989, 1969197, 1970675, 1970690, 1972201, 1975522,
1975714, 1978281, 1978798, 1981106, 1983198, 1987883, 1996441,
2000543, 2002385, 2003252, 2003792, 2003793, 2003794, 2004586,
2002385, 2003252, 2003792, 2003793, 2003794, 2004586,
2004591, 2005510, 2007377, 2011387, 2012165, 2012990, 2014715,
2014733, 2014836, 2016239, 2016711, 2019534, 2019536, 2020686,
2021262, 2029609, 2029820, 2030417, 2033887, 2033891, 2038937,
2040342, 2045144, 2047219, 2048426, 2048427, 2048812, 2049870,
2055598, 2057916, 2059561, 2060452]
assert len(rec_ids) == 60
assert rec_ids == ref_2015_ids
assert len(rec_ids) == 59
assert rec_ids == ref_2019_ids
......@@ -97,7 +97,7 @@ def test_id(record):
def test_institutes(record):
institutes = record.institutes()
assert len(institutes) == 89
assert len(institutes) == 90
assert institutes[0] == "AGH-UST, Cracow"
assert institutes[-1] == "Zurich U."
......@@ -132,7 +132,7 @@ def test_submitted(record):
def test_report_number(record):
assert record.report_number() == "CERN-PH-EP-2014-221, LHCB-PAPER-2014-047"
assert record.report_number() == "CERN-PH-EP-2014-221, CERN-PH-EP-2014-221, LHCB-PAPER-2014-047, LHCB-PAPER-2014-047"
def test_title(record):
......
......@@ -96,8 +96,8 @@ def test_is_proceeding(record):
def test_oai(record):
assert record.oai() == "oai:inspirehep.net:1276938"
assert record.oai_url() == "http://inspirehep.net/record/1276938"
assert record.oai() == "oai:inspirehep.net:1276938, oai:cds.cern.ch:1546370"
assert record.oai_url() == "http://inspirehep.net/record/1276938, http://cds.cern.ch/record/1546370"
def test_paper_reference(record):
......@@ -132,7 +132,7 @@ def test_reference_conference_talk(record):
def test_report_number(record):
assert record.report_number() == ""
assert record.report_number() == "ATL-PHYS-PROC-2013-108"
def test_submitted(record):
......
......@@ -14,8 +14,8 @@ def institute():
def test_name(institute):
assert institute.name() == ''
assert institute.name() == "Laboratoire de l'Accélérateur Linéaire (LAL)"
def test_rex(institute):
assert institute.rex() == r'Orsay, LAL|LAL, Orsay'
\ No newline at end of file
assert institute.rex() == r"Orsay, LAL|LAL, Orsay|Laboratoire de l'Accélérateur Linéaire (LAL)"
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment