Commit 5db30cca authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Update to pass all invenio_tools tests.

parent c4bad634
...@@ -87,9 +87,6 @@ class InvenioStore(object): ...@@ -87,9 +87,6 @@ class InvenioStore(object):
raise CdsException(str(he)) raise CdsException(str(he))
r.encoding = "utf-8" r.encoding = "utf-8"
print()
print(200, r.text)
return r.text return r.text
def get_ids(self, **kwargs): def get_ids(self, **kwargs):
......
...@@ -134,7 +134,7 @@ class RecordConf(RecordPubli): ...@@ -134,7 +134,7 @@ class RecordConf(RecordPubli):
is not defined. is not defined.
""" """
li = self._get("8564", "", force_list=True) li = self._get("8564", "u", force_list=True)
# protection # protection
# from time to time this field contains the reference to the pdf file # from time to time this field contains the reference to the pdf file
......
...@@ -69,7 +69,7 @@ class RecordInst(Record): ...@@ -69,7 +69,7 @@ class RecordInst(Record):
it is not defined. it is not defined.
""" """
return self._get("110", "b") return self._get("110", "a")
def rex(self): def rex(self):
""" Regular expression to search authors affiliate to the institute. """ Regular expression to search authors affiliate to the institute.
......
...@@ -151,6 +151,9 @@ class RecordPubli(Record): ...@@ -151,6 +151,9 @@ class RecordPubli(Record):
# - "a": author name # - "a": author name
# - "e": phd director (equal to "dir.") # - "e": phd director (equal to "dir.")
# - "u": affiliation(s) # - "u": affiliation(s)
#
# NOTE
# 191119: PhD Director move in field 701
df = df.drop(columns.difference(["a", "e", "u"]), axis="columns") df = df.drop(columns.difference(["a", "e", "u"]), axis="columns")
# add columns first_name, last_name and fmt_name # add columns first_name, last_name and fmt_name
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
from .base import THESIS_DIR from .base import THESIS_DIR
from filters import CLEAN_THESIS_DEFENSE from filters import CLEAN_THESIS_DEFENSE
from .recordpubli import RecordPubli from .recordpubli import RecordPubli
from pandas import DataFrame
def is_thesis_dir(x): def is_thesis_dir(x):
...@@ -39,20 +40,21 @@ class RecordThesis(RecordPubli): ...@@ -39,20 +40,21 @@ class RecordThesis(RecordPubli):
list: the list is empty when authors are not defined. list: the list is empty when authors are not defined.
""" """
# for a thesis, the author field 700 contains names of author
# as well as directors. The latter have to be removed.
df = self["700"] df = self["700"]
query = df.e != THESIS_DIR # for a thesis, the author field 700 contains names of author
# as well as directors. The latter have to be removed.
if "e" in df.columns:
df = df[df.e != THESIS_DIR]
if sort: if sort:
li = (df.loc[query, ["last_name", "fmt_name"]] li = (df[["last_name", "fmt_name"]]
.sort_values(by="last_name") .sort_values(by="last_name")
.fmt_name .fmt_name
.tolist()) .tolist())
else: else:
li = (df.loc[query].fmt_name li = (df.fmt_name
.sort_index() .sort_index()
.tolist()) .tolist())
...@@ -100,9 +102,17 @@ class RecordThesis(RecordPubli): ...@@ -100,9 +102,17 @@ class RecordThesis(RecordPubli):
# for a thesis, the author field 700 field contains # for a thesis, the author field 700 field contains
# names of the director as well as the name of authors # names of the director as well as the name of authors
df = self["700"] df = self["700"]
if "e" in df.columns:
query = df.e == THESIS_DIR query = df.e == THESIS_DIR
df = df.loc[query] df = df.loc[query]
# discover in 2019, that director's name move in the field 701
elif "701" in self:
df = DataFrame(self["701"])
if "e" in df.columns:
query = df.e == THESIS_DIR
df = (df.loc[query]
.assign(fmt_name=lambda x: x.a))
return (sep.join(df.fmt_name) if len(df) > 0 else "") return (sep.join(df.fmt_name) if len(df) > 0 else "")
......
...@@ -21,16 +21,16 @@ def test_get_ids(): ...@@ -21,16 +21,16 @@ def test_get_ids():
rec_ids = store.get_ids(**kwargs) rec_ids = store.get_ids(**kwargs)
rec_ids.sort() rec_ids.sort()
ref_2015_ids = [ ref_2019_ids = [
1750838, 1755550, 1951383, 1951424, 1955544, 1966993, 1967222, 1750838, 1755550, 1951383, 1951424, 1955544, 1966993, 1967222,
1967422, 1968989, 1969197, 1970675, 1970690, 1972201, 1975522, 1967422, 1968989, 1969197, 1970675, 1970690, 1972201, 1975522,
1975714, 1978281, 1978798, 1981106, 1983198, 1987883, 1996441, 1975714, 1978281, 1978798, 1981106, 1983198, 1987883, 1996441,
2000543, 2002385, 2003252, 2003792, 2003793, 2003794, 2004586, 2002385, 2003252, 2003792, 2003793, 2003794, 2004586,
2004591, 2005510, 2007377, 2011387, 2012165, 2012990, 2014715, 2004591, 2005510, 2007377, 2011387, 2012165, 2012990, 2014715,
2014733, 2014836, 2016239, 2016711, 2019534, 2019536, 2020686, 2014733, 2014836, 2016239, 2016711, 2019534, 2019536, 2020686,
2021262, 2029609, 2029820, 2030417, 2033887, 2033891, 2038937, 2021262, 2029609, 2029820, 2030417, 2033887, 2033891, 2038937,
2040342, 2045144, 2047219, 2048426, 2048427, 2048812, 2049870, 2040342, 2045144, 2047219, 2048426, 2048427, 2048812, 2049870,
2055598, 2057916, 2059561, 2060452] 2055598, 2057916, 2059561, 2060452]
assert len(rec_ids) == 60 assert len(rec_ids) == 59
assert rec_ids == ref_2015_ids assert rec_ids == ref_2019_ids
...@@ -97,7 +97,7 @@ def test_id(record): ...@@ -97,7 +97,7 @@ def test_id(record):
def test_institutes(record): def test_institutes(record):
institutes = record.institutes() institutes = record.institutes()
assert len(institutes) == 89 assert len(institutes) == 90
assert institutes[0] == "AGH-UST, Cracow" assert institutes[0] == "AGH-UST, Cracow"
assert institutes[-1] == "Zurich U." assert institutes[-1] == "Zurich U."
...@@ -132,7 +132,7 @@ def test_submitted(record): ...@@ -132,7 +132,7 @@ def test_submitted(record):
def test_report_number(record): def test_report_number(record):
assert record.report_number() == "CERN-PH-EP-2014-221, LHCB-PAPER-2014-047" assert record.report_number() == "CERN-PH-EP-2014-221, CERN-PH-EP-2014-221, LHCB-PAPER-2014-047, LHCB-PAPER-2014-047"
def test_title(record): def test_title(record):
......
...@@ -96,8 +96,8 @@ def test_is_proceeding(record): ...@@ -96,8 +96,8 @@ def test_is_proceeding(record):
def test_oai(record): def test_oai(record):
assert record.oai() == "oai:inspirehep.net:1276938" assert record.oai() == "oai:inspirehep.net:1276938, oai:cds.cern.ch:1546370"
assert record.oai_url() == "http://inspirehep.net/record/1276938" assert record.oai_url() == "http://inspirehep.net/record/1276938, http://cds.cern.ch/record/1546370"
def test_paper_reference(record): def test_paper_reference(record):
...@@ -132,7 +132,7 @@ def test_reference_conference_talk(record): ...@@ -132,7 +132,7 @@ def test_reference_conference_talk(record):
def test_report_number(record): def test_report_number(record):
assert record.report_number() == "" assert record.report_number() == "ATL-PHYS-PROC-2013-108"
def test_submitted(record): def test_submitted(record):
......
...@@ -14,8 +14,8 @@ def institute(): ...@@ -14,8 +14,8 @@ def institute():
def test_name(institute): def test_name(institute):
assert institute.name() == '' assert institute.name() == "Laboratoire de l'Accélérateur Linéaire (LAL)"
def test_rex(institute): def test_rex(institute):
assert institute.rex() == r'Orsay, LAL|LAL, Orsay' assert institute.rex() == r"Orsay, LAL|LAL, Orsay|Laboratoire de l'Accélérateur Linéaire (LAL)"
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment