Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit aa2c3bed authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Upgrade to handle thesis supervisor in RecordHepThesis and ReportCdsThesis

parent 7799693c
......@@ -72,8 +72,6 @@ class AuthorsMixin(object):
+---------------+--------------------------------+
| last_name | family name |
+---------------+--------------------------------+
| role | equal to dir. for phd director |
+---------------+--------------------------------+
"""
......
......@@ -229,8 +229,6 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
+---------------+--------------------------------+
| last_name | family name |
+---------------+--------------------------------+
| relator_name | equal to dir. for phd director |
+---------------+--------------------------------+
Note:
After running this method, the attribute ``df_authors`` is defined.
......@@ -252,7 +250,7 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
data = self["authors"]
data = (data if isinstance(data, list) else [data])
df = DataFrame(data)
df = (DataFrame(data).assign(role=""))
# drop useless columns
refcols = ["affiliation",
......
......@@ -155,27 +155,29 @@ class RecordCdsThesis(RecordCdsPubli):
"""
return self._get("dissertation_note", "diploma")
def these_directors(self, sep=", "):
def these_directors(self, sep=", ", fmt="F. Last"):
"""The list of director(s)
Note:
Supervisor was defined in the field ``relator_name``
but is not available anymore. Therefore this method is a dummy one
which is kept to preserve the record interface.
Args:
sep (str):
separator between names
fmt (str):
format the name of supervisor
Possible values are "F. Last" and "Last F."
Returns:
str:
* Names are separated by the ``sep`` argument.
* Empty string when it is not defined.
"""
# for a thesis, the author field 700 field contains
# names of the director as well as the name of authors
df = self.df_authors
if "relator_name" in df:
query = df.relator_name == THESIS_DIR
df = df.loc[query]
return (sep.join(df.fmt_name) if len(df) > 0 else "")
else:
return ""
return ""
def these_universities(self):
"""The university(ies) delivering the thesis diploma.
......
......@@ -89,8 +89,6 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
+---------------+--------------------------------+
| last_name | family name |
+---------------+--------------------------------+
| role | equal to dir. for phd director |
+---------------+--------------------------------+
Note:
After running this method, the attribute ``df_authors`` is defined.
......@@ -107,8 +105,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
"first_name",
"fmt_name",
"full_name",
"last_name",
"role"]
"last_name"]
self.df_authors = DataFrame([[""] * len(cols)], columns=cols)
return
......@@ -119,8 +116,9 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
if "affiliations" in author:
affiliations = [elt["value"] for elt in author["affiliations"]]
role = \
(author["inspire_roles"] if "inspire_roles" in author else [])
# remove thesis supervisor
if len(author.get("inspire_roles", [])) > 0:
continue
full_name = author["full_name"]
idx = full_name.find(",")
......@@ -131,8 +129,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
"first_name": first_name.strip(),
"fmt_name": full_name,
"full_name": full_name,
"last_name": last_name.strip(),
"role": ", ".join(role)}
"last_name": last_name.strip()}
data.append(dct)
......
""" store_tools.recordhepthesis
"""
from .authorsmixin import to_initial
from .base import (AFF_CPPM,
MSG_WELL_FORMED_DATE,
REG_DATE_YYYYMM,
......@@ -23,44 +24,6 @@ class RecordHepThesis(RecordHepPubli):
"""
def authors_as_list(self, sort=False):
    """Return the formatted names of the author(s) signing the thesis.

    Note:
        Rows of ``df_authors`` carrying a non-empty ``role`` are dropped
        before the names are extracted, since the authors field of a thesis
        also lists the director(s).

    Args:
        sort (bool): order the names by the ``last_name`` column when true,
            otherwise keep the order of the DataFrame index.

    Returns:
        list:
            values of the ``fmt_name`` column; the list is empty when the
            only entry is an empty string.

    """
    authors = self.df_authors

    # keep only the rows without a role (i.e. discard the directors)
    if "role" in authors:
        without_role = authors.role.str.len() == 0
        authors = authors[without_role]

    if sort:
        ordered = authors[["last_name", "fmt_name"]].sort_values(by="last_name")
        names = ordered.fmt_name.tolist()
    else:
        names = authors.fmt_name.sort_index().tolist()

    # a single empty name is the placeholder for "no authors"
    if len(names) == 1 and names[0] == "":
        return []

    return names
def check_submitted_date(self):
"""Check that submitted date is either ``YYYY-MM`` or ``YYYY-MM-DD``.
......@@ -78,7 +41,7 @@ class RecordHepThesis(RecordHepPubli):
# recover by using the defense date
val = self.these_defense()
if REG_DATE_YYYYMM.match(val):
self["thesis_info"]["defense_date"] = val
self["preprint_date"] = val
else:
raise CheckException(MSG_WELL_FORMED_DATE)
......@@ -138,27 +101,44 @@ class RecordHepThesis(RecordHepPubli):
"""
return self.get("thesis_info", {}).get("degree_type", "")
def these_directors(self, sep=", "):
def these_directors(self, sep=", ", fmt="F. Last"):
"""The list of director(s)
Args:
sep (str):
separator between names
fmt (str):
format the name of supervisor
Possible values are "F. Last" and "Last F."
Returns:
str:
* Names are separated by the ``sep`` argument.
* Empty string when it is not defined.
"""
# for a thesis, the author field 700 field contains
# names of the director as well as the name of authors
df = self.df_authors
lst = []
# supervisors are in the list of authors, with a role equal to supervisor
for author in self.get("authors", []):
if "role" in df:
query = df.role.str.len() > 0
df = df.loc[query]
if len(author.get("inspire_roles", [])) == 0:
continue
return (sep.join(df.fmt_name) if len(df) > 0 else "")
full_name = author.get("full_name")
else:
return ""
# name of supervisor is encoded as "Last, First"
if fmt in ("F. Last", "Last F."):
last, first = full_name.split(",")
first = to_initial(first.strip())
full_name = (
f"{last} {first}" if fmt == "Last F."
else f"{first} {last}")
lst.append(full_name)
return sep.join(lst)
def these_universities(self):
"""The university(ies) delivering the thesis diploma.
......
......@@ -42,8 +42,7 @@ def test_these_level_ins_09003(record):
def test_these_directors_ins_09004(record):
assert record.these_directors(sep=u"|") == \
"He, Mao|Monnier, Emmanuel|Zhu, Chengguang"
assert record.these_directors(sep=u"|") == "M. He|E. Monnier|C. Zhu"
def test_these_universities_ins_09005(record):
......
"""test_18_check_and_fix_thesis_ins
https://inspirehep.net/api/literature/1088032
(same as https://cds.cern.ch/record/1394605)
Test individual method of check and fix process for thesis:
* is with authors from my institute
* standardise name of collaboration
* format authors according to my format
* extract authors from my institute signing the publication
* is submitted date well formed
"""
import pytest
from gluon import current
from harvest_tools import get_rex_institute
from store_tools import load_record
@pytest.fixture(scope="module")
def record():
    """Load literature record 1088032 from inspirehep.net, once per module."""
    thesis = load_record("inspirehep.net", 1088032, shelf="literature")
    return thesis
def test_subtype_18001(record):
    """The subtype reported by the record is ``thesis``."""
    subtype = record.subtype()
    assert subtype == "thesis"
# ............................................................................
#
# Check and fix implemented in the RecordCdsConf
#
def test_check_authors_18010(record):
    """``check_authors`` returns ``None`` for this record."""
    # a CheckException is raised in case of problem
    outcome = record.check_authors()
    assert outcome is None
def test_check_my_affiliation_18011(record):
    """``check_my_affiliation`` returns ``None`` for this record."""
    # a CheckException is raised in case of problem
    institute_rex = get_rex_institute(current.db, current.app)
    outcome = record.check_my_affiliation(institute_rex)
    assert outcome is None
def test_check_collaboration_18012(record):
    """The collaboration is empty before and after ``check_collaboration``."""
    before = record.collaboration()
    assert before == ""

    record.check_collaboration(current.db)

    after = record.collaboration()
    assert after == ""
def test_check_format_authors_18013(record):
    """``check_format_authors`` rewrites the single author as "F. Last"."""
    raw_names = record.authors_as_list()
    assert len(raw_names) == 1
    assert raw_names[0] == "Khanji, Basem"

    record.check_format_authors(fmt="F. Last")

    formatted_names = record.authors_as_list()
    assert formatted_names[0] == "B. Khanji"
def test_extract_my_authors_18014(record):
    """``extract_my_authors`` sets ``my_authors`` to the formatted author."""
    # a CheckException is raised in case of problem
    institute_rex = get_rex_institute(current.db, current.app)
    assert record.my_authors is None

    record.check_format_authors(fmt="F. Last")
    outcome = record.extract_my_authors(institute_rex, sep="|", sort=True)
    assert outcome is None

    extracted = record.my_authors
    assert extracted == "B. Khanji"
def test_check_submitted_date_18015(record):
    """``check_submitted_date`` turns an empty submitted date into 2011-09-16."""
    # a CheckException is raised in case of problem
    before = record.submitted()
    assert before == ""

    record.check_submitted_date()

    after = record.submitted()
    assert after == "2011-09-16"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment