Commit aa2c3bed authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Upgrade to handle thesis supervisor in RecordHepThesis and ReportCdsThesis

parent 7799693c
......@@ -72,8 +72,6 @@ class AuthorsMixin(object):
+---------------+--------------------------------+
| last_name | family name |
+---------------+--------------------------------+
| role | equal to dir. for phd director |
+---------------+--------------------------------+
"""
......
......@@ -229,8 +229,6 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
+---------------+--------------------------------+
| last_name | family name |
+---------------+--------------------------------+
| relator_name | equal to dir. for phd director |
+---------------+--------------------------------+
Note:
After running this method, the attribute ``df_authors`` is defined.
......@@ -252,7 +250,7 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
data = self["authors"]
data = (data if isinstance(data, list) else [data])
df = DataFrame(data)
df = (DataFrame(data).assign(role=""))
# drop useless columns
refcols = ["affiliation",
......
......@@ -155,26 +155,28 @@ class RecordCdsThesis(RecordCdsPubli):
"""
return self._get("dissertation_note", "diploma")
def these_directors(self, sep=", "):
def these_directors(self, sep=", ", fmt="F. Last"):
"""The list of director(s)
Note:
Supervisor was defined in the field ``relator_name``
but is not available anymore. Therefore this method is a dummy one
which is kept to preserve the record interface.
Args:
sep (str):
separator between names
fmt (str):
format the name of supervisor
Possible values are "F. Last" and "Last F."
Returns:
str:
* Names are separated by the ``sep`` argument.
* Empty string when it is not defined.
"""
# for a thesis, the author field 700 field contains
# names of the director as well as the name of authors
df = self.df_authors
if "relator_name" in df:
query = df.relator_name == THESIS_DIR
df = df.loc[query]
return (sep.join(df.fmt_name) if len(df) > 0 else "")
else:
return ""
def these_universities(self):
......
......@@ -89,8 +89,6 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
+---------------+--------------------------------+
| last_name | family name |
+---------------+--------------------------------+
| role | equal to dir. for phd director |
+---------------+--------------------------------+
Note:
After running this method, the attribute ``df_authors`` is defined.
......@@ -107,8 +105,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
"first_name",
"fmt_name",
"full_name",
"last_name",
"role"]
"last_name"]
self.df_authors = DataFrame([[""] * len(cols)], columns=cols)
return
......@@ -119,8 +116,9 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
if "affiliations" in author:
affiliations = [elt["value"] for elt in author["affiliations"]]
role = \
(author["inspire_roles"] if "inspire_roles" in author else [])
# remove thesis supervisor
if len(author.get("inspire_roles", [])) > 0:
continue
full_name = author["full_name"]
idx = full_name.find(",")
......@@ -131,8 +129,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
"first_name": first_name.strip(),
"fmt_name": full_name,
"full_name": full_name,
"last_name": last_name.strip(),
"role": ", ".join(role)}
"last_name": last_name.strip()}
data.append(dct)
......
""" store_tools.recordhepthesis
"""
from .authorsmixin import to_initial
from .base import (AFF_CPPM,
MSG_WELL_FORMED_DATE,
REG_DATE_YYYYMM,
......@@ -23,44 +24,6 @@ class RecordHepThesis(RecordHepPubli):
"""
def authors_as_list(self, sort=False):
"""The list of author(s) signing the publication.
Note:
supersede the base class since the authors field contains
the author as well as names of director.
Args:
sort (bool): sort authors by first name when true.
Returns:
list:
the list is empty when authors are not defined.
"""
# for a thesis, the authors field contains names of author
# as well as those of directors. The latter have to be removed.
df = self.df_authors
if "role" in df:
df = df[df.role.str.len() == 0]
if sort:
li = (df[["last_name", "fmt_name"]]
.sort_values(by="last_name")
.fmt_name
.tolist())
else:
li = (df.fmt_name
.sort_index()
.tolist())
if len(li) == 1 and li[0] == "":
li = []
return li
def check_submitted_date(self):
"""Check that submitted date is either ``YYYY-MM`` or ``YYYY-MM-DD``.
......@@ -78,7 +41,7 @@ class RecordHepThesis(RecordHepPubli):
# recover by using the defense date
val = self.these_defense()
if REG_DATE_YYYYMM.match(val):
self["thesis_info"]["defense_date"] = val
self["preprint_date"] = val
else:
raise CheckException(MSG_WELL_FORMED_DATE)
......@@ -138,27 +101,44 @@ class RecordHepThesis(RecordHepPubli):
"""
return self.get("thesis_info", {}).get("degree_type", "")
def these_directors(self, sep=", "):
def these_directors(self, sep=", ", fmt="F. Last"):
"""The list of director(s)
Args:
sep (str):
separator between names
fmt (str):
format the name of supervisor
Possible values are "F. Last" and "Last F."
Returns:
str:
* Names are separated by the ``sep`` argument.
* Empty string when it is not defined.
"""
# for a thesis, the author field 700 field contains
# names of the director as well as the name of authors
df = self.df_authors
lst = []
# supervisors are in the list of authors, with a non-empty "inspire_roles"
for author in self.get("authors", []):
if "role" in df:
query = df.role.str.len() > 0
df = df.loc[query]
if len(author.get("inspire_roles", [])) == 0:
continue
return (sep.join(df.fmt_name) if len(df) > 0 else "")
full_name = author.get("full_name")
else:
return ""
# name of supervisor is encoded as "Last, First"
if fmt in ("F. Last", "Last F."):
last, first = full_name.split(",")
first = to_initial(first.strip())
full_name = (
f"{last} {first}" if fmt == "Last F."
else f"{first} {last}")
lst.append(full_name)
return sep.join(lst)
def these_universities(self):
"""The university(ies) delivering the thesis diploma.
......
......@@ -42,8 +42,7 @@ def test_these_level_ins_09003(record):
def test_these_directors_ins_09004(record):
assert record.these_directors(sep=u"|") == \
"He, Mao|Monnier, Emmanuel|Zhu, Chengguang"
assert record.these_directors(sep=u"|") == "M. He|E. Monnier|C. Zhu"
def test_these_universities_ins_09005(record):
......
"""test_18_check_and_fix_thesis_ins
https://inspirehep.net/api/literature/1088032
(same as https://cds.cern.ch/record/1394605)
Test individual methods of the check and fix process for a thesis:
* is with authors from my institute
* standardise name of collaboration
* format authors according to my format
* extract authors from my institute signing the publication
* is submitted date well formed
"""
import pytest
from gluon import current
from harvest_tools import get_rex_institute
from store_tools import load_record
@pytest.fixture(scope="module")
def record():
    """Provide the record under test, shared by every test of this module.

    The record is loaded from ``inspirehep.net`` (shelf ``literature``)
    with the identifier 1088032.

    """
    thesis = load_record("inspirehep.net", 1088032, shelf="literature")
    return thesis
def test_subtype_18001(record):
    """The subtype reported by the record is ``thesis``."""
    subtype = record.subtype()
    assert subtype == "thesis"
# ............................................................................
#
# Check and fix implemented in the RecordCdsConf
#
def test_check_authors_18010(record):
    """``check_authors`` returns ``None`` for this record.

    Note:
        the method raises CheckException in case of problem.

    """
    outcome = record.check_authors()
    assert outcome is None
def test_check_my_affiliation_18011(record):
    """``check_my_affiliation`` returns ``None`` for this record.

    The argument of the check is the value returned by
    ``get_rex_institute`` for ``current.db`` and ``current.app``.

    Note:
        the method raises CheckException in case of problem.

    """
    institute = get_rex_institute(current.db, current.app)
    outcome = record.check_my_affiliation(institute)
    assert outcome is None
def test_check_collaboration_18012(record):
    """The collaboration is an empty string before and after the check."""
    before = record.collaboration()
    assert before == ""

    record.check_collaboration(current.db)

    after = record.collaboration()
    assert after == ""
def test_check_format_authors_18013(record):
    """``check_format_authors`` rewrites the single author as "F. Last".

    The record initially lists one author, "Khanji, Basem". After the
    check is run with ``fmt="F. Last"`` the first author is "B. Khanji".

    """
    raw_names = record.authors_as_list()
    assert len(raw_names) == 1
    assert raw_names[0] == "Khanji, Basem"

    record.check_format_authors(fmt="F. Last")

    formatted_names = record.authors_as_list()
    assert formatted_names[0] == "B. Khanji"
def test_extract_my_authors_18014(record):
    """``extract_my_authors`` sets the attribute ``my_authors``.

    The attribute is ``None`` beforehand. Once authors are formatted as
    "F. Last" and the extraction is run, it is equal to "B. Khanji".

    Note:
        the extraction raises CheckException in case of problem.

    """
    institute = get_rex_institute(current.db, current.app)
    assert record.my_authors is None

    record.check_format_authors(fmt="F. Last")
    outcome = record.extract_my_authors(institute, sep="|", sort=True)
    assert outcome is None

    extracted = record.my_authors
    assert extracted == "B. Khanji"
def test_check_submitted_date_18015(record):
    """``check_submitted_date`` defines the submitted date of the record.

    The submitted date is an empty string before the check and
    "2011-09-16" after it.

    Note:
        the check raises CheckException in case of problem.

    """
    before = record.submitted()
    assert before == ""

    record.check_submitted_date()

    after = record.submitted()
    assert after == "2011-09-16"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment