Commit 33a3b5ce authored by LE GAC Renaud
Browse files

Restore recordthesis.py from branch 77-marc-to-json.

parent 97394346
......@@ -3,8 +3,7 @@
"""
from .base import THESIS_DIR
from filters import CLEAN_THESIS_DEFENSE
from .recordpubli import format_names, RecordPubli
from pandas import DataFrame
from .recordpubli import RecordPubli
class RecordThesis(RecordPubli):
......@@ -18,27 +17,6 @@ class RecordThesis(RecordPubli):
+-------------------+-----------------------------------+
"""
def _process_authors(self):
"""Process author and director names
"""
RecordPubli._process_authors(self)
# PhD directors
if "701" not in self:
return
lst = (self["701"] if isinstance(self["701"], list) else [self["701"]])
df = DataFrame(lst)
df1 = df.a.str.split(",", expand=True)
df[["last_name", "first_name"]] = df1[[0, 1]]
df.first_name = df.first_name.str.strip()
df.last_name = df.last_name.str.strip()
df["fmt_name"] = df.a
self["701"] = df
def authors_as_list(self, sort=False):
"""The list of author(s) signing the publication.
......@@ -56,18 +34,18 @@ class RecordThesis(RecordPubli):
"""
# for a thesis, the authors field contains the names of the authors
# as well as those of the directors. The latter have to be removed.
df = self["authors"]
df = self[u"authors"]
query = df.relator_name != THESIS_DIR
if sort:
li = (df[["last_name", "fmt_name"]]
li = (df.loc[query, ["last_name", "fmt_name"]]
.sort_values(by="last_name")
.fmt_name
.tolist())
else:
li = (df.fmt_name
li = (df.loc[query].fmt_name
.sort_index()
.tolist())
......@@ -76,79 +54,57 @@ class RecordThesis(RecordPubli):
return li
def reformat_authors(self, fmt="Last, First"):
"""Reformat author and director names.
The default formatting for cds/invenio record is ``Last, First``.
Args:
fmt (str):
define the new format for author names.
Possible values are "First, Last", "F. Last", "Last",
"Last, First" and "Last F."
Raises:
RecordException: if fmt is not valid.
"""
RecordPubli.reformat_authors(self, fmt)
if "701" not in self:
return
self["701"] = format_names(self["701"], fmt)
def these_defense(self):
"""The defence date for a master/phd thesis.
Returns:
str:
unicode:
* The pattern is not standardised
and can vary between records and between stores.
* The filter CLEAN_THESIS_DEFENSE is applied.
"""
val = self._get("dissertation_note", "defense_date")
val = self._get(u"dissertation_note", u"defense_date")
return CLEAN_THESIS_DEFENSE(val)
def these_level(self):
"""The level of the thesis.
Returns:
str:
unicode:
* The value is ``master`` or ``PhD``.
* The value is not standardised and can vary
between records and between stores.
* Empty string when not defined
"""
return self._get("dissertation_note", "diploma")
return self._get(u"dissertation_note", u"diploma")
def these_directors(self, sep=", "):
def these_directors(self, sep=u", "):
"""The list of director(s)
Returns:
str:
unicode:
* Names are separated by the ``sep`` argument.
* Empty string when it is not defined.
"""
# for a thesis, the author field (700) contains
# the names of the directors as well as the names of the authors
df = self["authors"]
df = self[u"authors"]
query = df.relator_name == THESIS_DIR
df = df.loc[query]
return (sep.join(df.fmt_name) if len(df) > 0 else "")
return (sep.join(df.fmt_name) if len(df) > 0 else u"")
def these_universities(self):
"""The university(ies) delivering the thesis diploma.
Returns:
str:
unicode:
- empty when no university is defined
- several universities are separated by the ``&`` character.
"""
return self._get("dissertation_note", "university")
return self._get(u"dissertation_note", u"university")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment