Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
33a3b5ce
Commit
33a3b5ce
authored
Jan 27, 2020
by
LE GAC Renaud
Browse files
Restore recordthesis.py from branch 77-marc-to-json.
parent
97394346
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
58 deletions
+14
-58
modules/invenio_tools/recordthesis.py
modules/invenio_tools/recordthesis.py
+14
-58
No files found.
modules/invenio_tools/recordthesis.py
View file @
33a3b5ce
...
...
@@ -3,8 +3,7 @@
"""
from
.base
import
THESIS_DIR
from
filters
import
CLEAN_THESIS_DEFENSE
from
.recordpubli
import
format_names
,
RecordPubli
from
pandas
import
DataFrame
from
.recordpubli
import
RecordPubli
class
RecordThesis
(
RecordPubli
):
...
...
@@ -18,27 +17,6 @@ class RecordThesis(RecordPubli):
+-------------------+-----------------------------------+
"""
def
_process_authors
(
self
):
"""Process author and director names
"""
RecordPubli
.
_process_authors
(
self
)
# PhD directors
if
"701"
not
in
self
:
return
lst
=
(
self
[
"701"
]
if
isinstance
(
self
[
"701"
],
list
)
else
[
self
[
"701"
]])
df
=
DataFrame
(
lst
)
df1
=
df
.
a
.
str
.
split
(
","
,
expand
=
True
)
df
[[
"last_name"
,
"first_name"
]]
=
df1
[[
0
,
1
]]
df
.
first_name
=
df
.
first_name
.
str
.
strip
()
df
.
last_name
=
df
.
last_name
.
str
.
strip
()
df
[
"fmt_name"
]
=
df
.
a
self
[
"701"
]
=
df
def
authors_as_list
(
self
,
sort
=
False
):
"""The list of author(s) signing the publication.
...
...
@@ -56,18 +34,18 @@ class RecordThesis(RecordPubli):
"""
# for a thesis, the authors field contains the names of the authors
# as well as those of directors. The latter have to be removed.
df
=
self
[
"authors"
]
df
=
self
[
u
"authors"
]
query
=
df
.
relator_name
!=
THESIS_DIR
if
sort
:
li
=
(
df
[
[
"last_name"
,
"fmt_name"
]]
li
=
(
df
.
loc
[
query
,
[
"last_name"
,
"fmt_name"
]]
.
sort_values
(
by
=
"last_name"
)
.
fmt_name
.
tolist
())
else
:
li
=
(
df
.
fmt_name
li
=
(
df
.
loc
[
query
].
fmt_name
.
sort_index
()
.
tolist
())
...
...
@@ -76,79 +54,57 @@ class RecordThesis(RecordPubli):
return
li
def
reformat_authors
(
self
,
fmt
=
"Last, First"
):
"""Reformat author and director names.
The default formatting for cds/invenio record is ``Last, First``.
Args:
fmt (str):
define the new format for author names.
Possible values are "First, Last", "F. Last", "Last",
"Last, First" and "Last F."
Raises:
RecordException: if fmt is not valid.
"""
RecordPubli
.
reformat_authors
(
self
,
fmt
)
if
"701"
not
in
self
:
return
self
[
"701"
]
=
format_names
(
self
[
"701"
],
fmt
)
def
these_defense
(
self
):
"""The defence date for a master/phd thesis.
Returns:
str
:
unicode
:
* The pattern is not standardised
and can vary between records and between stores.
* The filter CLEAN_THESIS_DEFENSE is applied.
"""
val
=
self
.
_get
(
"dissertation_note"
,
"defense_date"
)
val
=
self
.
_get
(
u
"dissertation_note"
,
u
"defense_date"
)
return
CLEAN_THESIS_DEFENSE
(
val
)
def
these_level
(
self
):
"""The level of the thesis.
Returns:
str
:
unicode
:
* The value is ``master`` or ``PhD``.
* The value is not standardised and can vary
between records and between stores.
* Empty string when not defined
"""
return
self
.
_get
(
"dissertation_note"
,
"diploma"
)
return
self
.
_get
(
u
"dissertation_note"
,
u
"diploma"
)
def
these_directors
(
self
,
sep
=
", "
):
def
these_directors
(
self
,
sep
=
u
", "
):
"""The list of director(s)
Returns:
str
:
unicode
:
* Names are separated by the ``sep`` argument.
* Empty string when it is not defined.
"""
# for a thesis, the author field (700) contains
# the names of the directors as well as the names of the authors
df
=
self
[
"authors"
]
df
=
self
[
u
"authors"
]
query
=
df
.
relator_name
==
THESIS_DIR
df
=
df
.
loc
[
query
]
return
(
sep
.
join
(
df
.
fmt_name
)
if
len
(
df
)
>
0
else
""
)
return
(
sep
.
join
(
df
.
fmt_name
)
if
len
(
df
)
>
0
else
u
""
)
def
these_universities
(
self
):
"""The university(ies) delivering the thesis diploma.
Returns:
str
:
unicode
:
- empty when university(ies) is not defined
- several universities are separated by the ``&`` character.
"""
return
self
.
_get
(
"dissertation_note"
,
"university"
)
return
self
.
_get
(
u
"dissertation_note"
,
u
"university"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment