Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
aa2c3bed
Commit
aa2c3bed
authored
Jan 16, 2021
by
LE GAC Renaud
Browse files
Upgrade to handle thesis supervisor in RecordHepThesis and ReportCdsThesis
parent
7799693c
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
131 additions
and
77 deletions
+131
-77
modules/store_tools/authorsmixin.py
modules/store_tools/authorsmixin.py
+0
-2
modules/store_tools/recordcdspubli.py
modules/store_tools/recordcdspubli.py
+1
-3
modules/store_tools/recordcdsthesis.py
modules/store_tools/recordcdsthesis.py
+15
-13
modules/store_tools/recordheppubli.py
modules/store_tools/recordheppubli.py
+5
-8
modules/store_tools/recordhepthesis.py
modules/store_tools/recordhepthesis.py
+29
-49
tests/basis/test_09_RecordHepThesis.py
tests/basis/test_09_RecordHepThesis.py
+1
-2
tests/basis/test_18_check_and_fix_thesis_ins.py
tests/basis/test_18_check_and_fix_thesis_ins.py
+80
-0
No files found.
modules/store_tools/authorsmixin.py
View file @
aa2c3bed
...
...
@@ -72,8 +72,6 @@ class AuthorsMixin(object):
+---------------+--------------------------------+
| last_name | family name |
+---------------+--------------------------------+
| role | equal to dir. for phd director |
+---------------+--------------------------------+
"""
...
...
modules/store_tools/recordcdspubli.py
View file @
aa2c3bed
...
...
@@ -229,8 +229,6 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
+---------------+--------------------------------+
| last_name | family name |
+---------------+--------------------------------+
| relator_name | equal to dir. for phd director |
+---------------+--------------------------------+
Note:
After running this method, the attribute ``df_authors`` is defined.
...
...
@@ -252,7 +250,7 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
data
=
self
[
"authors"
]
data
=
(
data
if
isinstance
(
data
,
list
)
else
[
data
])
df
=
DataFrame
(
data
)
df
=
(
DataFrame
(
data
)
.
assign
(
role
=
""
))
# drop useless columns
refcols
=
[
"affiliation"
,
...
...
modules/store_tools/recordcdsthesis.py
View file @
aa2c3bed
...
...
@@ -155,27 +155,29 @@ class RecordCdsThesis(RecordCdsPubli):
"""
return
self
.
_get
(
"dissertation_note"
,
"diploma"
)
def
these_directors
(
self
,
sep
=
", "
):
def
these_directors
(
self
,
sep
=
",
"
,
fmt
=
"F. Last
"
):
"""The list of director(s)
Note:
Supervisor was defined in the field ``relator_name``
but is not available anymore. Therefore this method is a dummy one
which is kept to preserve the record interface.
Args:
sep (str):
separator between names
fmt (str):
format the name of supervisor
Possible values are "F. Last" and "Last F."
Returns:
str:
* Names are separated by the ``sep`` argument.
* Empty string when it is not defined.
"""
# for a thesis, the author field 700 field contains
# names of the director as well as the name of authors
df
=
self
.
df_authors
if
"relator_name"
in
df
:
query
=
df
.
relator_name
==
THESIS_DIR
df
=
df
.
loc
[
query
]
return
(
sep
.
join
(
df
.
fmt_name
)
if
len
(
df
)
>
0
else
""
)
else
:
return
""
return
""
def
these_universities
(
self
):
"""The university(ies) delivering the thesis diploma.
...
...
modules/store_tools/recordheppubli.py
View file @
aa2c3bed
...
...
@@ -89,8 +89,6 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
+---------------+--------------------------------+
| last_name | family name |
+---------------+--------------------------------+
| role | equal to dir. for phd director |
+---------------+--------------------------------+
Note:
After running this method, the attribute ``df_authors`` is defined.
...
...
@@ -107,8 +105,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
"first_name"
,
"fmt_name"
,
"full_name"
,
"last_name"
,
"role"
]
"last_name"
]
self
.
df_authors
=
DataFrame
([[
""
]
*
len
(
cols
)],
columns
=
cols
)
return
...
...
@@ -119,8 +116,9 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
if
"affiliations"
in
author
:
affiliations
=
[
elt
[
"value"
]
for
elt
in
author
[
"affiliations"
]]
role
=
\
(
author
[
"inspire_roles"
]
if
"inspire_roles"
in
author
else
[])
# remove thesis supervisor
if
len
(
author
.
get
(
"inspire_roles"
,
[]))
>
0
:
continue
full_name
=
author
[
"full_name"
]
idx
=
full_name
.
find
(
","
)
...
...
@@ -131,8 +129,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
"first_name"
:
first_name
.
strip
(),
"fmt_name"
:
full_name
,
"full_name"
:
full_name
,
"last_name"
:
last_name
.
strip
(),
"role"
:
", "
.
join
(
role
)}
"last_name"
:
last_name
.
strip
()}
data
.
append
(
dct
)
...
...
modules/store_tools/recordhepthesis.py
View file @
aa2c3bed
""" store_tools.recordhepthesis
"""
from
.authorsmixin
import
to_initial
from
.base
import
(
AFF_CPPM
,
MSG_WELL_FORMED_DATE
,
REG_DATE_YYYYMM
,
...
...
@@ -23,44 +24,6 @@ class RecordHepThesis(RecordHepPubli):
"""
def
authors_as_list
(
self
,
sort
=
False
):
"""The list of author(s) signing the publication.
Note:
supersede the base class since the authors field contains
the author as well as names of director.
Args:
sort (bool): sort authors by first name when true.
Returns:
list:
the list is empty when authors are not defined.
"""
# for a thesis, the authors field contains names of author
# as well as those of directors. The latter have to be removed.
df
=
self
.
df_authors
if
"role"
in
df
:
df
=
df
[
df
.
role
.
str
.
len
()
==
0
]
if
sort
:
li
=
(
df
[[
"last_name"
,
"fmt_name"
]]
.
sort_values
(
by
=
"last_name"
)
.
fmt_name
.
tolist
())
else
:
li
=
(
df
.
fmt_name
.
sort_index
()
.
tolist
())
if
len
(
li
)
==
1
and
li
[
0
]
==
""
:
li
=
[]
return
li
def
check_submitted_date
(
self
):
"""Check that submitted date is either ``YYYY-MM`` or ``YYYY-MM-DD``.
...
...
@@ -78,7 +41,7 @@ class RecordHepThesis(RecordHepPubli):
# recover by using the defense date
val
=
self
.
these_defense
()
if
REG_DATE_YYYYMM
.
match
(
val
):
self
[
"
thesis_info"
][
"defense
_date"
]
=
val
self
[
"
preprint
_date"
]
=
val
else
:
raise
CheckException
(
MSG_WELL_FORMED_DATE
)
...
...
@@ -138,27 +101,44 @@ class RecordHepThesis(RecordHepPubli):
"""
return
self
.
get
(
"thesis_info"
,
{}).
get
(
"degree_type"
,
""
)
def
these_directors
(
self
,
sep
=
", "
):
def
these_directors
(
self
,
sep
=
",
"
,
fmt
=
"F. Last
"
):
"""The list of director(s)
Args:
sep (str):
separator between names
fmt (str):
format the name of supervisor
Possible values are "F. Last" and "Last F."
Returns:
str:
* Names are separated by the ``sep`` argument.
* Empty string when it is not defined.
"""
# for a thesis, the author field 700 field contains
#
names of the director as well as the name of authors
df
=
self
.
df_
authors
lst
=
[]
#
supervisors are in the list of authors with a role equal to supervisor
for
author
in
self
.
get
(
"
authors
"
,
[]):
if
"role"
in
df
:
query
=
df
.
role
.
str
.
len
()
>
0
df
=
df
.
loc
[
query
]
if
len
(
author
.
get
(
"inspire_roles"
,
[]))
==
0
:
continue
return
(
sep
.
join
(
df
.
fmt_name
)
if
len
(
df
)
>
0
else
"
"
)
full_name
=
author
.
get
(
"full_name
"
)
else
:
return
""
# name of supervisor is encoded as "Last, First"
if
fmt
in
(
"F. Last"
,
"Last F."
):
last
,
first
=
full_name
.
split
(
","
)
first
=
to_initial
(
first
.
strip
())
full_name
=
(
f
"
{
last
}
{
first
}
"
if
fmt
==
"Last F."
else
f
"
{
first
}
{
last
}
"
)
lst
.
append
(
full_name
)
return
sep
.
join
(
lst
)
def
these_universities
(
self
):
"""The university(ies) delivering the thesis diploma.
...
...
tests/basis/test_09_RecordHepThesis.py
View file @
aa2c3bed
...
...
@@ -42,8 +42,7 @@ def test_these_level_ins_09003(record):
def
test_these_directors_ins_09004
(
record
):
assert
record
.
these_directors
(
sep
=
u
"|"
)
==
\
"He, Mao|Monnier, Emmanuel|Zhu, Chengguang"
assert
record
.
these_directors
(
sep
=
u
"|"
)
==
"M. He|E. Monnier|C. Zhu"
def
test_these_universities_ins_09005
(
record
):
...
...
tests/basis/test_18_check_and_fix_thesis_ins.py
0 → 100644
View file @
aa2c3bed
"""test_18_check_and_fix_thesis_ins
https://inspirehep.net/api/literature/1088032
(same as https://cds.cern.ch/record/1394605)
Test individual method of check and fix process for thesis:
* is with authors from my institute
* standardise name of collaboration
* format authors according to my format
* extract authors from my institute signing the publication
* is submitted date well formed
"""
import
pytest
from
gluon
import
current
from
harvest_tools
import
get_rex_institute
from
store_tools
import
load_record
@pytest.fixture(scope="module")
def record():
    """Provide the thesis record under test.

    The record is loaded from the ``literature`` shelf of inspirehep.net
    and, because the fixture has module scope, the same instance is shared
    by every test of this module.

    """
    host = "inspirehep.net"
    rec_id = 1088032

    return load_record(host, rec_id, shelf="literature")
def test_subtype_18001(record):
    """The subtype reported by the record is ``thesis``."""
    subtype = record.subtype()
    assert subtype == "thesis"
# ............................................................................
#
# Check and fix implemented in the RecordCdsConf
#
def test_check_authors_18010(record):
    """``check_authors`` returns ``None`` for this record."""
    # the check raises CheckException in case of problem
    outcome = record.check_authors()
    assert outcome is None
def test_check_my_affiliation_18011(record):
    """``check_my_affiliation`` returns ``None`` for this record."""
    rex_institute = get_rex_institute(current.db, current.app)

    # the check raises CheckException in case of problem
    outcome = record.check_my_affiliation(rex_institute)
    assert outcome is None
def test_check_collaboration_18012(record):
    """The collaboration is empty before and after ``check_collaboration``."""
    before = record.collaboration()
    assert before == ""

    record.check_collaboration(current.db)

    after = record.collaboration()
    assert after == ""
def test_check_format_authors_18013(record):
    """``check_format_authors`` rewrites the single author as ``F. Last``."""
    # a single author, initially encoded as "Last, First"
    raw_names = record.authors_as_list()
    assert len(raw_names) == 1
    assert raw_names[0] == "Khanji, Basem"

    record.check_format_authors(fmt="F. Last")

    formatted_names = record.authors_as_list()
    assert formatted_names[0] == "B. Khanji"
def test_extract_my_authors_18014(record):
    """``extract_my_authors`` sets ``my_authors`` to the formatted author."""
    rex_institute = get_rex_institute(current.db, current.app)

    # nothing has been extracted yet
    assert record.my_authors is None

    record.check_format_authors(fmt="F. Last")

    # the extraction raises CheckException in case of problem
    outcome = record.extract_my_authors(rex_institute, sep="|", sort=True)
    assert outcome is None

    extracted = record.my_authors
    assert extracted == "B. Khanji"
def test_check_submitted_date_18015(record):
    """``check_submitted_date`` fills in the empty submitted date."""
    before = record.submitted()
    assert before == ""

    # the check raises CheckException in case of problem
    record.check_submitted_date()

    after = record.submitted()
    assert after == "2011-09-16"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment