Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
8b502fef
Commit
8b502fef
authored
Jan 07, 2021
by
LE GAC Renaud
Browse files
Update to add df_authors as attribute of the record
parent
343ca323
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
39 additions
and
36 deletions
+39
-36
modules/store_tools/pluginauthors.py
modules/store_tools/pluginauthors.py
+33
-30
modules/store_tools/recordheppubli.py
modules/store_tools/recordheppubli.py
+2
-2
modules/store_tools/recordpubli.py
modules/store_tools/recordpubli.py
+2
-2
tests/basis/test_04_RecordPubli.py
tests/basis/test_04_RecordPubli.py
+1
-1
tests/basis/test_05_RecordHepPubli.py
tests/basis/test_05_RecordHepPubli.py
+1
-1
No files found.
modules/store_tools/pluginauthors.py
View file @
8b502fef
...
@@ -52,25 +52,28 @@ def to_initial(name):
...
@@ -52,25 +52,28 @@ def to_initial(name):
class
PluginAuthors
(
object
):
class
PluginAuthors
(
object
):
"""Plugin to handle authors in RecordPubli and RecordHepPubli
"""Plugin to handle authors in RecordPubli and RecordHepPubli
Authors and their affiliations are stored in DataFrame with the
Attributes:
following structure:
df_authors (pandas.DataFrame):
+---------------+--------------------------------+
Authors and their affiliations are stored in DataFrame with the
| column | |
following structure:
+===============+================================+
| affiliation | value separated by "|" |
+---------------+--------------------------------+
+---------------+--------------------------------+
| column | |
| first_name | first name |
+===============+================================+
+---------------+--------------------------------+
| affiliation | value separated by "|" |
| fmt_name | formatted name |
+---------------+--------------------------------+
+---------------+--------------------------------+
| first_name | first name |
| full_name | Last, First |
+---------------+--------------------------------+
+---------------+--------------------------------+
| fmt_name | formatted name |
| last_name | family name |
+---------------+--------------------------------+
+---------------+--------------------------------+
| full_name | Last, First |
+---------------+--------------------------------+
The DataFrame has to be built in the parent
| last_name | family name |
It is retrieved via self["authors"]
+---------------+--------------------------------+
The DataFrame has to be built in the parent
It is retrieved via self.df_authors
"""
"""
...
@@ -108,7 +111,7 @@ class PluginAuthors(object):
...
@@ -108,7 +111,7 @@ class PluginAuthors(object):
* the list is empty when authors are not defined.
* the list is empty when authors are not defined.
"""
"""
df
=
self
[
"
authors
"
]
df
=
self
.
df_
authors
if
sort
:
if
sort
:
li
=
(
df
[[
"last_name"
,
"fmt_name"
]]
li
=
(
df
[[
"last_name"
,
"fmt_name"
]]
...
@@ -142,7 +145,7 @@ class PluginAuthors(object):
...
@@ -142,7 +145,7 @@ class PluginAuthors(object):
- empty string when nothing is found.
- empty string when nothing is found.
"""
"""
df
=
self
[
"
authors
"
]
df
=
self
.
df_
authors
query
=
df
.
affiliation
.
str
.
match
(
pattern
)
query
=
df
.
affiliation
.
str
.
match
(
pattern
)
data
=
df
[
query
]
data
=
df
[
query
]
...
@@ -174,7 +177,7 @@ class PluginAuthors(object):
...
@@ -174,7 +177,7 @@ class PluginAuthors(object):
* The string is empty when nothing is found.
* The string is empty when nothing is found.
"""
"""
df
=
self
[
"
authors
"
]
df
=
self
.
df_
authors
query
=
df
.
fmt_name
.
str
.
contains
(
pattern
)
query
=
df
.
fmt_name
.
str
.
contains
(
pattern
)
...
@@ -213,7 +216,7 @@ class PluginAuthors(object):
...
@@ -213,7 +216,7 @@ class PluginAuthors(object):
* Empty string when authors are not found.
* Empty string when authors are not found.
"""
"""
df
=
self
[
"
authors
"
]
df
=
self
.
df_
authors
query
=
df
.
affiliation
.
str
.
contains
(
pattern
)
query
=
df
.
affiliation
.
str
.
contains
(
pattern
)
...
@@ -237,7 +240,7 @@ class PluginAuthors(object):
...
@@ -237,7 +240,7 @@ class PluginAuthors(object):
empty string when the first author is not defined.
empty string when the first author is not defined.
"""
"""
return
self
[
"
authors
"
]
.
fmt_name
.
iloc
[
0
]
return
self
.
df_
authors
.
fmt_name
.
iloc
[
0
]
def
first_author_institutes
(
self
):
def
first_author_institutes
(
self
):
"""The institute(s) associated to the first author.
"""The institute(s) associated to the first author.
...
@@ -248,7 +251,7 @@ class PluginAuthors(object):
...
@@ -248,7 +251,7 @@ class PluginAuthors(object):
- The string is empty when institutes are not defined.
- The string is empty when institutes are not defined.
"""
"""
val
=
self
[
"
authors
"
]
.
affiliation
.
iloc
[
0
]
val
=
self
.
df_
authors
.
affiliation
.
iloc
[
0
]
return
(
""
if
val
==
NaN
else
val
)
return
(
""
if
val
==
NaN
else
val
)
def
institutes
(
self
):
def
institutes
(
self
):
...
@@ -259,7 +262,7 @@ class PluginAuthors(object):
...
@@ -259,7 +262,7 @@ class PluginAuthors(object):
the list is sorted in alphabetical order.
the list is sorted in alphabetical order.
"""
"""
df
=
self
[
"
authors
"
]
df
=
self
.
df_
authors
# expand multi-affiliation (one per column)
# expand multi-affiliation (one per column)
df
=
df
.
affiliation
.
str
.
split
(
"|"
,
expand
=
True
)
df
=
df
.
affiliation
.
str
.
split
(
"|"
,
expand
=
True
)
...
@@ -285,7 +288,7 @@ class PluginAuthors(object):
...
@@ -285,7 +288,7 @@ class PluginAuthors(object):
bool:
bool:
"""
"""
df
=
self
[
"
authors
"
]
df
=
self
.
df_
authors
if
len
(
df
)
==
1
and
df
.
affiliation
.
iloc
[
0
]
==
""
:
if
len
(
df
)
==
1
and
df
.
affiliation
.
iloc
[
0
]
==
""
:
return
False
return
False
...
@@ -299,7 +302,7 @@ class PluginAuthors(object):
...
@@ -299,7 +302,7 @@ class PluginAuthors(object):
bool:
bool:
"""
"""
df
=
self
[
"
authors
"
]
df
=
self
.
df_
authors
query
=
df
.
affiliation
.
isin
([
""
,
NaN
])
query
=
df
.
affiliation
.
isin
([
""
,
NaN
])
return
df
.
affiliation
[
query
].
size
==
0
return
df
.
affiliation
[
query
].
size
==
0
...
@@ -311,7 +314,7 @@ class PluginAuthors(object):
...
@@ -311,7 +314,7 @@ class PluginAuthors(object):
bool:
bool:
"""
"""
df
=
self
[
"
authors
"
]
df
=
self
.
df_
authors
cols
=
{
"first_name"
,
"full_name"
,
"last_name"
}
cols
=
{
"first_name"
,
"full_name"
,
"last_name"
}
if
len
(
df
.
columns
.
intersection
(
cols
))
!=
3
:
if
len
(
df
.
columns
.
intersection
(
cols
))
!=
3
:
...
@@ -346,7 +349,7 @@ class PluginAuthors(object):
...
@@ -346,7 +349,7 @@ class PluginAuthors(object):
self
.
_last_fmt_author
=
fmt
self
.
_last_fmt_author
=
fmt
df
=
self
[
"
authors
"
]
df
=
self
.
df_
authors
# ....................................................................
# ....................................................................
#
#
...
...
modules/store_tools/recordheppubli.py
View file @
8b502fef
...
@@ -60,7 +60,7 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
...
@@ -60,7 +60,7 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
"fmt_name"
,
"fmt_name"
,
"full_name"
,
"full_name"
,
"last_name"
]
"last_name"
]
self
[
"
authors
"
]
=
DataFrame
([[
""
]
*
len
(
cols
)],
columns
=
cols
)
self
.
df_
authors
=
DataFrame
([[
""
]
*
len
(
cols
)],
columns
=
cols
)
return
return
data
=
[]
data
=
[]
...
@@ -88,7 +88,7 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
...
@@ -88,7 +88,7 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
df
=
df
.
drop_duplicates
(
"full_name"
)
df
=
df
.
drop_duplicates
(
"full_name"
)
# replace
# replace
self
[
"
authors
"
]
=
df
self
.
df_
authors
=
df
def
_process_publication_info
(
self
):
def
_process_publication_info
(
self
):
"""Convert publication_info into DataFrame:
"""Convert publication_info into DataFrame:
...
...
modules/store_tools/recordpubli.py
View file @
8b502fef
...
@@ -161,7 +161,7 @@ class RecordPubli(Record, PluginAuthors, PluginPublicationInfo):
...
@@ -161,7 +161,7 @@ class RecordPubli(Record, PluginAuthors, PluginPublicationInfo):
"fmt_name"
,
"fmt_name"
,
"full_name"
,
"full_name"
,
"last_name"
]
"last_name"
]
self
[
"
authors
"
]
=
DataFrame
([[
""
]
*
len
(
cols
)],
columns
=
cols
)
self
.
df_
authors
=
DataFrame
([[
""
]
*
len
(
cols
)],
columns
=
cols
)
return
return
data
=
self
[
"authors"
]
data
=
self
[
"authors"
]
...
@@ -197,7 +197,7 @@ class RecordPubli(Record, PluginAuthors, PluginPublicationInfo):
...
@@ -197,7 +197,7 @@ class RecordPubli(Record, PluginAuthors, PluginPublicationInfo):
df
[
"fmt_name"
]
=
df
.
full_name
df
[
"fmt_name"
]
=
df
.
full_name
# replace
# replace
self
[
"
authors
"
]
=
df
self
.
df_
authors
=
df
def
_process_publication_info
(
self
):
def
_process_publication_info
(
self
):
"""Convert publication_info into DataFrame:
"""Convert publication_info into DataFrame:
...
...
tests/basis/test_04_RecordPubli.py
View file @
8b502fef
...
@@ -39,7 +39,7 @@ def test_constructor_cds_04002(record):
...
@@ -39,7 +39,7 @@ def test_constructor_cds_04002(record):
"""test the method _process_authors and _process_publication_info.
"""test the method _process_authors and _process_publication_info.
"""
"""
authors
=
record
[
"
authors
"
]
authors
=
record
.
df_
authors
assert
isinstance
(
authors
,
pd
.
DataFrame
)
assert
isinstance
(
authors
,
pd
.
DataFrame
)
refcols
=
[
"affiliation"
,
refcols
=
[
"affiliation"
,
...
...
tests/basis/test_05_RecordHepPubli.py
View file @
8b502fef
...
@@ -39,7 +39,7 @@ def test_constructor_ins_05002(record):
...
@@ -39,7 +39,7 @@ def test_constructor_ins_05002(record):
"""test the method _process_authors and _process_publication_info.
"""test the method _process_authors and _process_publication_info.
"""
"""
authors
=
record
[
"
authors
"
]
authors
=
record
.
df_
authors
assert
isinstance
(
authors
,
pd
.
DataFrame
)
assert
isinstance
(
authors
,
pd
.
DataFrame
)
refcols
=
[
"affiliation"
,
refcols
=
[
"affiliation"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment