Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
3dda33aa
Commit
3dda33aa
authored
Jan 11, 2021
by
LE GAC Renaud
Browse files
Improve RecordHepPubli._process_publication_info
parent
6bc19341
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
44 additions
and
5 deletions
+44
-5
modules/store_tools/recordhep.py
modules/store_tools/recordhep.py
+3
-0
modules/store_tools/recordheppubli.py
modules/store_tools/recordheppubli.py
+41
-5
No files found.
modules/store_tools/recordhep.py
View file @
3dda33aa
"""recordhep.py
"""
import
logging
import
pprint
...
...
@@ -20,6 +21,8 @@ class RecordHep(dict):
super
().
__init__
(
recjson
)
self
.
logger
=
logging
.
getLogger
(
"web2py.app.limbra"
)
# meta data
# the authors of my institutes signing the record
# string containing a list of name separated by a comma
...
...
modules/store_tools/recordheppubli.py
View file @
3dda33aa
...
...
@@ -8,6 +8,37 @@ from store_tools.pluginauthors import PluginAuthors
from
store_tools.pluginpublicationinfo
import
PluginPublicationInfo
def _is_missing(value):
    """Tell whether *value* carries no usable information.

    A value is considered missing when it is ``None`` (the key / column does
    not exist) or a float NaN (the placeholder pandas puts in a cell when a
    DataFrame is built from dictionaries that do not all share the same keys).
    """
    # NaN is the only float that is not equal to itself. The isinstance guard
    # restricts the self-comparison to floats, so no import is required and
    # objects with unusual ``!=`` semantics are never compared.
    return value is None or (isinstance(value, float) and value != value)


def pages(row):
    """Help function to build the pages argument.

    Designed to be applied row-wise on a DataFrame
    (``df.apply(pages, axis="columns")``), but any mapping exposing
    ``get(key, default)`` is accepted.

    Args:
        row (pandas.Series):
            * artid (str)
            * page_start (int)
            * page_end (int)

            Each entry may be absent, ``None`` or NaN; all three are treated
            as "missing".

    Returns:
        str:
            * either 23 or 23-45 when the first page is known
            * the article identifier, returned unchanged, when the first
              page is missing
            * the last page alone when it is the only information available
            * empty string when information is missing
    """
    artid = row.get("artid", None)
    pstart = row.get("page_start", None)
    pend = row.get("page_end", None)

    if _is_missing(pstart):
        # Without a first page a range cannot be written: prefer the article
        # identifier, and only then whatever is left of the pagination.
        if not _is_missing(artid):
            return artid
        return "" if _is_missing(pend) else f"{pend}"

    if _is_missing(pend):
        return f"{pstart}"

    return f"{pstart}-{pend}"
class
RecordHepPubli
(
RecordHep
,
PluginAuthors
,
PluginPublicationInfo
):
"""Article, preprint and proceeding from inspirehep.net version 2.
...
...
@@ -137,18 +168,23 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
"pagination"
]
self
.
df_info
=
DataFrame
([[
""
]
*
len
(
cols
)],
columns
=
cols
)
return
# filter data to keep only rows with year information
data
=
[
dct
for
dct
in
data
if
"year"
in
dct
]
# convert data to a DataFrame with a well-known structure
df
=
(
DataFrame
(
data
)
.
astype
({
"year"
:
str
})
.
rename
(
columns
=
{
"
artid"
:
"pagination
"
,
"journal_
title"
:
"titl
e"
,
"journal_volume"
:
"volume"
}
))
.
rename
(
columns
=
{
"
journal_title"
:
"title
"
,
"journal_
volume"
:
"volum
e"
}
,
errors
=
"ignore"
))
columns
=
df
.
columns
# build the pagination column
df
[
"pagination"
]
=
df
.
apply
(
pages
,
axis
=
"columns"
)
# erratum -- sort by year and volume
columns
=
df
.
columns
if
set
([
"year"
,
"volume"
]).
issubset
(
columns
):
df
=
df
.
sort_values
([
"year"
,
"volume"
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment