Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 3dda33aa authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Improve RecordHepPubli._process_publication_info

parent 6bc19341
"""recordhep.py """recordhep.py
""" """
import logging
import pprint import pprint
...@@ -20,6 +21,8 @@ class RecordHep(dict): ...@@ -20,6 +21,8 @@ class RecordHep(dict):
super().__init__(recjson) super().__init__(recjson)
self.logger = logging.getLogger("web2py.app.limbra")
# meta data # meta data
# the authors of my institutes signing the record # the authors of my institutes signing the record
# string containing a list of name separated by a comma # string containing a list of name separated by a comma
......
...@@ -8,6 +8,37 @@ from store_tools.pluginauthors import PluginAuthors ...@@ -8,6 +8,37 @@ from store_tools.pluginauthors import PluginAuthors
from store_tools.pluginpublicationinfo import PluginPublicationInfo from store_tools.pluginpublicationinfo import PluginPublicationInfo
def pages(row):
    """Build the pagination string of a publication.

    Args:
        row (pandas.Series or dict): publication information, read through
            ``row.get``. The following keys are used, all of them optional:

            * artid: article identifier
            * page_start: first page
            * page_end: last page

            Values are assumed to be scalars. A value is considered missing
            when it is ``None`` or a pandas missing marker (NaN, NA, NaT).

    Returns:
        str:
            * ``"23-45"`` when both the first and the last page are known
            * ``"23"`` when only one of the two pages is known
            * the article identifier when no page is known
            * an empty string when nothing is known
    """
    # Function-scope import so that the helper only relies on pandas itself.
    from pandas import isna

    def _clean(value):
        """Map missing markers to None and whole floats (23.0) to int (23)."""
        if value is None or isna(value):
            return None
        # pandas upcasts an integer column to float as soon as one row of the
        # frame lacks the value; undo it so that "23" is shown, not "23.0".
        if isinstance(value, float) and value.is_integer():
            return int(value)
        return value

    artid = _clean(row.get("artid"))
    pstart = _clean(row.get("page_start"))
    pend = _clean(row.get("page_end"))

    if pstart is not None and pend is not None:
        return f"{pstart}-{pend}"

    # Only one page is known: show it alone rather than "None-45" / "nan-45".
    if pstart is not None:
        return f"{pstart}"
    if pend is not None:
        return f"{pend}"

    # No page at all: fall back on the article identifier when available.
    if artid is not None:
        return str(artid)

    return ""
class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo): class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
"""Article, preprint and proceeding from inspirehep.net version 2. """Article, preprint and proceeding from inspirehep.net version 2.
...@@ -137,18 +168,23 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo): ...@@ -137,18 +168,23 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
"pagination"] "pagination"]
self.df_info = DataFrame([[""] * len(cols)], columns=cols) self.df_info = DataFrame([[""] * len(cols)], columns=cols)
return return
# filter data to keep only rows with year information
data = [dct for dct in data if "year" in dct]
# convert data to a DataFrame with a well-known structure
df = (DataFrame(data) df = (DataFrame(data)
.astype({"year": str}) .astype({"year": str})
.rename(columns={"artid": "pagination", .rename(columns={"journal_title": "title",
"journal_title": "title", "journal_volume": "volume"},
"journal_volume": "volume"})) errors="ignore"))
columns = df.columns # construction pagination columns
df["pagination"] = df.apply(pages, axis="columns")
# erratum -- sort by year and volume # erratum -- sort by year and volume
columns = df.columns
if set(["year", "volume"]).issubset(columns): if set(["year", "volume"]).issubset(columns):
df = df.sort_values(["year", "volume"]) df = df.sort_values(["year", "volume"])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment