Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 3dda33aa authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Improve RecordHepPubli._process_publication_info

parent 6bc19341
"""recordhep.py
"""
import logging
import pprint
......@@ -20,6 +21,8 @@ class RecordHep(dict):
super().__init__(recjson)
self.logger = logging.getLogger("web2py.app.limbra")
# meta data
# the authors of my institutes signing the record
# string containing a list of names separated by commas
......
......@@ -8,6 +8,37 @@ from store_tools.pluginauthors import PluginAuthors
from store_tools.pluginpublicationinfo import PluginPublicationInfo
def pages(row):
    """Help function to build the pages argument.

    Designed to be applied row by row on a DataFrame. In that context a
    key that is absent for a given record shows up as NaN rather than
    None, and integer pages may have been upcast to float. Both cases are
    normalised before the string is built.

    Args:
        row (pandas.Series or any mapping providing ``get``):
            * artid (str)
            * page_start (int)
            * page_end (int)

    Returns:
        str:
            * ``23-45`` when both the first and the last page are known
            * ``23`` when only one of the two pages is known
            * the value of ``artid`` when no page is known
            * empty string when information is missing

    """
    def _clean(value):
        """Map None / NaN to None and integral floats to int."""
        if value is None:
            return None
        if isinstance(value, float):
            # NaN is the only float which differs from itself
            if value != value:
                return None
            # 23.0 has to be rendered as 23, not 23.0
            if value.is_integer():
                return int(value)
        return value

    artid = _clean(row.get("artid", None))
    pstart = _clean(row.get("page_start", None))
    pend = _clean(row.get("page_end", None))

    if pstart is None and pend is None:
        return "" if artid is None else f"{artid}"

    # a lone end page must not be rendered as "None-45":
    # prefer the article id, otherwise keep the only known page
    if pstart is None:
        return f"{pend}" if artid is None else f"{artid}"

    if pend is None:
        return f"{pstart}"

    return f"{pstart}-{pend}"
class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
"""Article, preprint and proceeding from inspirehep.net version 2.
......@@ -137,18 +168,23 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
"pagination"]
self.df_info = DataFrame([[""] * len(cols)], columns=cols)
return
# filter data to keep only row with year information
data = [dct for dct in data if "year" in dct]
# convert data to a DataFrame with a well-known structure
df = (DataFrame(data)
.astype({"year": str})
.rename(columns={"artid": "pagination",
"journal_title": "title",
"journal_volume": "volume"}))
.rename(columns={"journal_title": "title",
"journal_volume": "volume"},
errors="ignore"))
columns = df.columns
# construct the pagination column
df["pagination"] = df.apply(pages, axis="columns")
# erratum -- sort by year and volume
columns = df.columns
if set(["year", "volume"]).issubset(columns):
df = df.sort_values(["year", "volume"])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment