Commit 8b502fef authored by LE GAC Renaud
Browse files

Update to add df_authors as attribute of the record

parent 343ca323
...@@ -52,25 +52,28 @@ def to_initial(name): ...@@ -52,25 +52,28 @@ def to_initial(name):
class PluginAuthors(object): class PluginAuthors(object):
"""Plugin to handle authors in RecordPubli and RecordHepPubli """Plugin to handle authors in RecordPubli and RecordHepPubli
Authors and their affiliations are stored in DataFrame with the Attributes:
following structure:
df_authors (pandas.DataFrame):
+---------------+--------------------------------+ Authors and their affiliations are stored in DataFrame with the
| column | | following structure:
+===============+================================+
| affiliation | value separated by "|" | +---------------+--------------------------------+
+---------------+--------------------------------+ | column | |
| first_name | first name | +===============+================================+
+---------------+--------------------------------+ | affiliation | value separated by "|" |
| fmt_name | formatted name | +---------------+--------------------------------+
+---------------+--------------------------------+ | first_name | first name |
| full_name | Last, First | +---------------+--------------------------------+
+---------------+--------------------------------+ | fmt_name | formatted name |
| last_name | family name | +---------------+--------------------------------+
+---------------+--------------------------------+ | full_name | Last, First |
+---------------+--------------------------------+
The DataFrame has to be built in the parent | last_name | family name |
It is retrieved via self["authors"] +---------------+--------------------------------+
The DataFrame has to be built in the parent
It is retrieved via self.df_authors
""" """
...@@ -108,7 +111,7 @@ class PluginAuthors(object): ...@@ -108,7 +111,7 @@ class PluginAuthors(object):
* the list is empty when authors are not defined. * the list is empty when authors are not defined.
""" """
df = self["authors"] df = self.df_authors
if sort: if sort:
li = (df[["last_name", "fmt_name"]] li = (df[["last_name", "fmt_name"]]
...@@ -142,7 +145,7 @@ class PluginAuthors(object): ...@@ -142,7 +145,7 @@ class PluginAuthors(object):
- empty string when nothing is found. - empty string when nothing is found.
""" """
df = self["authors"] df = self.df_authors
query = df.affiliation.str.match(pattern) query = df.affiliation.str.match(pattern)
data = df[query] data = df[query]
...@@ -174,7 +177,7 @@ class PluginAuthors(object): ...@@ -174,7 +177,7 @@ class PluginAuthors(object):
* The string is empty when nothing is found. * The string is empty when nothing is found.
""" """
df = self["authors"] df = self.df_authors
query = df.fmt_name.str.contains(pattern) query = df.fmt_name.str.contains(pattern)
...@@ -213,7 +216,7 @@ class PluginAuthors(object): ...@@ -213,7 +216,7 @@ class PluginAuthors(object):
* Empty string when authors are not found. * Empty string when authors are not found.
""" """
df = self["authors"] df = self.df_authors
query = df.affiliation.str.contains(pattern) query = df.affiliation.str.contains(pattern)
...@@ -237,7 +240,7 @@ class PluginAuthors(object): ...@@ -237,7 +240,7 @@ class PluginAuthors(object):
empty string when the first author is not defined. empty string when the first author is not defined.
""" """
return self["authors"].fmt_name.iloc[0] return self.df_authors.fmt_name.iloc[0]
def first_author_institutes(self): def first_author_institutes(self):
"""The institute(s) associated to the first author. """The institute(s) associated to the first author.
...@@ -248,7 +251,7 @@ class PluginAuthors(object): ...@@ -248,7 +251,7 @@ class PluginAuthors(object):
- The string is empty when institutes are not defined. - The string is empty when institutes are not defined.
""" """
val = self["authors"].affiliation.iloc[0] val = self.df_authors.affiliation.iloc[0]
return ("" if val == NaN else val) return ("" if val == NaN else val)
def institutes(self): def institutes(self):
...@@ -259,7 +262,7 @@ class PluginAuthors(object): ...@@ -259,7 +262,7 @@ class PluginAuthors(object):
the list is sorted in alphabetical order. the list is sorted in alphabetical order.
""" """
df = self["authors"] df = self.df_authors
# expand multi-affiliation (one per column) # expand multi-affiliation (one per column)
df = df.affiliation.str.split("|", expand=True) df = df.affiliation.str.split("|", expand=True)
...@@ -285,7 +288,7 @@ class PluginAuthors(object): ...@@ -285,7 +288,7 @@ class PluginAuthors(object):
bool: bool:
""" """
df = self["authors"] df = self.df_authors
if len(df) == 1 and df.affiliation.iloc[0] == "": if len(df) == 1 and df.affiliation.iloc[0] == "":
return False return False
...@@ -299,7 +302,7 @@ class PluginAuthors(object): ...@@ -299,7 +302,7 @@ class PluginAuthors(object):
bool: bool:
""" """
df = self["authors"] df = self.df_authors
query = df.affiliation.isin(["", NaN]) query = df.affiliation.isin(["", NaN])
return df.affiliation[query].size == 0 return df.affiliation[query].size == 0
...@@ -311,7 +314,7 @@ class PluginAuthors(object): ...@@ -311,7 +314,7 @@ class PluginAuthors(object):
bool: bool:
""" """
df = self["authors"] df = self.df_authors
cols = {"first_name", "full_name", "last_name"} cols = {"first_name", "full_name", "last_name"}
if len(df.columns.intersection(cols)) != 3: if len(df.columns.intersection(cols)) != 3:
...@@ -346,7 +349,7 @@ class PluginAuthors(object): ...@@ -346,7 +349,7 @@ class PluginAuthors(object):
self._last_fmt_author = fmt self._last_fmt_author = fmt
df = self["authors"] df = self.df_authors
# .................................................................... # ....................................................................
# #
......
...@@ -60,7 +60,7 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo): ...@@ -60,7 +60,7 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
"fmt_name", "fmt_name",
"full_name", "full_name",
"last_name"] "last_name"]
self["authors"] = DataFrame([[""] * len(cols)], columns=cols) self.df_authors = DataFrame([[""] * len(cols)], columns=cols)
return return
data = [] data = []
...@@ -88,7 +88,7 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo): ...@@ -88,7 +88,7 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
df = df.drop_duplicates("full_name") df = df.drop_duplicates("full_name")
# replace # replace
self["authors"] = df self.df_authors = df
def _process_publication_info(self): def _process_publication_info(self):
"""Convert publication_info into DataFrame: """Convert publication_info into DataFrame:
......
...@@ -161,7 +161,7 @@ class RecordPubli(Record, PluginAuthors, PluginPublicationInfo): ...@@ -161,7 +161,7 @@ class RecordPubli(Record, PluginAuthors, PluginPublicationInfo):
"fmt_name", "fmt_name",
"full_name", "full_name",
"last_name"] "last_name"]
self["authors"] = DataFrame([[""] * len(cols)], columns=cols) self.df_authors = DataFrame([[""] * len(cols)], columns=cols)
return return
data = self["authors"] data = self["authors"]
...@@ -197,7 +197,7 @@ class RecordPubli(Record, PluginAuthors, PluginPublicationInfo): ...@@ -197,7 +197,7 @@ class RecordPubli(Record, PluginAuthors, PluginPublicationInfo):
df["fmt_name"] = df.full_name df["fmt_name"] = df.full_name
# replace # replace
self["authors"] = df self.df_authors = df
def _process_publication_info(self): def _process_publication_info(self):
"""Convert publication_info into DataFrame: """Convert publication_info into DataFrame:
......
...@@ -39,7 +39,7 @@ def test_constructor_cds_04002(record): ...@@ -39,7 +39,7 @@ def test_constructor_cds_04002(record):
"""test the method _process_authors and _process_publication_info. """test the method _process_authors and _process_publication_info.
""" """
authors = record["authors"] authors = record.df_authors
assert isinstance(authors, pd.DataFrame) assert isinstance(authors, pd.DataFrame)
refcols = ["affiliation", refcols = ["affiliation",
......
...@@ -39,7 +39,7 @@ def test_constructor_ins_05002(record): ...@@ -39,7 +39,7 @@ def test_constructor_ins_05002(record):
"""test the method _process_authors and _process_publication_info. """test the method _process_authors and _process_publication_info.
""" """
authors = record["authors"] authors = record.df_authors
assert isinstance(authors, pd.DataFrame) assert isinstance(authors, pd.DataFrame)
refcols = ["affiliation", refcols = ["affiliation",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment