Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
343ca323
Commit
343ca323
authored
Jan 07, 2021
by
LE GAC Renaud
Browse files
Update to move df_info as attribute of the record
parent
7d9e797d
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
37 additions
and
29 deletions
+37
-29
modules/store_tools/pluginpublicationinfo.py
modules/store_tools/pluginpublicationinfo.py
+25
-22
modules/store_tools/recordhepconf.py
modules/store_tools/recordhepconf.py
+5
-0
modules/store_tools/recordheppubli.py
modules/store_tools/recordheppubli.py
+2
-2
modules/store_tools/recordpubli.py
modules/store_tools/recordpubli.py
+2
-2
tests/basis/test_04_RecordPubli.py
tests/basis/test_04_RecordPubli.py
+1
-1
tests/basis/test_05_RecordHepPubli.py
tests/basis/test_05_RecordHepPubli.py
+1
-1
tests/basis/test_13_CheckAndFix_article.py
tests/basis/test_13_CheckAndFix_article.py
+1
-1
No files found.
modules/store_tools/pluginpublicationinfo.py
View file @
343ca323
...
...
@@ -10,23 +10,26 @@ PAPER_REFERENCE_KEYS = {"pagination", "title", "volume", "year"}
class
PluginPublicationInfo
(
object
):
"""Plugin to handle publication_info in RecordPubli and RecordHepPubli
publication information are stored in DataFrame with the
following structure:
+------------+--------------------------------+
| column | |
+============+================================+
| title | abbreviation of the publisher |
+------------+--------------------------------+
| volume | volume |
+------------+--------------------------------+
| year | year of publication |
+------------+--------------------------------+
| pagination | page number or ranges |
+------------+--------------------------------+
Attributes:
df_info (pandas.DataFrame):
publication information is stored in a DataFrame with the
following structure:
+------------+--------------------------------+
| column | |
+============+================================+
| title | abbreviation of the publisher |
+------------+--------------------------------+
| volume | volume |
+------------+--------------------------------+
| year | year of publication |
+------------+--------------------------------+
| pagination | page number or ranges |
+------------+--------------------------------+
The DataFrame has to be built in the parent
It is retrieved via self
["
df_info
"]
It is retrieved via self
.
df_info
"""
...
...
@@ -46,7 +49,7 @@ class PluginPublicationInfo(object):
# * the row contains empty string when the record is not published.
# * iloc[0] returns a Series whose index holds the column names
#
columns
=
(
self
[
"
df_info
"
]
.
iloc
[
0
]
columns
=
(
self
.
df_info
.
iloc
[
0
]
.
replace
(
""
,
np
.
nan
)
.
dropna
()
.
index
)
...
...
@@ -60,7 +63,7 @@ class PluginPublicationInfo(object):
bool
"""
df
=
self
[
"
df_info
"
]
df
=
self
.
df_info
return
len
(
df
)
>
1
def
paper_editor
(
self
):
...
...
@@ -71,7 +74,7 @@ class PluginPublicationInfo(object):
empty string when not defined.
"""
df
=
self
[
"
df_info
"
]
df
=
self
.
df_info
editor
=
(
df
.
title
.
iloc
[
0
]
if
"title"
in
df
else
""
)
# add space after the dot Phys.Rev -> Phys. Rev
...
...
@@ -88,7 +91,7 @@ class PluginPublicationInfo(object):
* Empty string when not defined.
"""
df
=
self
[
"
df_info
"
]
df
=
self
.
df_info
return
(
df
.
pagination
.
iloc
[
0
]
if
"pagination"
in
df
else
""
)
def
paper_reference
(
self
):
...
...
@@ -101,7 +104,7 @@ class PluginPublicationInfo(object):
published in a review.
"""
df
=
self
[
"
df_info
"
]
df
=
self
.
df_info
its
=
df
.
columns
.
intersection
({
"title"
,
"volume"
,
...
...
@@ -128,7 +131,7 @@ class PluginPublicationInfo(object):
empty string when nothing is found.
"""
df
=
self
[
"
df_info
"
]
df
=
self
.
df_info
return
(
df
.
volume
.
iloc
[
0
]
if
"volume"
in
df
else
""
)
def
paper_year
(
self
):
...
...
@@ -139,5 +142,5 @@ class PluginPublicationInfo(object):
- Empty string if the year is not defined.
"""
df
=
self
[
"
df_info
"
]
df
=
self
.
df_info
return
(
df
.
year
.
iloc
[
0
]
if
"year"
in
df
else
""
)
modules/store_tools/recordhepconf.py
View file @
343ca323
...
...
@@ -11,6 +11,11 @@ from .recordheppubli import RecordHepPubli
class
RecordHepConf
(
RecordHepPubli
):
"""Conference proceeding from inspirehep.net version 2.
Attributes:
conference (dict):
metadata of the conference
schema is https://inspirehep.net/schemas/records/conferences.json
"""
...
...
modules/store_tools/recordheppubli.py
View file @
343ca323
...
...
@@ -129,7 +129,7 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
"year"
,
"pagination"
]
self
[
"
df_info
"
]
=
DataFrame
([[
""
]
*
len
(
cols
)],
columns
=
cols
)
self
.
df_info
=
DataFrame
([[
""
]
*
len
(
cols
)],
columns
=
cols
)
return
...
...
@@ -149,7 +149,7 @@ class RecordHepPubli(RecordHep, PluginAuthors, PluginPublicationInfo):
df
=
df
.
sort_values
(
"year"
)
# replace
self
[
"
df_info
"
]
=
df
self
.
df_info
=
df
def
collaboration
(
self
):
"""The collaboration(s) signing the publication.
...
...
modules/store_tools/recordpubli.py
View file @
343ca323
...
...
@@ -236,7 +236,7 @@ class RecordPubli(Record, PluginAuthors, PluginPublicationInfo):
"year"
,
"pagination"
]
self
[
"
df_info
"
]
=
DataFrame
([[
""
]
*
len
(
cols
)],
columns
=
cols
)
self
.
df_info
=
DataFrame
([[
""
]
*
len
(
cols
)],
columns
=
cols
)
return
...
...
@@ -261,7 +261,7 @@ class RecordPubli(Record, PluginAuthors, PluginPublicationInfo):
df
=
df
.
sort_values
(
"year"
)
# replace
self
[
"
df_info
"
]
=
df
self
.
df_info
=
df
def
collaboration
(
self
):
"""The collaboration(s) signing the publication.
...
...
tests/basis/test_04_RecordPubli.py
View file @
343ca323
...
...
@@ -52,7 +52,7 @@ def test_constructor_cds_04002(record):
assert
len
(
authors
)
==
record
[
"number_of_authors"
]
assert
authors
.
affiliation
.
iloc
[
12
]
==
"INFN, Rome|CERN"
papers
=
record
[
"
df_info
"
]
papers
=
record
.
df_info
assert
isinstance
(
papers
,
pd
.
DataFrame
)
assert
len
(
papers
)
==
1
...
...
tests/basis/test_05_RecordHepPubli.py
View file @
343ca323
...
...
@@ -52,7 +52,7 @@ def test_constructor_ins_05002(record):
assert
len
(
authors
)
==
704
assert
authors
.
affiliation
.
iloc
[
12
]
==
"INFN, Rome|CERN"
papers
=
record
[
"
df_info
"
]
papers
=
record
.
df_info
assert
isinstance
(
papers
,
pd
.
DataFrame
)
assert
len
(
papers
)
==
1
...
...
tests/basis/test_13_CheckAndFix_article.py
View file @
343ca323
...
...
@@ -66,7 +66,7 @@ def test_paper_reference_cds_13004(svc, reccds):
# remove the publisher and volume information
paper_ref
=
reccds
.
paper_reference
()
reccds
[
"
df_info
"
]
.
loc
[
0
,
[
"title"
,
"volume"
]]
=
[
""
,
""
]
reccds
.
df_info
.
loc
[
0
,
[
"title"
,
"volume"
]]
=
[
""
,
""
]
svc
.
paper_reference
(
reccds
)
assert
reccds
.
paper_reference
()
==
paper_ref
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment