Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
5eff26ac
Commit
5eff26ac
authored
Jan 13, 2021
by
LE GAC Renaud
Browse files
Merge RecordHep into RecordHepPubli
parent
adc8d2b5
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
150 additions
and
176 deletions
+150
-176
modules/harvest_tools/checkandfix.py
modules/harvest_tools/checkandfix.py
+4
-4
modules/store_tools/__init__.py
modules/store_tools/__init__.py
+5
-6
modules/store_tools/recordhep.py
modules/store_tools/recordhep.py
+0
-162
modules/store_tools/recordheppubli.py
modules/store_tools/recordheppubli.py
+139
-2
tests/basis/test_03_Record.py
tests/basis/test_03_Record.py
+2
-2
No files found.
modules/harvest_tools/checkandfix.py
View file @
5eff26ac
...
...
@@ -14,11 +14,11 @@ from store_tools import (MSG_NO_CONF,
OAI_URL
,
RecordCds
,
RecordCdsConf
,
RecordHep
,
RecordHepConf
,
RecordHepThesis
,
RecordCdsPubli
,
RecordCdsThesis
,
RecordHepConf
,
RecordHepPubli
,
RecordHepThesis
,
REG_OAI
,
REG_YEAR
)
...
...
@@ -1000,7 +1000,7 @@ class CheckAndFix(object):
else
:
record
[
"prepublication"
]
=
{
"date"
:
date
}
elif
isinstance
(
record
,
RecordHep
):
elif
isinstance
(
record
,
RecordHep
Publi
):
record
[
"preprint_date"
]
=
date
def
temporary_record
(
self
,
record
):
...
...
modules/store_tools/__init__.py
View file @
5eff26ac
...
...
@@ -28,16 +28,15 @@ from .exception import (StoreException,
from
.factory
import
build_record
,
build_store
from
.inspirehepstore
import
InspirehepStore
from
.publicationinfomixin
import
PublicationInfoMixin
from
store_tools.cdsstore
import
CdsStore
from
store_tools.recordcds
import
RecordCds
from
store_tools.recordcdsconf
import
RecordCdsConf
from
.recordhep
import
RecordHep
from
.cdsstore
import
CdsStore
from
.recordcds
import
RecordCds
from
.recordcdsconf
import
RecordCdsConf
from
.recordcdspubli
import
RecordCdsPubli
from
.recordcdsthesis
import
RecordCdsThesis
from
.recordhepconf
import
RecordHepConf
from
.recordhepinst
import
RecordHepInst
from
.recordheppubli
import
RecordHepPubli
from
.recordhepthesis
import
RecordHepThesis
from
store_tools.recordcdspubli
import
RecordCdsPubli
from
store_tools.recordcdsthesis
import
RecordCdsThesis
def
load_record
(
host
,
record_id
,
shelf
=
None
):
...
...
modules/store_tools/recordhep.py
deleted
100644 → 0
View file @
adc8d2b5
"""recordhep.py
"""
import
logging
import
pprint
class RecordHep(dict):
    """Base class for JSON record coming from inspirehep.net version v2.

    Schema for records are documented here:
    https://inspire-schemas.readthedocs.io/en/latest/schemas/

    Args:
        recjson (dict):
            meta data from the JSON record returned by the store

    """

    def __init__(self, recjson):
        super().__init__(recjson)

        self.logger = logging.getLogger("web2py.app.limbra")

        # meta data
        # the authors of my institutes signing the record
        # string containing a list of name separated by a comma
        self.my_authors = ""

    @staticmethod
    def _oai_url(value):
        """Build the Open Archive Initiative URL.

        Args:
            value (str):
                OAI identifier, *e.g.* ``oai:host:id``

        Returns:
            str:
                the pattern of the string is `https://host/api/literature/id`
                The string is empty when it is not defined or when the value
                is not well formed.

        """
        if not isinstance(value, str):
            return ""

        # a well formed identifier has exactly three non-empty fields
        # separated by colons, the first one being the literal "oai"
        fields = value.strip().split(":")
        if len(fields) != 3:
            return ""

        prefix, host, recid = fields
        if prefix != "oai" or not host or not recid:
            return ""

        return f"https://{host}/api/literature/{recid}"

    def _cds_identifier(self):
        """Find the identifier of the record in the store *cds.cern.ch*.

        Entries of ``external_system_identifiers`` without a ``schema`` or
        a ``value`` key are ignored instead of raising ``KeyError``.

        Returns:
            the value of the first entry whose schema is ``CDS`` or
            ``None`` when there is no such entry.

        """
        for elt in self.get("external_system_identifiers") or ():
            if elt.get("schema") == "CDS" and elt.get("value") is not None:
                return elt["value"]

        return None

    def debug(self):
        """Print the record structure on the standard output.

        """
        pprint.pprint(self)

    def host(self):
        """The store housing the record.

        Returns:
            str:

        """
        return "inspirehep.net"

    def id(self):
        """The id of the record in the store.

        Returns:
            int:

        Raises:
            KeyError:
                when the record has no ``control_number``

        """
        return self["control_number"]

    def oai(self):
        """The Open Archive Initiative identifier(s).

        Returns:
            str:
                * the pattern of the identifier is ``oai:host:id``
                * primary and secondary OAI identifier are separated by a comma
                * an empty string when it is not defined.

        """
        lst = [self.primary_oai(), self.secondary_oai()]

        # strip removes the dangling separator left by an empty identifier
        return ", ".join(lst).strip(", ")

    def oai_url(self):
        """The Open Archive Initiative identifier URL(s).

        Returns:
            str:
                * the pattern of the URL is ``http://host/record/id``
                * primary and secondary URLs are separated by a comma.
                * an empty string when it is not defined

        """
        lst = [self.primary_oai_url(), self.secondary_oai_url()]

        # strip removes the dangling separator left by an empty URL
        return ", ".join(lst).strip(", ")

    def primary_oai(self):
        """The primary Open Archive Initiative identifier.
        The primary OAI identifier matches the record identifier.

        Returns:
            str:
                * the pattern is ``oai:host:id``.
                * empty string when it is not defined

        """
        recid = self.get("control_number")
        if recid is None:
            return ""

        return f"oai:inspirehep.net:{recid}"

    def primary_oai_url(self):
        """The Open Archive Initiative URL for the primary OAI.

        Returns:
            str:
                * the pattern is ``http://inspirehep.net/record/id``
                * empty string when it is not defined

        """
        recid = self.get("control_number")
        if recid is None:
            return ""

        return f"http://inspirehep.net/record/{recid}"

    def secondary_oai(self):
        """The secondary OAI identifier.
        the secondary OAI identifier corresponds to the record in the
        store, *cds.cern.ch*.

        Returns:
            str:
                * the pattern is ``oai:host:id``.
                * empty string when it is not defined

        """
        cds_id = self._cds_identifier()
        if cds_id is None:
            return ""

        return f"oai:cds.cern.ch:{cds_id}"

    def secondary_oai_url(self):
        """The Open Archive Initiative URL for the secondary OAI.
        the secondary OAI URL corresponds to the record in the
        store, *cds.cern.ch*.

        Returns:
            str:
                * the pattern is ``http://cds.cern.ch/record/id``
                * empty string when it is not defined

        """
        cds_id = self._cds_identifier()
        if cds_id is None:
            return ""

        return f"http://cds.cern.ch/record/{cds_id}"
modules/store_tools/recordheppubli.py
View file @
5eff26ac
...
...
@@ -2,12 +2,12 @@
"""
import
logging
import
pprint
from
.authorsmixin
import
AuthorsMixin
from
filters
import
CLEAN_COLLABORATION
from
pandas
import
DataFrame
from
.publicationinfomixin
import
PublicationInfoMixin
from
.recordhep
import
RecordHep
def
pages
(
row
):
...
...
@@ -41,7 +41,7 @@ def pages(row):
return
f
"
{
pstart
}
-
{
pend
}
"
class
RecordHepPubli
(
RecordHep
,
AuthorsMixin
,
PublicationInfoMixin
):
class
RecordHepPubli
(
dict
,
AuthorsMixin
,
PublicationInfoMixin
):
"""Article, preprint and proceeding from inspirehep.net version 2.
Schema for publication is documented here:
...
...
@@ -59,6 +59,26 @@ class RecordHepPubli(RecordHep, AuthorsMixin, PublicationInfoMixin):
self
.
_process_authors
()
self
.
_process_publication_info
()
# the authors of my institutes signing the record
# string containing a list of name separated by a comma
self
.
my_authors
=
""
@staticmethod
def _oai_url(value):
    """Build the Open Archive Initiative URL.

    Args:
        value (str):
            OAI identifier, *e.g.* ``oai:host:id``

    Returns:
        str:
            the pattern of the string is `https://host/api/literature/id`
            The string is empty when it is not defined or when the value
            is not well formed.

    """
    if not isinstance(value, str):
        return ""

    # a well formed identifier has exactly three non-empty fields
    # separated by colons, the first one being the literal "oai"
    fields = value.strip().split(":")
    if len(fields) != 3:
        return ""

    prefix, host, recid = fields
    if prefix != "oai" or not host or not recid:
        return ""

    return f"https://{host}/api/literature/{recid}"
def
_process_authors
(
self
):
"""Convert authors information into DataFrame:
...
...
@@ -224,6 +244,43 @@ class RecordHepPubli(RecordHep, AuthorsMixin, PublicationInfoMixin):
lst
=
[
elt
[
"value"
]
for
elt
in
collaborations
]
return
CLEAN_COLLABORATION
(
", "
.
join
(
lst
))
def debug(self):
    """Dump the record on the standard output.

    The whole record structure is pretty-printed.

    """
    show = pprint.pprint
    show(self)
def host(self):
    """Name of the store in which the record is housed.

    Returns:
        str:
            always ``inspirehep.net``

    """
    store_host = "inspirehep.net"
    return store_host
def id(self):
    """Identifier of the record in the store.

    Returns:
        the value stored under the key ``control_number``

    Raises:
        KeyError:
            when the record has no ``control_number``

    """
    recid = self["control_number"]
    return recid
def oai(self):
    """Open Archive Initiative identifier(s) of the record.

    Returns:
        str:
            * each identifier follows the pattern ``oai:host:id``
            * the primary identifier comes first, followed by the
              secondary one, separated by a comma
            * empty when none of them is defined

    """
    identifiers = (self.primary_oai(), self.secondary_oai())
    joined = ", ".join(identifiers)

    # drop the dangling separator left when an identifier is empty
    return joined.strip(", ")
def
paper_url
(
self
):
"""The URL of the document.
...
...
@@ -253,6 +310,31 @@ class RecordHepPubli(RecordHep, AuthorsMixin, PublicationInfoMixin):
lst
=
[
f
"arXiv:
{
elt
[
'value'
]
}
"
for
elt
in
lst
]
return
", "
.
join
(
lst
)
def primary_oai(self):
    """The primary Open Archive Initiative identifier.
    The primary OAI identifier matches the record identifier.

    Returns:
        str:
            * the pattern is ``oai:host:id``.
            * empty string when it is not defined

    """
    # honour the documented contract: a record without control number
    # yields an empty string rather than a KeyError
    recid = self.get("control_number")
    if recid is None:
        return ""

    return f"oai:inspirehep.net:{recid}"
def primary_oai_url(self):
    """The Open Archive Initiative URL for the primary OAI.

    Returns:
        str:
            * the pattern is ``http://inspirehep.net/record/id``
            * empty string when it is not defined

    """
    # honour the documented contract: a record without control number
    # yields an empty string rather than a KeyError
    recid = self.get("control_number")
    if recid is None:
        return ""

    return f"http://inspirehep.net/record/{recid}"
def
report_number
(
self
):
"""The report number(s) associated to the publication.
...
...
@@ -271,6 +353,61 @@ class RecordHepPubli(RecordHep, AuthorsMixin, PublicationInfoMixin):
lst
=
[
elt
[
"value"
]
for
elt
in
lst
]
return
", "
.
join
(
lst
)
def oai_url(self):
    """URL(s) associated to the Open Archive Initiative identifier(s).

    Returns:
        str:
            * each URL follows the pattern ``http://host/record/id``
            * the primary URL comes first, followed by the secondary
              one, separated by a comma
            * empty when none of them is defined

    """
    urls = (self.primary_oai_url(), self.secondary_oai_url())
    joined = ", ".join(urls)

    # drop the dangling separator left when a URL is empty
    return joined.strip(", ")
def secondary_oai(self):
    """OAI identifier of the record in the store *cds.cern.ch*.

    The identifier is built from the first entry of
    ``external_system_identifiers`` whose schema is ``CDS``.

    Returns:
        str:
            * the pattern is ``oai:host:id``.
            * empty string when there is no such entry

    """
    if "external_system_identifiers" in self:
        candidates = (
            f"oai:cds.cern.ch:{entry['value']}"
            for entry in self["external_system_identifiers"]
            if entry["schema"] == "CDS"
        )
        # lazy evaluation: stop at the first CDS entry
        return next(candidates, "")

    return ""
def secondary_oai_url(self):
    """URL of the record in the store *cds.cern.ch*.

    The URL is built from the first entry of
    ``external_system_identifiers`` whose schema is ``CDS``.

    Returns:
        str:
            * the pattern is ``http://cds.cern.ch/record/id``
            * empty string when there is no such entry

    """
    if "external_system_identifiers" in self:
        candidates = (
            f"http://cds.cern.ch/record/{entry['value']}"
            for entry in self["external_system_identifiers"]
            if entry["schema"] == "CDS"
        )
        # lazy evaluation: stop at the first CDS entry
        return next(candidates, "")

    return ""
def
submitted
(
self
):
"""The date of submission.
...
...
tests/basis/test_03_Record.py
View file @
5eff26ac
...
...
@@ -16,7 +16,7 @@ import pytest
from
store_tools.factory
import
build_store
from
store_tools.recordcds
import
RecordCds
from
store_tools.recordhep
import
RecordHep
from
store_tools.recordhep
publi
import
RecordHep
Publi
@
pytest
.
fixture
(
scope
=
"module"
)
...
...
@@ -30,7 +30,7 @@ def reccds():
def
recins
():
store
=
build_store
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1319638
)
return
RecordHep
(
recjson
)
return
RecordHep
Publi
(
recjson
)
# ............................................................................
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment