Commit 5eff26ac authored by LE GAC Renaud
Browse files

Merge RecordHep into RecordHepPubli

parent adc8d2b5
......@@ -14,11 +14,11 @@ from store_tools import (MSG_NO_CONF,
OAI_URL,
RecordCds,
RecordCdsConf,
RecordHep,
RecordHepConf,
RecordHepThesis,
RecordCdsPubli,
RecordCdsThesis,
RecordHepConf,
RecordHepPubli,
RecordHepThesis,
REG_OAI,
REG_YEAR)
......@@ -1000,7 +1000,7 @@ class CheckAndFix(object):
else:
record["prepublication"] = {"date": date}
elif isinstance(record, RecordHep):
elif isinstance(record, RecordHepPubli):
record["preprint_date"] = date
def temporary_record(self, record):
......
......@@ -28,16 +28,15 @@ from .exception import (StoreException,
from .factory import build_record, build_store
from .inspirehepstore import InspirehepStore
from .publicationinfomixin import PublicationInfoMixin
from store_tools.cdsstore import CdsStore
from store_tools.recordcds import RecordCds
from store_tools.recordcdsconf import RecordCdsConf
from .recordhep import RecordHep
from .cdsstore import CdsStore
from .recordcds import RecordCds
from .recordcdsconf import RecordCdsConf
from .recordcdspubli import RecordCdsPubli
from .recordcdsthesis import RecordCdsThesis
from .recordhepconf import RecordHepConf
from .recordhepinst import RecordHepInst
from .recordheppubli import RecordHepPubli
from .recordhepthesis import RecordHepThesis
from store_tools.recordcdspubli import RecordCdsPubli
from store_tools.recordcdsthesis import RecordCdsThesis
def load_record(host, record_id, shelf=None):
......
"""recordhep.py
"""
import logging
import pprint
class RecordHep(dict):
    """Base class for JSON record coming from inspirehep.net version v2.

    Schema for records are documented here:
    https://inspire-schemas.readthedocs.io/en/latest/schemas/

    Args:
        recjson (dict):
            meta data from the JSON record returned by the store

    """

    def __init__(self, recjson):
        super().__init__(recjson)

        self.logger = logging.getLogger("web2py.app.limbra")

        # meta data
        # the authors of my institutes signing the record
        # string containing a list of name separated by a comma
        self.my_authors = ""

    @staticmethod
    def _oai_url(value):
        """Build the Open Archive Initiative URL.

        Args:
            value (str):
                OAI identifier, *e.g.* ``oai:host:id``

        Returns:
            str:
                the pattern of the string is `https://host/api/literature/id`
                The string is empty when it is not defined or when the value
                is not well formed.

        """
        # the method previously had no body and therefore returned None,
        # which contradicts the documented contract (always a string)
        if not value or not isinstance(value, str):
            return ""

        parts = value.strip().split(":")
        if len(parts) != 3:
            return ""

        prefix, host, recid = parts
        if prefix != "oai" or not host or not recid:
            return ""

        return f"https://{host}/api/literature/{recid}"

    def _cds_identifier(self):
        """Find the identifier of the record in the store *cds.cern.ch*.

        Returns:
            the ``value`` of the first entry of
            ``external_system_identifiers`` whose ``schema`` is ``CDS``,
            or ``None`` when there is no such entry.

        """
        for elt in self.get("external_system_identifiers", ()):
            if elt["schema"] == "CDS":
                return elt["value"]

        return None

    def debug(self):
        """Print the record structure on the standard output.

        """
        pprint.pprint(self)

    def host(self):
        """The store housing the record.

        Returns:
            str:
                always ``inspirehep.net``

        """
        return "inspirehep.net"

    def id(self):
        """The id of the record in the store.

        Returns:
            the value of the ``control_number`` field

        Raises:
            KeyError:
                when the record has no ``control_number`` field

        """
        return self["control_number"]

    def oai(self):
        """The Open Archive Initiative identifier(s).

        Returns:
            str:
                * the pattern of the identifier is ``oai:host:id``
                * primary and secondary OAI identifier are separated by a comma
                * an empty string when it is not defined.

        """
        lst = [self.primary_oai(), self.secondary_oai()]
        # strip removes the dangling separator left by an empty element
        return ", ".join(lst).strip(", ")

    def oai_url(self):
        """The Open Archive Initiative identifier URL(s).

        Returns:
            str:
                * the pattern of the URL is ``http://host/record/id``
                * primary and secondary URLs are separated by a comma.
                * an empty string when it is not defined

        """
        lst = [self.primary_oai_url(), self.secondary_oai_url()]
        # strip removes the dangling separator left by an empty element
        return ", ".join(lst).strip(", ")

    def primary_oai(self):
        """The primary Open Archive Initiative identifier.
        The primary OAI identifier matches the record identifier.

        Returns:
            str:
                the pattern is ``oai:inspirehep.net:id``.

        Raises:
            KeyError:
                when the record has no ``control_number`` field

        """
        return f"oai:inspirehep.net:{self['control_number']}"

    def primary_oai_url(self):
        """The Open Archive Initiative URL for the primary OAI.

        Returns:
            str:
                the pattern is ``http://inspirehep.net/record/id``

        Raises:
            KeyError:
                when the record has no ``control_number`` field

        """
        recid = self["control_number"]
        return f"http://inspirehep.net/record/{recid}"

    def secondary_oai(self):
        """The secondary OAI identifier.
        the secondary OAI identifier corresponds to the record in the
        store, *cds.cern.ch*.

        Returns:
            str:
                * the pattern is ``oai:host:id``.
                * empty string when it is not defined

        """
        cds_id = self._cds_identifier()
        if cds_id is None:
            return ""

        return f"oai:cds.cern.ch:{cds_id}"

    def secondary_oai_url(self):
        """The Open Archive Initiative URL for the secondary OAI.
        the secondary OAI URL corresponds to the record in the
        store, *cds.cern.ch*.

        Returns:
            str:
                * the pattern is ``http://cds.cern.ch/record/id``
                * empty string when it is not defined

        """
        cds_id = self._cds_identifier()
        if cds_id is None:
            return ""

        return f"http://cds.cern.ch/record/{cds_id}"
......@@ -2,12 +2,12 @@
"""
import logging
import pprint
from .authorsmixin import AuthorsMixin
from filters import CLEAN_COLLABORATION
from pandas import DataFrame
from .publicationinfomixin import PublicationInfoMixin
from .recordhep import RecordHep
def pages(row):
......@@ -41,7 +41,7 @@ def pages(row):
return f"{pstart}-{pend}"
class RecordHepPubli(RecordHep, AuthorsMixin, PublicationInfoMixin):
class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
"""Article, preprint and proceeding from inspirehep.net version 2.
Schema for publication is documented here:
......@@ -59,6 +59,26 @@ class RecordHepPubli(RecordHep, AuthorsMixin, PublicationInfoMixin):
self._process_authors()
self._process_publication_info()
# the authors of my institutes signing the record
# string containing a list of name separated by a comma
self.my_authors = ""
@staticmethod
def _oai_url(value):
"""Build the Open Archive Initiative URL.
Args:
value (str):
OAI identifier, *e.g.* ``oai:host:id``
Returns:
str:
the pattern of the string is `https://host/api/literature/id`
The string is empty when it is not defined or when the value
is not well formed.
"""
def _process_authors(self):
"""Convert authors information into DataFrame:
......@@ -224,6 +244,43 @@ class RecordHepPubli(RecordHep, AuthorsMixin, PublicationInfoMixin):
lst = [elt["value"] for elt in collaborations]
return CLEAN_COLLABORATION(", ".join(lst))
def debug(self):
    """Dump the content of the record on the standard output.

    The record is pretty-printed with the ``pprint`` module.

    """
    printer = pprint.pprint
    printer(self)
def host(self):
    """Name of the store from which the record comes.

    Returns:
        str:
            always ``inspirehep.net``

    """
    store_host = "inspirehep.net"
    return store_host
def id(self):
    """Identifier of the record in the store.

    Returns:
        the value found under the ``control_number`` key of the record

    """
    record_id = self["control_number"]
    return record_id
def oai(self):
    """Primary and secondary Open Archive Initiative identifiers.

    Returns:
        str:
            * each identifier follows the pattern ``oai:host:id``
            * the primary one comes first, followed by the secondary
              one, separated by a comma
            * leading and trailing separators are removed, so the
              result is empty when both identifiers are empty

    """
    identifiers = (self.primary_oai(), self.secondary_oai())
    joined = ", ".join(identifiers)

    # drop the separator left over when one identifier is empty
    return joined.strip(", ")
def paper_url(self):
"""The URL of the document.
......@@ -253,6 +310,31 @@ class RecordHepPubli(RecordHep, AuthorsMixin, PublicationInfoMixin):
lst = [f"arXiv:{elt['value']}" for elt in lst]
return ", ".join(lst)
def primary_oai(self):
    """Primary Open Archive Initiative identifier of the record.

    It is built from the record identifier, namely the value of
    the ``control_number`` field.

    Returns:
        str:
            the pattern is ``oai:inspirehep.net:id``

    """
    recid = self["control_number"]
    return f"oai:inspirehep.net:{recid}"
def primary_oai_url(self):
    """URL associated to the primary OAI identifier.

    Returns:
        str:
            the pattern is ``http://inspirehep.net/record/id`` where
            ``id`` is the value of the ``control_number`` field

    """
    return f"http://inspirehep.net/record/{self['control_number']}"
def report_number(self):
"""The report number(s) associated to the publication.
......@@ -271,6 +353,61 @@ class RecordHepPubli(RecordHep, AuthorsMixin, PublicationInfoMixin):
lst = [elt["value"] for elt in lst]
return ", ".join(lst)
def oai_url(self):
    """URLs of the primary and secondary OAI identifiers.

    Returns:
        str:
            * the primary URL comes first, followed by the secondary
              one, separated by a comma
            * leading and trailing separators are removed, so the
              result is empty when both URLs are empty

    """
    urls = (self.primary_oai_url(), self.secondary_oai_url())
    joined = ", ".join(urls)

    # drop the separator left over when one URL is empty
    return joined.strip(", ")
def secondary_oai(self):
    """Secondary OAI identifier, pointing to the store *cds.cern.ch*.

    The identifier is built from the first entry of the
    ``external_system_identifiers`` field whose ``schema`` is ``CDS``.

    Returns:
        str:
            * the pattern is ``oai:cds.cern.ch:id``
            * empty string when the field is missing or when it
              contains no ``CDS`` entry

    """
    external_ids = self.get("external_system_identifiers", ())
    candidates = (
        f"oai:cds.cern.ch:{entry['value']}"
        for entry in external_ids
        if entry["schema"] == "CDS"
    )

    # only the first CDS entry matters
    return next(candidates, "")
def secondary_oai_url(self):
    """URL of the record in the secondary store, *cds.cern.ch*.

    The URL is built from the first entry of the
    ``external_system_identifiers`` field whose ``schema`` is ``CDS``.

    Returns:
        str:
            * the pattern is ``http://cds.cern.ch/record/id``
            * empty string when the field is missing or when it
              contains no ``CDS`` entry

    """
    external_ids = self.get("external_system_identifiers", ())
    candidates = (
        f"http://cds.cern.ch/record/{entry['value']}"
        for entry in external_ids
        if entry["schema"] == "CDS"
    )

    # only the first CDS entry matters
    return next(candidates, "")
def submitted(self):
"""The date of submission.
......
......@@ -16,7 +16,7 @@ import pytest
from store_tools.factory import build_store
from store_tools.recordcds import RecordCds
from store_tools.recordhep import RecordHep
from store_tools.recordheppubli import RecordHepPubli
@pytest.fixture(scope="module")
......@@ -30,7 +30,7 @@ def reccds():
def recins():
store = build_store("inspirehep.net", shelf="literature")
recjson = store.get_record(1319638)
return RecordHep(recjson)
return RecordHepPubli(recjson)
# ............................................................................
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment