Commit a0dd169b authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Update RecordCdsConfPaper and tests to use ConfMixin

parent 655a173d
...@@ -33,10 +33,7 @@ from .exception import (CheckException, ...@@ -33,10 +33,7 @@ from .exception import (CheckException,
StoreException, StoreException,
ToolException) ToolException)
from .factory import (add_conference_data, from .factory import build_record, build_store
build_record,
build_store,
get_conference_data)
from .inspirehepstore import InspirehepStore from .inspirehepstore import InspirehepStore
from .publicationinfomixin import PublicationInfoMixin from .publicationinfomixin import PublicationInfoMixin
from .cdsstore import CdsStore from .cdsstore import CdsStore
......
...@@ -92,11 +92,11 @@ class ConfMixin(object): ...@@ -92,11 +92,11 @@ class ConfMixin(object):
* empty string when it is not defined * empty string when it is not defined
""" """
for elt in self["publication_info"]: conference = self.conference
if "cnum" in elt: if conference is None:
return elt["cnum"] return ""
return "" return conference.get("cnum", "")
def conference_location(self): def conference_location(self):
"""The conference location. """The conference location.
......
...@@ -6,17 +6,12 @@ from .base import (CDS, ...@@ -6,17 +6,12 @@ from .base import (CDS,
is_conference, is_conference,
is_institute, is_institute,
is_thesis, is_thesis,
MSG_INV_CONF, MSG_NO_SHELF)
MSG_INV_CONF_KEY,
MSG_NO_CONF,
MSG_NO_SHELF,
REG_CONF)
from datetime import datetime
from .exception import RecordException, StoreException from .exception import RecordException, StoreException
from .cdsstore import CdsStore from .cdsstore import CdsStore
from .inspirehepstore import InspirehepStore, SHELFS from .inspirehepstore import InspirehepStore, SHELFS
from store_tools.recordcdsconfpaper import RecordCdsConfPaper from .recordcdsconfpaper import RecordCdsConfPaper
from .recordhepconfpaper import RecordHepConfPaper from .recordhepconfpaper import RecordHepConfPaper
from .recordheppubli import RecordHepPubli from .recordheppubli import RecordHepPubli
from .recordhepinst import RecordHepInst from .recordhepinst import RecordHepInst
...@@ -34,138 +29,6 @@ MSG_ERROR_INST = \ ...@@ -34,138 +29,6 @@ MSG_ERROR_INST = \
MSG_FAIL_UPCAST = "Failed to upcast the JSON record" MSG_FAIL_UPCAST = "Failed to upcast the JSON record"
def _format_conference_dates(start, end):
    """Format two ``YYYY-MM-DD`` dates as a human readable range.

    Args:
        start (str): opening date, ``YYYY-MM-DD``.
        end (str): closing date, ``YYYY-MM-DD``.

    Returns:
        str:
            ``6-12 Dec 2010`` when both dates fall in the same month of
            the same year, ``28 Nov - 2 Dec 2010`` otherwise.

    """
    ds = datetime.strptime(start, "%Y-%m-%d")
    de = datetime.strptime(end, "%Y-%m-%d")

    # compare year and month: two dates in the same month of different
    # years must not be collapsed into a single month range
    if (ds.year, ds.month) == (de.year, de.month):
        return f"{ds.day}-{de.day} {ds:%b %Y}"

    # use the day attribute rather than the "%-d" directive which is not
    # available on every platform
    return f"{ds.day} {ds:%b} - {de.day} {de:%b %Y}"


def _first_conference_url(urls):
    """Select the conference URL from the inspirehep ``urls`` field.

    Args:
        urls (list or None): value of the ``urls`` field.

    Returns:
        str:
            * the ``value`` of the first entry for a non-empty list
            * empty string when the field is not defined
            * ``???`` in any other case

    """
    if urls is None:
        return ""

    if isinstance(urls, list) and len(urls) > 0:
        return urls[0]["value"]

    return "???"


def add_conference_data(recjson):
    """Add the conference data to the recjson.

    Note:
        Encoding of conference information depends on the store.
        It adds the following fields and subfields::

            +--------------+----------------------------------------------+
            | field        | subfield                                     |
            +--------------+----------------------------------------------+
            | meeting_name | closing_date, coference_code, country, date, |
            |              | location, opening_date, year                 |
            | meeting_note | recid, url                                   |
            +--------------+----------------------------------------------+

        The subfield is spelled ``coference_code`` everywhere in this
        code, the spelling is kept on purpose.

    Args:
        recjson (dict):
            record data (MarcJSON). It is modified in place.

    Returns:
        None

    Note:
        * Fields are not added when there is no conference identifier and
          no conference key in the recjson.
        * Fields are not added when the conference can not be retrieved
          from the store (StoreException).
        * The method CheckAndFix.is_conference will identify that case.

    """
    # ........................................................................
    #
    # Retrieve conference identifier and the host
    #   - the algorithm depends on the store
    #   - for cds use aleph_linking_page
    #   - for inspire use publication_info.cnum
    #
    conf_id, conf_key, host = None, None, None

    if "aleph_linking_page" in recjson:
        di = recjson["aleph_linking_page"]
        conf_id = di["sysno"]
        conf_key = di.get("up_link", None)
        host = "cds.cern.ch"

    elif "publication_info" in recjson:
        data = recjson["publication_info"]
        data = data if isinstance(data, list) else [data]

        for di in data:
            if "cnum" in di:
                conf_key = di["cnum"]
                host = "inspirehep.net"
                break

    if conf_id is None and conf_key is None:
        return

    # ........................................................................
    #
    # Get conference data
    #   - the identifier is preferred to the key
    #
    if conf_id is not None:
        conf_id = conf_id if isinstance(conf_id, int) else int(conf_id)
        kwargs = dict(conf_id=conf_id)
    else:
        kwargs = dict(key=conf_key)

    try:
        confjson = get_conference_data(host, **kwargs)
    except StoreException:
        return

    # ........................................................................
    #
    # Add conference data to the recjson (cds.cern.ch)
    #
    if host in CDS:

        # extract the conference url
        #   - information is in confjson[url]
        #   - in most of the cases it is a dictionary
        #   - when it is a list take the first entry which is for the home
        #     page while the second one is for the proceeding (cds 2270940)
        #   - in other cases the url is not defined (cds 2258914)
        confurl = ""
        if "url" in confjson:
            obj = confjson["url"]
            confurl = obj["url"] if isinstance(obj, dict) else obj[0]["url"]

        recjson["meeting_name"] = confjson["meeting_name"]
        recjson["meeting_note"] = {"recid": confjson["recid"], "url": confurl}

    # ........................................................................
    #
    # Add conference data to the recjson (inspirehep.net)
    #
    elif host in INS:

        # location of the conference
        #   - first address with a country
        #   - empty values when no such address exists, instead of
        #     failing with an IndexError
        located = [el for el in confjson.get("addresses", [])
                   if el.get("country")]
        address = located[0] if located else {}
        cities = address.get("cities") or [""]
        location = ""
        if located:
            location = f"{cities[0]}, {address['country']}"

        # dates of the conference
        start, end = confjson["opening_date"], confjson["closing_date"]

        recjson["meeting_name"] = [{
            "closing_date": end,
            "coference_code": confjson["cnum"],
            "country": address.get("country_code", ""),
            "date": _format_conference_dates(start, end),
            "location": location,
            "meeting": confjson["titles"][0]["title"],
            "opening_date": start,
            "year": confjson["opening_date"][:4]}]

        recjson["meeting_note"] = {
            "recid": confjson["control_number"],
            "url": _first_conference_url(confjson.get("urls"))}
def build_record(recjson, shelf=None): def build_record(recjson, shelf=None):
"""Transform a JSON object into a record """Transform a JSON object into a record
...@@ -185,8 +48,9 @@ def build_record(recjson, shelf=None): ...@@ -185,8 +48,9 @@ def build_record(recjson, shelf=None):
Return Return
Record: Record:
either RecordCdsConfPaper, RecordHepConfPaper, RecodHepPubli, RecordHepInst, either RecordCdsConfPaper, RecordHepConfPaper, RecodHepPubli,
RecordHepThesis, RecordHepInst, RecordCdsPubli or RecordCdsThesis RecordHepInst, RecordHepThesis, RecordHepInst, RecordCdsPubli
or RecordCdsThesis
Raises: Raises:
RecordException RecordException
...@@ -198,7 +62,6 @@ def build_record(recjson, shelf=None): ...@@ -198,7 +62,6 @@ def build_record(recjson, shelf=None):
# #
if shelf is None: if shelf is None:
if is_conference(recjson): if is_conference(recjson):
add_conference_data(recjson)
upcast_record = RecordCdsConfPaper(recjson) upcast_record = RecordCdsConfPaper(recjson)
elif is_institute(recjson): elif is_institute(recjson):
...@@ -268,110 +131,3 @@ def build_store(host=None, shelf=None): ...@@ -268,110 +131,3 @@ def build_store(host=None, shelf=None):
raise StoreException(MSG_NO_SHELF % (shelf, host)) raise StoreException(MSG_NO_SHELF % (shelf, host))
return store return store
def get_conference_data(host, conf_id=None, key=None):
    """Get the conference data identified by its id or key.

    Args:
        host (str):
            possible values are ``store``, ``store.cern.ch``, ``inspirehep``
            or ``inspirehep.net``.

        conf_id (int):
            the conference identifier in the store.
            This is the preferred way.

        key (str): the conference key in the store.

    Returns:
        dict:
            The conference data (MarcJSON).
            ``None`` when none of the search cases below applies,
            e.g. when neither ``conf_id`` nor ``key`` is given.

    Raises:
        StoreException:
            - conference record with a wrong identifier
            - invalid conference key (inspirehep.net)
            - conference not found

    """
    store = build_store(host, shelf="conferences")

    # ........................................................................
    #
    # search by id in cds.cern.ch
    #
    if conf_id is not None and host in CDS:
        recjson = store.get_record(conf_id)

        # compare as strings since the identifier can be given either
        # as an integer or as a string of digits
        if str(recjson["recid"]) != str(conf_id):
            raise StoreException(MSG_INV_CONF)

        return recjson

    # ........................................................................
    #
    # search by key in cds.cern.ch
    #   - the search can return several records,
    #     keep the one having the requested conference key
    #
    if key is not None and host in CDS:
        for rec_id in store.get_ids(p=key):
            recjson = store.get_record(rec_id)
            if match_conference_key(recjson, key):
                return recjson

        raise StoreException(MSG_NO_CONF)

    # ........................................................................
    #
    # search by id in inspirehep.net
    #
    if conf_id is not None and host in INS:
        return store.get_record(conf_id)

    # ........................................................................
    #
    # search by key in inspirehep.net
    #
    if key is not None and host in INS:

        key = key.replace("/", "-")
        if not REG_CONF.match(key):
            raise StoreException(MSG_INV_CONF_KEY)

        obj = store.search(q=f"cnum:{key}")

        # an empty result raises IndexError which has to be reported
        # as a conference not found, like the other lookup failures
        try:
            recjson = obj[0]["metadata"]
        except (IndexError, KeyError, TypeError) as err:
            raise StoreException(MSG_NO_CONF) from err

        # a record without cnum can not be the requested conference
        if recjson.get("cnum") != key:
            raise StoreException(MSG_NO_CONF)

        return recjson

    return None
def match_conference_key(recjson, conf_key):
    """Tell whether the record describes the conference having the given key.

    The key is compared to the subfield ``coference_code`` of every entry
    of the field ``meeting_name``.

    Args:
        recjson (dict):
            record formatted MarcJSON.

        conf_key (str):
            conference key

    Returns:
        bool:
            ``True`` when one entry of ``meeting_name`` carries the key,
            ``False`` otherwise or when the field is missing.

    """
    if "meeting_name" not in recjson:
        return False

    code = "coference_code"
    return any(code in entry and entry[code] == conf_key
               for entry in recjson["meeting_name"])
...@@ -3,15 +3,15 @@ ...@@ -3,15 +3,15 @@
""" """
import re import re
from .base import REG_CONF, REG_YEAR, T4, T6 from .base import T4, T6
from .cdsstore import CdsStore from .cdsstore import CdsStore
from plugin_dbui import CLEAN_SPACES from .confmixin import ConfMixin
from .recordcdspubli import RecordCdsPubli from .recordcdspubli import RecordCdsPubli
REX_DATE8 = re.compile(r"(\d{4})(\d{2})(\d{2})") REX_DATE8 = re.compile(r"(\d{4})(\d{2})(\d{2})")
class RecordCdsConfPaper(RecordCdsPubli): class RecordCdsConfPaper(RecordCdsPubli, ConfMixin):
"""The record describing a conference talk or a proceeding. """The record describing a conference talk or a proceeding.
Attributes: Attributes:
...@@ -64,7 +64,7 @@ class RecordCdsConfPaper(RecordCdsPubli): ...@@ -64,7 +64,7 @@ class RecordCdsConfPaper(RecordCdsPubli):
logger.debug(f"{T6}search by conference by id {conf_id}") logger.debug(f"{T6}search by conference by id {conf_id}")
recjson = store.get_record(conf_id) recjson = store.get_record(conf_id)
if recjson["recid"] != conf_id: if recjson["recid"] != int(conf_id):
logger.debug(f"{T6}failed to retrieve conference by id") logger.debug(f"{T6}failed to retrieve conference by id")
if recjson.get("meeting_name", None) is None: if recjson.get("meeting_name", None) is None:
...@@ -108,6 +108,7 @@ class RecordCdsConfPaper(RecordCdsPubli): ...@@ -108,6 +108,7 @@ class RecordCdsConfPaper(RecordCdsPubli):
return return
city, country = data.get("location", ",").split(",") city, country = data.get("location", ",").split(",")
url = recjson.get("url", {}).get("url", None)
dct = { dct = {
"addresses": [{ "addresses": [{
...@@ -115,9 +116,10 @@ class RecordCdsConfPaper(RecordCdsPubli): ...@@ -115,9 +116,10 @@ class RecordCdsConfPaper(RecordCdsPubli):
"country": country.strip()}], "country": country.strip()}],
"cnum": data.get("coference_code"), "cnum": data.get("coference_code"),
"closing_date": data.get("closing_date", None), "closing_date": data.get("closing_date", None),
"control_number": recjson["recid"],
"opening_date": data.get("opening_date", None), "opening_date": data.get("opening_date", None),
"titles": [{"value": data.get("meeting", None)}], "titles": [{"title": data.get("meeting", None)}],
"urls": [recjson.get("url", {}).get("url", None)], "urls": (None if url is None else [{"value": url}]),
"year": data.get("year", None)} "year": data.get("year", None)}
# date format issue YYYYMMDD to YYYY-MM-DD # date format issue YYYYMMDD to YYYY-MM-DD
...@@ -130,151 +132,3 @@ class RecordCdsConfPaper(RecordCdsPubli): ...@@ -130,151 +132,3 @@ class RecordCdsConfPaper(RecordCdsPubli):
# #
# Append conference data # Append conference data
self.conference = dct self.conference = dct
def conference_country(self):
    """The country where the conference took place.

    Returns:
        str:
            the last comma separated item of the conference location,
            passed through the filter *CLEAN_SPACES*.
            The string is empty when the location is not defined.

    """
    # NOTE:
    #   * the country is extracted from the location since the latter
    #     is defined for both cds and inspire stores
    #
    #   * the subfield country contains the country code (IT, FR, ...).
    #     It is only defined for cds
    #
    location = self.conference_location()

    if len(location) > 0:
        return CLEAN_SPACES(location.rsplit(",", 1)[-1])

    return ""
def conference_dates(self):
    """The dates of the conference.

    Returns:
        str:
            the usual pattern is ``6-5 March 2012`` but it can vary
            between records and between stores since it is not
            standardised.

    """
    dates = self._get("meeting_name", "date")

    # several values: assume that the first one is the correct one
    if isinstance(dates, list) and len(dates) > 0:
        return dates[0]

    return dates
def conference_id(self):
    """The conference identifier used in the store.

    Returns:
        int or None:
            the subfield ``recid`` of the field ``meeting_note``,
            ``None`` when the field or the subfield is missing.

    """
    has_note = "meeting_note" in self
    return self["meeting_note"].get("recid") if has_note else None
def conference_key(self):
    """The conference key used in the store.

    The algorithm depends on the store:

        * CDS: subfield ``up_link`` of the field ``aleph_linking_page``
        * INSPIRE: the ``cnum`` of the field ``publication_info``
          matching the pattern ``REG_CONF``

    Returns:
        str:
            empty string when not defined, namely when none of the two
            fields is present, when ``up_link`` is missing or when the
            number of matching ``cnum`` differs from one.

    """
    # CDS
    if "aleph_linking_page" in self:
        return self["aleph_linking_page"].get("up_link", "")

    # INSPIRE
    #   - publication_info is accessed as a table here (df.cnum.str,
    #     .iloc), presumably a pandas DataFrame -- TODO confirm
    #   - the explicit comparison with True is kept as is since the
    #     result of the match is used as a row selector
    if "publication_info" in self:
        df = self["publication_info"]
        cnums = df[df.cnum.str.match(REG_CONF.pattern) == True].cnum  # noqa
        return cnums.iloc[0] if len(cnums) == 1 else ""

    # neither field: the key is not defined
    return ""
def conference_location(self):
    """The conference location.

    Returns:
        str:
            - the pattern is ``town, country``
            - empty string when more than one location is found
            - empty string when not defined

    """
    found = self._get("meeting_name", "location", force_list=True)

    # only an unambiguous location is kept
    if len(found) != 1:
        return CLEAN_SPACES("")

    return CLEAN_SPACES(found[0])
def conference_title(self):
    """The title of the conference.

    Returns:
        str:
            the subfield ``meeting`` of the field ``meeting_name``
            passed through the filter *CLEAN_SPACES*.

    """
    title = self._get("meeting_name", "meeting")

    # several values: assume that the first one is the correct one
    if isinstance(title, list):
        title = title[0]

    return CLEAN_SPACES(title)
def conference_town(self):
    """The town where the conference took place.

    Returns:
        str:
            the first comma separated item of the conference location,
            passed through the filter *CLEAN_SPACES*.
            The string is empty when the location is not defined.

    """
    location = self.conference_location()

    if len(location) > 0:
        town = location.partition(",")[0]
        return CLEAN_SPACES(town)

    return ""
def conference_url(self):
    """The URL of the conference home page.

    Returns:
        str:
            the subfield ``url`` of the field ``meeting_note``.
            The string is empty when the field or the subfield
            is not defined.

    """
    has_note = "meeting_note" in self
    return self["meeting_note"].get("url", "") if has_note else ""
def conference_year(self):
    """The year of the conference.

    Returns:
        str:
            the first group of the pattern ``REG_YEAR`` found in the
            conference dates, empty string when it is not found.

    """
    # the year is extracted from the conference dates
    found = REG_YEAR.search(self.conference_dates())
    return found.group(1) if found else ""
...@@ -5,10 +5,8 @@ ...@@ -5,10 +5,8 @@
* Test tools to get / add conference data. * Test tools to get / add conference data.
""" """
from store_tools import (add_conference_data, from store_tools import (build_record,
build_record,
build_store, build_store,
get_conference_data,
is_conference, is_conference,
is_institute, is_institute,
is_thesis, is_thesis,
...@@ -24,75 +22,9 @@ from store_tools import (add_conference_data, ...@@ -24,75 +22,9 @@ from store_tools import (add_conference_data,
# ............................................................................ # ............................................................................
# #
# Conference proceeding and talk # Conference (is_conference, build_record)
# #
def test_get_conference_data_cds_02001(): def test_is_conference_cds_02001():
"""get the conference data from cds.cern.ch.
the map of identifiers and keys is the following:
+------------------+--------------+-----------|
| | cds | inspire |
+------------------+--------------+-----------|
| proceeding recid | 1411352 | 1089237 |
| conference recid | 1181092 | 980401 |
| conference key | rome20101206 | C10-12-06 |
+------------------+--------------+-----------|
"""
# by id
recjson1 = get_conference_data("cds.cern.ch", conf_id=1181092)
assert recjson1["recid"] == 1181092
assert recjson1["meeting_name"][0]["coference_code"] == "rome20101206"