Commit a0dd169b authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Update RecordCdsConfPaper and tests to use ConfMixin

parent 655a173d
......@@ -33,10 +33,7 @@ from .exception import (CheckException,
StoreException,
ToolException)
from .factory import (add_conference_data,
build_record,
build_store,
get_conference_data)
from .factory import build_record, build_store
from .inspirehepstore import InspirehepStore
from .publicationinfomixin import PublicationInfoMixin
from .cdsstore import CdsStore
......
......@@ -92,12 +92,12 @@ class ConfMixin(object):
* empty string when it is not defined
"""
for elt in self["publication_info"]:
if "cnum" in elt:
return elt["cnum"]
conference = self.conference
if conference is None:
return ""
return conference.get("cnum", "")
def conference_location(self):
"""The conference location.
......
......@@ -6,17 +6,12 @@ from .base import (CDS,
is_conference,
is_institute,
is_thesis,
MSG_INV_CONF,
MSG_INV_CONF_KEY,
MSG_NO_CONF,
MSG_NO_SHELF,
REG_CONF)
MSG_NO_SHELF)
from datetime import datetime
from .exception import RecordException, StoreException
from .cdsstore import CdsStore
from .inspirehepstore import InspirehepStore, SHELFS
from store_tools.recordcdsconfpaper import RecordCdsConfPaper
from .recordcdsconfpaper import RecordCdsConfPaper
from .recordhepconfpaper import RecordHepConfPaper
from .recordheppubli import RecordHepPubli
from .recordhepinst import RecordHepInst
......@@ -34,138 +29,6 @@ MSG_ERROR_INST = \
MSG_FAIL_UPCAST = "Failed to upcast the JSON record"
def _format_conference_dates(start, end):
    """Build the human readable date range of a conference.

    Args:
        start (str):
            opening date formatted ``YYYY-MM-DD``.
        end (str):
            closing date formatted ``YYYY-MM-DD``.

    Returns:
        str:
            ``6-12 Dec 2010`` when both dates fall in the same month of
            the same year, ``28 Nov - 2 Dec 2010`` otherwise.

    """
    ds = datetime.strptime(start, "%Y-%m-%d")
    de = datetime.strptime(end, "%Y-%m-%d")

    # The day number is read from the attribute instead of the "%-d"
    # directive: the latter is a platform-specific strftime extension.
    if (ds.year, ds.month) == (de.year, de.month):
        return f"{ds.day}-{de.day} " + ds.strftime("%b %Y")

    return f"{ds.day} {ds.strftime('%b')} - {de.day} {de.strftime('%b %Y')}"


def add_conference_data(recjson):
    """Add the conference data to the recjson.

    The record is modified in place, nothing is returned.

    Note:
        * Encoding of conference information depends on the store.
        * For a record coming from cds.cern.ch, ``meeting_name`` is copied
          as such from the conference record.
        * For a record coming from inspirehep.net, the following field
          and subfield are built::

            +---------------+---------------------------------------------+
            | field         | subfield                                    |
            +---------------+---------------------------------------------+
            | meeting_name  | closing_date, coference_code, country,      |
            |               | date, location, meeting, opening_date, year |
            | meeting_note  | recid, url                                  |
            +---------------+---------------------------------------------+

        * Fields are not added when there is no conference identifier and
          no conference key in the recjson, or when the conference cannot
          be retrieved from the store (StoreException).
        * The method CheckAndFix.is_conference will identify that case.

    Args:
        recjson (dict):
            record data (MarcJSON)

    """
    # ........................................................................
    #
    # Retrieve conference identifier and the host
    #   - the algorithm depend on the store
    #   - for cds use aleph_linking_page
    #   - for inspire use publication_info.cnum
    #
    conf_id, conf_key, host = None, None, None

    if "aleph_linking_page" in recjson:
        di = recjson["aleph_linking_page"]
        conf_id = di["sysno"]
        conf_key = di.get("up_link", None)
        host = "cds.cern.ch"

    elif "publication_info" in recjson:
        data = recjson["publication_info"]
        # a single entry is a dictionary, several entries are a list
        data = (data if isinstance(data, list) else [data])
        for di in data:
            if "cnum" in di:
                conf_key = di["cnum"]
                host = "inspirehep.net"
                break

    if conf_id is None and conf_key is None:
        return

    # ........................................................................
    #
    # Get conference data
    #   - the identifier is preferred to the key
    #   - best effort: the record is left untouched when the store fails
    #
    if conf_id is not None:
        kwargs = dict(conf_id=int(conf_id))
    else:
        kwargs = dict(key=conf_key)

    try:
        confjson = get_conference_data(host, **kwargs)
    except StoreException:
        return

    # ........................................................................
    #
    # Add conference data to the recjson (cds.cern.ch)
    #
    if host in CDS:

        # extract the conference url
        #   - information is in confjson[url]
        #   - in most of the case it is a dictionary
        #   - when it is a list take the first entry which is for the home
        #     page while the second one is for the proceeding (cds 2270940)
        #   - in other case the url is not defined (cds 2258914)
        #   - an empty list is handled as an undefined url
        confurl = ""
        if "url" in confjson:
            obj = confjson["url"]
            if isinstance(obj, dict):
                confurl = obj["url"]
            elif obj:
                confurl = obj[0]["url"]

        recjson["meeting_name"] = confjson["meeting_name"]
        recjson["meeting_note"] = {"recid": confjson["recid"], "url": confurl}

    # ........................................................................
    #
    # Add conference data to the recjson (inspirehep.net)
    #
    elif host in INS:

        # location of the conference: first address with a country
        # (IndexError when no address qualifies)
        address = [el for el in confjson["addresses"] if el.get("country")][0]

        # date of the conference 6-12 Dec 2010
        start, end = confjson["opening_date"], confjson["closing_date"]
        sdate = _format_conference_dates(start, end)

        # URL of the conference (take the first value)
        urls = confjson.get("urls")
        if urls is None:
            url = ""
        elif isinstance(urls, list) and len(urls) > 0:
            url = urls[0]["value"]
        else:
            url = "???"

        # add
        recjson["meeting_name"] = [{
            "closing_date": end,
            "coference_code": confjson["cnum"],
            "country": address["country_code"],
            "date": sdate,
            "location": f"{address['cities'][0]}, {address['country']}",
            "meeting": confjson["titles"][0]["title"],
            "opening_date": start,
            "year": confjson["opening_date"][:4]}]

        recjson["meeting_note"] = {
            "recid": confjson["control_number"],
            "url": url}
def build_record(recjson, shelf=None):
"""Transform a JSON object into a record
......@@ -185,8 +48,9 @@ def build_record(recjson, shelf=None):
Return
Record:
either RecordCdsConfPaper, RecordHepConfPaper, RecodHepPubli, RecordHepInst,
RecordHepThesis, RecordHepInst, RecordCdsPubli or RecordCdsThesis
either RecordCdsConfPaper, RecordHepConfPaper, RecodHepPubli,
RecordHepInst, RecordHepThesis, RecordHepInst, RecordCdsPubli
or RecordCdsThesis
Raises:
RecordException
......@@ -198,7 +62,6 @@ def build_record(recjson, shelf=None):
#
if shelf is None:
if is_conference(recjson):
add_conference_data(recjson)
upcast_record = RecordCdsConfPaper(recjson)
elif is_institute(recjson):
......@@ -268,110 +131,3 @@ def build_store(host=None, shelf=None):
raise StoreException(MSG_NO_SHELF % (shelf, host))
return store
def get_conference_data(host, conf_id=None, key=None):
    """Get the conference data identified by its id or key.

    The search is done in the shelf ``conferences`` of the store.
    The identifier is tried first, then the key.

    Args:
        host (str):
            a host belonging either to ``CDS`` or to ``INS``,
            e.g. ``cds.cern.ch`` or ``inspirehep.net``.

        conf_id (int):
            the conference identifier in the store.
            This is the preferred way.

        key (str): the conference key in the store.

    Returns:
        dict:
            The conference data (MarcJSON).
            ``None`` when neither ``conf_id`` nor ``key`` is given or when
            the host belongs neither to ``CDS`` nor to ``INS``.

    Raises:
        StoreException:
            - conference record with a wrong identifier
            - invalid conference key (inspirehep.net)
            - conference not found

    """
    store = build_store(host, shelf="conferences")

    # ........................................................................
    #
    # search by id in cds.cern.ch
    #
    if conf_id is not None and host in CDS:
        recjson = store.get_record(conf_id)

        # the identifier may be given as a string of digits
        try:
            expected_id = int(conf_id)
        except (TypeError, ValueError) as err:
            raise StoreException(MSG_INV_CONF) from err

        if recjson["recid"] != expected_id:
            raise StoreException(MSG_INV_CONF)

        return recjson

    # ........................................................................
    #
    # search by key in cds.cern.ch
    #   - the search returns candidates, keep the first one whose
    #     conference code is exactly the key
    #
    if key is not None and host in CDS:
        for candidate_id in store.get_ids(p=key):
            recjson = store.get_record(candidate_id)
            if match_conference_key(recjson, key):
                return recjson

        raise StoreException(MSG_NO_CONF)

    # ........................................................................
    #
    # search by id in inspirehep.net
    #
    if conf_id is not None and host in INS:
        return store.get_record(conf_id)

    # ........................................................................
    #
    # search by key in inspirehep.net
    #
    if key is not None and host in INS:

        key = key.replace("/", "-")
        if not REG_CONF.match(key):
            raise StoreException(MSG_INV_CONF_KEY)

        obj = store.search(q=f"cnum:{key}")

        # IndexError covers a search which returns an empty list
        try:
            recjson = obj[0]["metadata"]
        except (IndexError, KeyError, TypeError) as err:
            raise StoreException(MSG_NO_CONF) from err

        if recjson["cnum"] != key:
            raise StoreException(MSG_NO_CONF)

        return recjson
def match_conference_key(recjson, conf_key):
    """Return ``True`` when the record corresponds to a conference identified
    by its key.

    The key is compared to the subfield ``coference_code`` of each entry
    of the field ``meeting_name``.

    Args:
        recjson (dict):
            record formatted MarcJSON.

        conf_key (str):
            conference key

    Returns
        bool:
            ``False`` when the field ``meeting_name`` is missing or empty.

    """
    if "meeting_name" not in recjson:
        return False

    meetings = recjson["meeting_name"]
    if not meetings:
        return False

    # a single entry is a dictionary, several entries are a list
    if isinstance(meetings, dict):
        meetings = [meetings]

    subfield = "coference_code"
    return any(subfield in di and di[subfield] == conf_key for di in meetings)
......@@ -3,15 +3,15 @@
"""
import re
from .base import REG_CONF, REG_YEAR, T4, T6
from .base import T4, T6
from .cdsstore import CdsStore
from plugin_dbui import CLEAN_SPACES
from .confmixin import ConfMixin
from .recordcdspubli import RecordCdsPubli
REX_DATE8 = re.compile(r"(\d{4})(\d{2})(\d{2})")
class RecordCdsConfPaper(RecordCdsPubli):
class RecordCdsConfPaper(RecordCdsPubli, ConfMixin):
"""The record describing a conference talk or a proceeding.
Attributes:
......@@ -64,7 +64,7 @@ class RecordCdsConfPaper(RecordCdsPubli):
logger.debug(f"{T6}search by conference by id {conf_id}")
recjson = store.get_record(conf_id)
if recjson["recid"] != conf_id:
if recjson["recid"] != int(conf_id):
logger.debug(f"{T6}failed to retrieve conference by id")
if recjson.get("meeting_name", None) is None:
......@@ -108,6 +108,7 @@ class RecordCdsConfPaper(RecordCdsPubli):
return
city, country = data.get("location", ",").split(",")
url = recjson.get("url", {}).get("url", None)
dct = {
"addresses": [{
......@@ -115,9 +116,10 @@ class RecordCdsConfPaper(RecordCdsPubli):
"country": country.strip()}],
"cnum": data.get("coference_code"),
"closing_date": data.get("closing_date", None),
"control_number": recjson["recid"],
"opening_date": data.get("opening_date", None),
"titles": [{"value": data.get("meeting", None)}],
"urls": [recjson.get("url", {}).get("url", None)],
"titles": [{"title": data.get("meeting", None)}],
"urls": (None if url is None else [{"value": url}]),
"year": data.get("year", None)}
# date format issue YYYYMMDD to YYYY-MM-DD
......@@ -130,151 +132,3 @@ class RecordCdsConfPaper(RecordCdsPubli):
#
# Append conference data
self.conference = dct
def conference_country(self):
    """Return the country in which the conference was held.

    Returns:
        str:
            the last comma separated item of the conference location,
            filtered by *CLEAN_SPACES*.
            Empty string when the location is not defined.

    """
    # NOTE:
    #   * the country is read from the location, which is filled for
    #     both the cds and the inspire stores
    #
    #   * the subfield country holds the country code (IT, FR, ..)
    #     and is only defined for cds
    #
    place = self.conference_location()
    if len(place) > 0:
        *_, country = place.split(",")
        return CLEAN_SPACES(country)

    return ""
def conference_dates(self):
    """The dates of the conference.

    Returns:
        str:
            the usual pattern is ``6-5 March 2012`` but it can varies
            between records and between stores since it is not
            standardise.
            Empty string when the store returns an empty list.

    """
    val = self._get("meeting_name", "date")

    if isinstance(val, list):
        # for list assume that the first item is the correct one.
        # An empty list is mapped to an empty string so that callers
        # applying a regular expression always receive a string.
        return val[0] if len(val) > 0 else ""

    return val
def conference_id(self):
    """Return the identifier of the conference in the store.

    The value is read from the subfield ``recid`` of the field
    ``meeting_note``.

    Returns:
        int or None:
            ``None`` when the field or the subfield is missing.

    """
    if "meeting_note" in self:
        return self["meeting_note"].get("recid")

    return None
def conference_key(self):
    """The conference key used in the store.

    Returns:
        str:
            empty string when not defined, namely when:

            - the record contains neither ``aleph_linking_page``
              nor ``publication_info``
            - ``aleph_linking_page`` has no ``up_link`` subfield
            - ``publication_info`` does not contain exactly one
              conference number matching ``REG_CONF``

    """
    # algorithm depends on the store
    # CDS
    if "aleph_linking_page" in self:
        return self["aleph_linking_page"].get("up_link", "")

    # INSPIRE
    if "publication_info" in self:
        # NOTE(review): publication_info is handled here as a pandas
        # DataFrame with a cnum column -- confirm against the base class
        df = self["publication_info"]

        # the explicit comparison with True is kept on purpose; presumably
        # it discards rows for which the match is undefined -- to verify
        cnums = df[df.cnum.str.match(REG_CONF.pattern) == True].cnum  # noqa: E712
        return cnums.iloc[0] if len(cnums) == 1 else ""

    # neither field is present
    return ""
def conference_location(self):
    """Return the place where the conference was held.

    Returns:
        str:
            - the pattern is ``town, country``
            - empty string when more than one location found
            - empty string when not defined

    """
    found = self._get("meeting_name", "location", force_list=True)

    # only an unambiguous answer is kept
    if len(found) == 1:
        place = found[0]
    else:
        place = ""

    return CLEAN_SPACES(place)
def conference_title(self):
    """The title of the conference.

    Returns:
        str:
            the filter *CLEAN_SPACES* is applied.
            An empty list returned by the store is handled as
            an empty title.

    """
    value = self._get("meeting_name", "meeting")

    if isinstance(value, list):
        # for list assume that the first item is the correct one;
        # guard against an empty list as done for the conference dates
        value = (value[0] if len(value) > 0 else "")

    return CLEAN_SPACES(value)
def conference_town(self):
    """Return the town in which the conference was held.

    Returns:
        str:
            the first comma separated item of the conference location,
            filtered by *CLEAN_SPACES*.
            Empty string when the location is not defined.

    """
    place = self.conference_location()
    if len(place) > 0:
        town, *_ = place.split(",")
        return CLEAN_SPACES(town)

    return ""
def conference_url(self):
    """Return the URL of the conference home page.

    The value is read from the subfield ``url`` of the field
    ``meeting_note``.

    Returns:
        str:
            empty string when the field or the subfield is missing.

    """
    if "meeting_note" in self:
        return self["meeting_note"].get("url", "")

    return ""
def conference_year(self):
    """Return the year in which the conference was held.

    The year is searched for in the conference dates with
    the regular expression ``REG_YEAR``.

    Returns:
        str:
            the first group of the match,
            empty string when the pattern is not found.

    """
    found = REG_YEAR.search(self.conference_dates())
    return found.group(1) if found else ""
......@@ -5,10 +5,8 @@
* Test tools to get / add conference data.
"""
from store_tools import (add_conference_data,
build_record,
from store_tools import (build_record,
build_store,
get_conference_data,
is_conference,
is_institute,
is_thesis,
......@@ -24,75 +22,9 @@ from store_tools import (add_conference_data,
# ............................................................................
#
# Conference proceeding and talk
# Conference (is_conference, build_record)
#
def test_get_conference_data_cds_02001():
    """Retrieve the same conference from cds.cern.ch by id and by key.

    Identifiers and keys used by the conference tests:

        +------------------+--------------+-----------|
        |                  | cds          | inspire   |
        +------------------+--------------+-----------|
        | proceeding recid | 1411352      | 1089237   |
        | conference recid | 1181092      | 980401    |
        | conference key   | rome20101206 | C10-12-06 |
        +------------------+--------------+-----------|

    """
    host, conf_recid, conf_key = "cds.cern.ch", 1181092, "rome20101206"

    def check(confjson):
        assert confjson["recid"] == conf_recid
        assert confjson["meeting_name"][0]["coference_code"] == conf_key

    # by id
    by_id = get_conference_data(host, conf_id=conf_recid)
    check(by_id)

    # by keys
    by_key = get_conference_data(host, key=conf_key)
    check(by_key)

    # both paths have to return the same record
    assert by_id == by_key
# v1.4.0 remove obsolete test_get_conference_data_ins_02002
def test_add_conference_data_cds_02003():
    """Conference fields are added to the cds proceeding 1411352."""
    recjson = build_store("cds.cern.ch").get_record(1411352)
    add_conference_data(recjson)

    # both fields have to be present
    for field in ("meeting_name", "meeting_note"):
        assert field in recjson

    meeting = recjson["meeting_name"][0]
    assert meeting["coference_code"] == "rome20101206"

    note = recjson["meeting_note"]
    assert note["url"] == "http://www.roma1.infn.it/discrete10"
    assert note["recid"] == 1181092
def test_add_conference_data_cds_02004():
    """Corner cases for the conference URL in cds.cern.ch."""
    # ........................................................................
    #
    # EXCEPTION
    #
    store = build_store("cds.cern.ch")

    cases = (
        # no conference URL
        (2258914, ""),
        # several conference URLs (home page, proceeding)
        (2270940, "http://indico.ihep.ac.cn/event/5221/overview"),
    )

    for recid, expected_url in cases:
        recjson = store.get_record(recid)
        add_conference_data(recjson)
        assert recjson["meeting_note"]["url"] == expected_url
# v1.4.0 remove obsolete test_add_conference_data_ins_02005
def test_conference_cds_02006():
def test_is_conference_cds_02001():
store = build_store("cds.cern.ch")
recjson = store.get_record(1411352)
......@@ -105,7 +37,7 @@ def test_conference_cds_02006():
assert isinstance(record, RecordCdsConfPaper)
def test_conference_ins_02007():
def test_is_conference_ins_02002():
store = build_store("inspirehep.net", shelf="literature")
recjson = store.get_record(1276938)
......@@ -118,7 +50,7 @@ def test_conference_ins_02007():
assert isinstance(record, RecordHepConfPaper)
def test_talk_cds_02008():