Commit 35b24bcb authored by LE GAC Renaud
Browse files

Update invenio factory with the tool to get / add conference_data.

parent 34a60765
......@@ -6,7 +6,10 @@ import re
# arXiv label and base URL of the PDF files on arxiv.org
ARXIV = "arXiv"
ARXIV_PDF = "http://arxiv.org/pdf/"
# Rejection messages.
# The conference ones (MSG_INV_CONF, MSG_INV_CONF_KEY, MSG_NO_CONF and
# MSG_NO_CONF_ID_KEY) are the messages of the CdsException raised by the
# factory tools which get / add the conference data.
# NOTE(review): the other ones are presumably used in the same way by
# code which is not visible here -- to be confirmed.
MSG_INV_CONF = "Reject invalid conference information"
MSG_INV_CONF_KEY = "Reject invalid conference key"
MSG_NO_CONF = "Reject no conference information"
MSG_NO_CONF_ID_KEY = "Reject no conference identifier and key"
MSG_NO_COUNTRY = "Reject invalid country"
MSG_NO_HOST = "Reject no host information in record"
MSG_NO_PUBLISHER = "Reject invalid publisher"
......@@ -25,6 +28,7 @@ REG_ARXIV_NUMBER = re.compile("\d+\.\d+")
# group(3) is the part of the first name after the separator (" ", "-")
REG_AUTHOR = re.compile(r"^([\w\- ]+), (\w+)\.?[\- ]*(\w+)*\.?$", re.UNICODE)
# Conference key of the form "C<digits>-<digits>-<digits>" with an optional
# ".<digits>" suffix, e.g. "C10-12-06".
# Raw string, as for the other patterns, so that "\d" and "\." are not
# treated as (invalid) string escape sequences.
REG_CONF = re.compile(r"^C\d+-\d+-\d+(\.\d+)?$")
# OAI identifier "oai:<host>:<number>":
# group(1) is the host name and group(2) the number.
REG_OAI = re.compile(r"oai:([a-z\.]+):([\d]+)")
# group(1) is a run of four consecutive digits
REG_YEAR = re.compile(r"(\d{4})")
......
......@@ -2,18 +2,107 @@
""" invenio_tools.factory
"""
from base import is_conference, is_institute, is_thesis
from base import (is_conference,
is_institute,
is_thesis,
MSG_INV_CONF,
MSG_INV_CONF_KEY,
MSG_NO_CONF,
MSG_NO_CONF_ID_KEY,
REG_CONF,
REG_OAI)
from exception import CdsException
from inveniostore import InvenioStore
from recordconf import RecordConf
from recordinst import RecordInst
from recordpubli import RecordPubli
from recordthesis import RecordThesis
def build_record(rec_json):
def _find_conference_reference(recjson):
    """Return the tuple ``(conf_id, conf_key)`` found in the recjson.

    An element of the tuple is ``None`` when the record does not provide it.
    """
    # cds: the reference is in aleph_linking_page
    # NOTE(review): aleph_linking_page is assumed to be a dict -- to be
    # confirmed against the data returned by the store.
    if u"aleph_linking_page" in recjson:
        link = recjson[u"aleph_linking_page"]
        # a missing subfield gives None instead of a KeyError, so that
        # the caller can reject the record with a CdsException
        return link.get(u"sysno"), link.get(u"up_link")

    # inspire: the key is the first cnum found in publication_info,
    # which is either a dict or a list of dict
    if u"publication_info" in recjson:
        data = recjson[u"publication_info"]
        data = (data if isinstance(data, list) else [data])
        for di in data:
            if u"cnum" in di:
                return None, di[u"cnum"]

    return None, None


def _find_host(recjson):
    """Return the host name of the store extracted from the OAI identifier
    of the recjson.

    Raise:
        CdsException:
            no OAI identifier in the recjson or identifier badly formed.

    """
    # MSG_NO_HOST is not part of the names imported at the top of this module
    from base import MSG_NO_HOST

    if u"oai" in recjson:
        oai = recjson[u"oai"][u"value"]

    elif u"FIXME_OAI" in recjson:
        oai = recjson[u"FIXME_OAI"][u"id"]

    else:
        raise CdsException(MSG_NO_HOST)

    match = REG_OAI.match(oai)
    if match is None:
        raise CdsException(MSG_NO_HOST)

    return match.group(1)


def add_conference_data(recjson):
    """Add the conference data to the recjson.

    The recjson is modified in place.
    It adds the following field and subfield::

        +---------------+-----------------------------------------------+
        | field         | subfield                                      |
        +---------------+-----------------------------------------------+
        | meeting_name  | closing_date, coference_code, country, date,  |
        |               | location, opening_date, year                  |
        | meeting       | recid, url                                    |
        +---------------+-----------------------------------------------+

    Note:
        ``coference_code`` is the spelling of the key used in this module
        and in its tests. It has to be kept as it is.

    Args:
        recjson (dict): record data (MarcJSON)

    Raise:
        CdsException:
            - no conference identifier and key in the recjson
            - no host information in the recjson
            - conference recjson found but with a wrong identifier
            - conference not found

    """
    # ........................................................................
    #
    # Retrieve conference identifier
    #   - the algorithm depend on the store
    #   - for cds use aleph_linking_page
    #   - for inspire use publication_info.cnum
    #
    conf_id, conf_key = _find_conference_reference(recjson)

    if conf_id is None and conf_key is None:
        raise CdsException(MSG_NO_CONF_ID_KEY)

    # ........................................................................
    #
    # Get conference data
    #   - the search by identifier is preferred to the one by key
    #
    host = _find_host(recjson)

    if conf_id is not None:
        confjson = get_conference_data(host, conf_id=conf_id)

    else:
        confjson = get_conference_data(host, key=conf_key)

    # ........................................................................
    #
    # Add conference data to the recjson
    #
    recjson[u"meeting_name"] = confjson[u"meeting_name"]

    recjson[u"meeting"] = {
        u"recid": confjson[u"recid"],
        u"url": confjson[u"url"][u"url"]}
def build_record(recjson):
    """Transform a JSON object into a record.

    The class of the record is selected by introspecting the data with
    ``is_conference``, ``is_institute`` and ``is_thesis``, in that order.
    For a conference, the conference data are added to the recjson,
    in place, before instantiating the record.

    Args:
        recjson (dict):
            record data in a JSON format.

    Return
        RecordConf, RecordInst, RecordThesis or RecordPubli:
            ``RecordPubli`` is used when the data are neither a
            conference, nor an institute, nor a thesis.

    Raises:
        CdsException:
            propagated from ``add_conference_data`` when the conference
            data can not be retrieved.

    """
    if is_conference(recjson):
        # the conference data have to be in place before the upcast
        add_conference_data(recjson)
        return RecordConf(recjson)

    if is_institute(recjson):
        return RecordInst(recjson)

    if is_thesis(recjson):
        return RecordThesis(recjson)

    return RecordPubli(recjson)
def get_conference_data(host, conf_id=None, key=None):
    """Get the conference data identified by its id or key.

    The search by id is used when both ``conf_id`` and ``key`` are given.

    Args:
        host (unicode):
            possible values are ``cds.cern.ch`` or ``inspirehep.net``.

        conf_id (unicode or int):
            the conference identifier in the store.
            This is the preferred way.

        key (unicode): the conference key in the store.

    Returns:
        dict:
            The conference data (MarcJSON).

    Raises:
        CdsException:
            - neither identifier nor key are given
            - conference record with a wrong identifier
            - invalid conference key (inspirehep.net)
            - conference not found, including the search by key in a
              host which is not one of the possible values

    """
    if conf_id is None and key is None:
        raise CdsException(MSG_NO_CONF_ID_KEY)

    cds = InvenioStore(host)

    # ........................................................................
    #
    # search by id
    #
    if conf_id is not None:
        recjson = cds.get_record(conf_id)

        # compare the textual forms since the identifier can be given
        # either as a number or as a string
        if u"%s" % recjson["recid"] != u"%s" % conf_id:
            raise CdsException(MSG_INV_CONF)

        return recjson

    # ........................................................................
    #
    # search by key: the query depends on the store
    #
    if host == "cds.cern.ch":
        query = dict(p=key)

    elif host == "inspirehep.net":
        key = key.replace("/", "-")
        if not REG_CONF.match(key):
            raise CdsException(MSG_INV_CONF_KEY)

        query = dict(cc="Conferences", p="111__g:%s" % key)

    else:
        raise CdsException(MSG_NO_CONF)

    # keep the first candidate in which the key is really the conference one
    for rec_id in cds.get_ids(**query):
        recjson = cds.get_record(rec_id)
        if match_conference_key(recjson, key):
            return recjson

    raise CdsException(MSG_NO_CONF)
def match_conference_key(recjson, conf_key):
    """Return ``True`` when the record corresponds to a conference identified
    by its key.

    The key is compared to the subfield ``coference_code`` of each
    ``meeting_name`` entry. The spelling of that subfield is the one used
    in this module and in its tests. It has to be kept as it is.

    Args:
        recjson (dict):
            record formatted MarcJSON.

        conf_key (unicode):
            conference key

    Returns
        bool:
            ``False`` when the record has no ``meeting_name`` field or
            when none of its entries carries the key.

    """
    if u"meeting_name" not in recjson:
        return False

    # the field is either a single entry (dict) or a list of entries
    meetings = recjson[u"meeting_name"]
    if not isinstance(meetings, list):
        meetings = [meetings]

    subfield = u"coference_code"
    for di in meetings:
        # skip anything which is not a dictionary of subfields
        if not isinstance(di, dict):
            continue

        if subfield in di and di[subfield] == conf_key:
            return True

    return False
......@@ -196,9 +196,8 @@ class InvenioStore(object):
rec_id (int): record identifier in the store.
Returns:
str: the XML string is compliant with
the `MARC <http://www.loc.gov/marc/>`_ standard.
Use Marc12.__call__ to decode it.
dict:
the record data (MarcJSON).
Raises:
CdsException:
......
......@@ -74,7 +74,7 @@ class RecordPubli(Record):
The main ``field`` and ``subfield`` are::
+---------------------------------+----------------------------------+
| field (cds) | subfield |
| field | subfield |
+---------------------------------+----------------------------------+
| FIXME_OAI (inspire) | id |
| abstract | |
......
......@@ -3,17 +3,29 @@
* Test tools to introspect the type of record.
* Test tools to upcast the Record from the JSON object.
* Test tools to get / add conference data.
"""
from invenio_tools.base import is_conference, is_institute, is_thesis
from invenio_tools.factory import build_record
from invenio_tools.base import (is_conference,
is_institute,
is_thesis)
from invenio_tools.factory import (add_conference_data,
build_record,
get_conference_data)
from invenio_tools.inveniostore import InvenioStore
from invenio_tools.record import Record
from invenio_tools.recordconf import RecordConf
from invenio_tools.recordinst import RecordInst
from invenio_tools.recordpubli import RecordPubli
from invenio_tools.recordthesis import RecordThesis
# ............................................................................
#
# Section to test introspection and instantiation
#
def test_conference_cds():
store = InvenioStore("cds.cern.ch")
......@@ -105,3 +117,100 @@ def test_thesis_cds():
record = build_record(recjson)
assert isinstance(record, RecordThesis)
# ............................................................................
#
# Section to test tool to get and add conference data
#
def test_get_conference_data():
    """ check that the conference data retrieved by identifier and by key
    are the same, in both store cds.cern.ch and inspirehep.net.

    the proceeding and its conference are the following:

        +------------------+--------------+-----------+
        |                  | cds          | inspire   |
        +------------------+--------------+-----------+
        | proceeding recid | 1411352      | 1089237   |
        | conference recid | 1181092      | 980401    |
        | conference key   | rome20101206 | C10-12-06 |
        +------------------+--------------+-----------+

    """
    # (host, conference recid, conference key)
    cases = (("cds.cern.ch", 1181092, "rome20101206"),
             ("inspirehep.net", 980401, "C10-12-06"))

    for host, recid, key in cases:

        # by id
        by_id = get_conference_data(host, conf_id=recid)
        assert by_id["recid"] == recid
        assert by_id["meeting_name"][0]["coference_code"] == key

        # by key
        by_key = get_conference_data(host, key=key)
        assert by_key["recid"] == recid
        assert by_key["meeting_name"][0]["coference_code"] == key

        # both approaches return the same data
        assert by_id == by_key
def test_add_conference_data():
    """ check that the conference data are added to the proceeding
    retrieved from both store cds.cern.ch and inspirehep.net.

    the proceeding and its conference are the following:

        +------------------+--------------+-----------+
        |                  | cds          | inspire   |
        +------------------+--------------+-----------+
        | proceeding recid | 1411352      | 1089237   |
        | conference recid | 1181092      | 980401    |
        | conference key   | rome20101206 | C10-12-06 |
        +------------------+--------------+-----------+

    """
    # (host, proceeding recid, conference recid, conference key)
    cases = (("cds.cern.ch", 1411352, 1181092, "rome20101206"),
             ("inspirehep.net", 1089237, 980401, "C10-12-06"))

    for host, proceeding_id, conference_id, key in cases:
        store = InvenioStore(host)
        recjson = store.get_record(proceeding_id)
        add_conference_data(recjson)

        assert "meeting_name" in recjson
        assert "meeting" in recjson

        assert recjson["meeting_name"][0]["coference_code"] == key

        meeting = recjson["meeting"]
        assert meeting["url"] == "http://www.roma1.infn.it/discrete10"
        assert meeting["recid"] == conference_id
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment