Commit 0300a974 authored by LE GAC Renaud
Browse files

update invenio_tools.factory and its tests

parent 3259f1e4
""" invenio_tools.factory """ invenio_tools.factory
""" """
import requests import re
from .base import (is_conference, from .base import (is_conference,
is_institute, is_institute,
...@@ -9,10 +9,9 @@ from .base import (is_conference, ...@@ -9,10 +9,9 @@ from .base import (is_conference,
MSG_INV_CONF, MSG_INV_CONF,
MSG_INV_CONF_KEY, MSG_INV_CONF_KEY,
MSG_NO_CONF, MSG_NO_CONF,
MSG_NO_CONF_ID_KEY, REG_CONF)
REG_CONF,
REG_OAI)
from datetime import datetime
from .exception import CdsException from .exception import CdsException
from .inveniostore import CDS, INS, InvenioStore from .inveniostore import CDS, INS, InvenioStore
from .recordconf import RecordConf from .recordconf import RecordConf
...@@ -20,6 +19,9 @@ from .recordinst import RecordInst ...@@ -20,6 +19,9 @@ from .recordinst import RecordInst
from .recordpubli import RecordPubli from .recordpubli import RecordPubli
from .recordthesis import RecordThesis from .recordthesis import RecordThesis
REX_T = "\$\$t([\w, ]+)"
REX_U = "\$\$u([\w, ]+)"
def add_affiliation_keys(recjson): def add_affiliation_keys(recjson):
"""Add the affiliation keys to the record describing an institute: """Add the affiliation keys to the record describing an institute:
...@@ -38,30 +40,29 @@ def add_affiliation_keys(recjson): ...@@ -38,30 +40,29 @@ def add_affiliation_keys(recjson):
| corporate_note | identifier, futur_identifier, name | | corporate_note | identifier, futur_identifier, name |
+----------------+------------------------------------+ +----------------+------------------------------------+
Args Args:
recjson (dict): record data (MarcJSON) recjson (dict):
record data (MarcJSON)
""" """
url = "https://inspirehep.net/record/%i" % recjson["recid"] store = InvenioStore("inspirehep", shelf="institutions")
rep = requests.get(url, params={"ot": "110", "of": "txt"})
# decode the string: '000recid 110__ $$aXXX$$bYYY$$tZZZ\n' url = f"https://old.inspirehep.net/record/{recjson['recid']}"
txt = rep.text.replace("\n", "")
li = txt[txt.find("$"):].split("$$")
di = {} rep = store.interrogate(url, ot="110", of="txt")
for el in li: txt = rep.text
if len(el) == 0:
continue
di[el[0:1]] = el[1:]
recjson["corporate_note"] = {"identifier": di["u"], recjson["corporate_note"] = {
"future_identifier": di["t"]} "identifier": re.search(REX_U, txt).group(1),
"future_identifier": re.search(REX_T, txt).group(1)}
def add_conference_data(recjson): def add_conference_data(recjson):
"""Add the conference data to the recjson. """Add the conference data to the recjson.
Note:
Encoding of conference information depends on the store.
It adds the following field and subfield:: It adds the following field and subfield::
+---------------+-----------------------------------------------+ +---------------+-----------------------------------------------+
| field | subfield | | field | subfield |
...@@ -72,7 +73,8 @@ def add_conference_data(recjson): ...@@ -72,7 +73,8 @@ def add_conference_data(recjson):
+---------------+-----------------------------------------------+ +---------------+-----------------------------------------------+
Args: Args:
recjson (dict): record data (MarcJSON) recjson (dict):
record data (MarcJSON)
Note: Note:
* Fields are not added when there is no conference identifier and * Fields are not added when there is no conference identifier and
...@@ -125,25 +127,53 @@ def add_conference_data(recjson): ...@@ -125,25 +127,53 @@ def add_conference_data(recjson):
except CdsException: except CdsException:
return return
# ........................................................................
#
# Add conference data to the recjson (cds.cern.ch)
# #
# extract the conference url if host in CDS:
# * information is in confjson[url] # extract the conference url
# * in most cases it is a dictionary # - information is in confjson[url]
# * it happens that it is a list. The first entry is for the conference # - in most cases it is a dictionary
# home page while the second one is for the proceedings (cds 2270940) # - when it is a list take the first entry which is for the
# - in other cases the url is not defined (cds 2258914) # home page while the second one is for the proceedings (cds 2270940)
# - in other cases the url is not defined (cds 2258914)
confurl = "" confurl = ""
if "url" in confjson: if "url" in confjson:
obj = confjson["url"] obj = confjson["url"]
confurl = (obj["url"] if isinstance(obj, dict) else obj[0]["url"]) confurl = (obj["url"] if isinstance(obj, dict) else obj[0]["url"])
recjson["meeting_name"] = confjson["meeting_name"]
recjson["meeting_note"] = {"recid": confjson["recid"], "url": confurl}
# ........................................................................ # ........................................................................
# #
# Add conference data to the recjson # Add conference data to the recjson (inspirehep.net)
# #
recjson["meeting_name"] = confjson["meeting_name"] elif host in INS:
recjson["meeting_note"] = {"recid": confjson["recid"], "url": confurl} address = confjson["addresses"][0]
start, end = confjson["opening_date"], confjson["closing_date"]
ds = datetime.strptime(start, "%Y-%m-%d")
de = datetime.strptime(end, "%Y-%m-%d")
if ds.month == de.month:
sdate = f"{ds.day:02} - {de.day:02} " + ds.strftime("%b %Y")
else:
sdate = f"{ds.strftime('%d %b')} - {de.strftime('%d %b %Y')}"
recjson["meeting_name"] = [{
"closing_date": end,
"coference_code": confjson["cnum"],
"country": address["country_code"],
"date": sdate,
"location": f"{address['country']}, {address['cities'][0]}",
"opening_date": start,
"year": confjson["opening_date"][:4]}]
recjson["meeting_note"] = {
"recid": confjson["control_number"],
"url": confjson["urls"][0]["value"]}
def build_record(recjson): def build_record(recjson):
......
...@@ -114,7 +114,7 @@ def test_add_conference_data_cds_02004(): ...@@ -114,7 +114,7 @@ def test_add_conference_data_cds_02004():
def test_add_conference_data_ins_02005(): def test_add_conference_data_ins_02005():
store = InvenioStore("inspirehep.net") store = InvenioStore("inspirehep.net", shelf="literature")
recjson = store.get_record(1089237) recjson = store.get_record(1089237)
add_conference_data(recjson) add_conference_data(recjson)
...@@ -143,7 +143,7 @@ def test_conference_cds_02006(): ...@@ -143,7 +143,7 @@ def test_conference_cds_02006():
def test_conference_ins_02007(): def test_conference_ins_02007():
store = InvenioStore("inspirehep.net") store = InvenioStore("inspirehep.net", shelf="literature")
recjson = store.get_record(1276938) recjson = store.get_record(1276938)
assert is_conference(recjson) assert is_conference(recjson)
...@@ -173,7 +173,7 @@ def test_talk_cds_02008(): ...@@ -173,7 +173,7 @@ def test_talk_cds_02008():
# #
def test_add_affiliation_keys_ins_02009(): def test_add_affiliation_keys_ins_02009():
"""CPPM""" """CPPM"""
store = InvenioStore("inspirehep.net") store = InvenioStore("inspirehep.net", shelf="institutions")
recjson = store.get_record(902989) recjson = store.get_record(902989)
add_affiliation_keys(recjson) add_affiliation_keys(recjson)
...@@ -185,7 +185,7 @@ def test_add_affiliation_keys_ins_02009(): ...@@ -185,7 +185,7 @@ def test_add_affiliation_keys_ins_02009():
def test_institute_ins_02010(): def test_institute_ins_02010():
"""CPPM""" """CPPM"""
store = InvenioStore("inspirehep.net") store = InvenioStore("inspirehep.net", shelf="institutions")
recjson = store.get_record(902989) recjson = store.get_record(902989)
assert not is_conference(recjson) assert not is_conference(recjson)
...@@ -217,7 +217,7 @@ def test_article_cds_02011(): ...@@ -217,7 +217,7 @@ def test_article_cds_02011():
def test_article_inspirehep_02012(): def test_article_inspirehep_02012():
"""Precision luminosity measurements at LHCb""" """Precision luminosity measurements at LHCb"""
store = InvenioStore("inspirehep.net") store = InvenioStore("inspirehep.net", shelf="literature")
recjson = store.get_record(1319638) recjson = store.get_record(1319638)
assert not is_conference(recjson) assert not is_conference(recjson)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment