Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit e9e676b2 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Migrate RecordConf.

parent 35b24bcb
......@@ -28,7 +28,8 @@ REG_ARXIV_NUMBER = re.compile("\d+\.\d+")
# group(3) is the part of the first name after the separator (" ", "-")
REG_AUTHOR = re.compile(r"^([\w\- ]+), (\w+)\.?[\- ]*(\w+)*\.?$", re.UNICODE)
# NOTE: every pattern is a raw string so that sequences such as ``\d`` and
# ``\.`` reach the regex engine untouched instead of being parsed as
# (invalid) Python string escapes.
REG_CONF = re.compile(r"^C\d+-\d+-\d+(\.\d+)?$")
REG_DATE = re.compile(r"(\d{4}-\d{2}-\d{2})")
# conference key such as ``C10-12-06`` with an optional ``.N`` suffix;
# this assignment replaces the previous one (the suffix group is
# non-capturing here)
REG_CONF = re.compile(r"^C\d+-\d+-\d+(?:\.\d+)?$")
REG_OAI = re.compile(r"oai:([a-z\.]+):([\d]+)")
REG_YEAR = re.compile(r"(\d{4})")
......
""" invenio_tools.recordconf
"""
import re
from .base import REG_YEAR
from datetime import datetime
from .base import REG_CONF, REG_YEAR
from plugin_dbui import CLEAN_SPACES
from .recordpubli import RecordPubli
REG_DATE = re.compile(r"(\d{4}-\d{2}-\d{2})")
class RecordConf(RecordPubli):
"""The MARC record describing a conference talk or a proceeding.
The relation between methods and MARC fields is the following::
+-----------------------+---------+----------+
| | CDS | INSPIREP |
+-----------------------+---------+----------+
| conference date | 111 d | None |
| conference end | 111 z | 111 y |
| conference key | 111 g | |
| conference location | 111 c | |
| conference title | 111 a | |
| conference start | None | 111 x |
| conference URL | 8564 u | |
| conference year | 111 f | |
| ref. conf. id | 962 b | |
| ref. conf. key | 962 n | 773 w |
| ref. conf. proceeding | 7870 w | |
| ref. conf. talk | 7870 w | |
+-----------------------+---------+----------+
"""The record describing a conference talk or a proceeding.
Additional fields describing the conference data are::
+---------------+-----------------------------------------------+
| field | subfield |
+---------------+-----------------------------------------------+
| meeting_name | closing_date, coference_code, country, date, |
| | location, opening_date, year |
| meeting | recid, url |
+---------------+-----------------------------------------------+
"""
def conference_dates(self):
"""The dates of the conference.
def conference_country(self):
"""The country where the conference took place.
Returns:
str: the usual pattern is ``6-5 March 2012`` but it can vary
between records and between stores since it is not
standardised.
unicode:
the filter *CLEAN_SPACES* is applied.
The string is empty when the country is not defined.
"""
if self.host().startswith("inspirehep"):
start = self._get("111", "x")
end = self._get("111", "y")
# NOTE:
# * country is extracted from the location since it is defined
# for both the cds and inspire stores
#
# * The subfield country contains the country code (IT, FR, ...).
# It is only defined for cds
#
location = self.conference_location()
if not (REG_DATE.match(start) and REG_DATE.match(end)):
return ""
if len(location) == 0:
return u""
ds = datetime.strptime(start, "%Y-%m-%d")
de = datetime.strptime(end, "%Y-%m-%d")
return CLEAN_SPACES(location.split(",")[-1])
if ds.month == de.month:
value = "%i-%i %s" % (ds.day, de.day, ds.strftime("%b %Y"))
else:
tpl = (ds.strftime("%d %b"), de.strftime("%d %b %Y"))
value = "%s - %s" % tpl
def conference_dates(self):
"""The dates of the conference.
else:
value = self._get("111", "d")
Returns:
unicode:
the usual pattern is ``6-5 March 2012`` but it can vary
between records and between stores since it is not
standardised.
return value
"""
# for list assume that the first item is the correct one
val = self._get(u"meeting_name", u"date")
val = (val[0] if isinstance(val, list) and len(val) > 0 else val)
return val
def conference_country(self):
"""The country where the conference took place.
def conference_id(self):
"""The conference identifier used in the store.
Returns:
str: the filter *CLEAN_SPACES* is applied. The string is empty
when the country is not defined.
int:
"""
loc = self.conference_location()
if loc:
return CLEAN_SPACES(loc.split(",")[-1])
return ""
return self[u"meeting"][u"recid"]
def conference_key(self):
"""The conference key used in the store.
Returns:
str:
unicode:
- empty string when not defined
"""
return self._get("111", "g")
# algorithm depends on the store
# CDS
if u"aleph_linking_page" in self:
value = self[u"aleph_linking_page"][u"up_link"]
# INSPIRE
elif u"publication_info" in self:
df = self[u"publication_info"]
cnums = df[df.cnum.str.match(REG_CONF.pattern) == True].cnum
if len(cnums) == 1:
value = cnums.iloc[0]
else:
value = u""
return value
def conference_location(self):
"""The conference location.
Returns:
str: the pattern is ``town, country``
unicode:
- the pattern is ``town, country``
- empty string when more than one location found
- empty string when not defined
"""
location = self._get("111", "c")
# protection against
# ["NOW 2012", "Conca Specchiulla, Otranto, Lecce, Italy"]
if isinstance(location, list) and len(location) == 2:
location = location[1]
location = self._get(u"meeting_name", u"location", force_list=True)
location = (location[0] if len(location) == 1 else u"")
return CLEAN_SPACES(location)
......@@ -111,122 +114,49 @@ class RecordConf(RecordPubli):
str:
"""
return CLEAN_SPACES(self._get("111", "a"))
# for list assume that the first item is the correct one
value = self._get(u"meeting_name", u"meeting")
value = (value[0] if isinstance(value, list) else value)
return CLEAN_SPACES(value)
def conference_town(self):
"""The town where the conference took place.
Returns:
str: empty string when it is not defined.
unicode:
empty string when it is not defined.
"""
loc = self.conference_location()
if loc:
return CLEAN_SPACES(loc.split(",")[0])
return ""
location = self.conference_location()
if len(location) == 0:
return u""
return CLEAN_SPACES(location.split(",")[0])
def conference_url(self):
"""The URL of the conference home page.
Returns:
str: select arbitrarily the first URL when several
unicode:
select arbitrarily the first URL when several
are found. The string is empty when the URL
is not defined.
"""
li = self._get("8564", "u", force_list=True)
# protection
# from time to time this field contains the reference to the pdf file
val = []
for el in li:
if not el.endswith("pdf"):
val.append(el)
# if more than one URL is associated to the record
# select arbitrarily the first one
if val:
return val[0]
return ""
return self[u"meeting"][u"url"]
def conference_year(self):
"""The year of the conference.
Returns:
str:
unicode:
empty string when it is not defined.
"""
year = self._get("111", "f")
if year:
return year
# recovery from conference dates
# from the conference dates
match = REG_YEAR.search(self.conference_dates())
if match:
return match.group(1)
return ""
def reference_conference_id(self):
"""The *record id* of the conference when the record is a proceeding
or a conference talk.
Returns:
str: empty string when it is not defined.
"""
return self._get("962", "b")
def reference_conference_key(self):
"""The conference *key* when the record is a proceeding
or a conference talk.
Returns:
str: empty string when it is not defined.
"""
val = ""
# the location of this values depends on the store
# cds.cern.ch (962, n) and inspirehep.net (773,w).
#
# NOTE: for the latter the field can be a dictionary or
# a list of dictionaries. Two types exist. One for the
# proceeding, the other containing the conference key.
# All topologies exist: proc, conf, proc+conf
#
if self.host().startswith("inspirehep") and "773" in self:
if isinstance(self["773"], dict) and "w" in self["773"]:
val = self["773"]["w"]
elif isinstance(self["773"], list):
for di in self["773"]:
if "w" in di:
val = di["w"]
elif "962" in self and "n" in self["962"]:
val = self["962"]["n"]
return val
def reference_conference_proceeding(self):
"""The *record id* of the proceeding when the record is a
conference talk.
Returns:
str: the *record id* of the proceeding in the store.
"""
return self._get("7870", "w")
def reference_conference_talk(self):
"""The *record id* of the conference talk when the record
is a proceeding.
Returns:
str: the *record id* of the talk in the store
"""
return self._get("7870", "w")
return u""
# -*- coding: utf-8 -*-
"""test_05_RecordConf
Test all methods of the RecordConf class for a given proceeding:
http://cds.cern.ch/record/1411352.
(same as http://inspirehep.net/record/1089237)
Prospects for CP violation in B0s --> J/psiphi from first LHCb data
O. Leroy, Symposium on Prospects in the Physics of Discrete Symmetries,
Rome, Italy, 6 - 11 Dec 2010
J. Phys.: Conf. Ser. 335 (2011) 012042
No corrections are applied to the record.
This allows testing the brute force decoding with its mistakes.
Note:
* Only the first author is defined
* The submitted date is 05 Jan 2012
* conference date 6 - 11 Dec 2010
"""
import pytest
from invenio_tools import load_record
@pytest.fixture(scope="module")
def reccds():
    """Module-scoped fixture: record 1411352 loaded from cds.cern.ch."""
    store, record_id = "cds.cern.ch", 1411352
    return load_record(store, record_id)
@pytest.fixture(scope="module")
def recins():
    """Module-scoped fixture: record 1089237 loaded from inspirehep.net."""
    store, record_id = "inspirehep.net", 1089237
    return load_record(store, record_id)
def test_conference_location(reccds, recins):
    """Both records must report ``Rome, Italy`` as the conference location."""
    expected = "Rome, Italy"
    cds_location = reccds.conference_location()
    assert cds_location == expected
    ins_location = recins.conference_location()
    assert ins_location == expected
def test_conference_country(reccds, recins):
    """Both records must report ``Italy`` as the conference country."""
    expected = "Italy"
    cds_country = reccds.conference_country()
    assert cds_country == expected
    ins_country = recins.conference_country()
    assert ins_country == expected
def test_conference_dates(reccds, recins):
    """Check the conference dates; the expected spacing differs per record."""
    cds_dates = reccds.conference_dates()
    assert cds_dates == "6 - 11 Dec 2010"
    ins_dates = recins.conference_dates()
    assert ins_dates == "6-11 Dec 2010"
def test_conference_id(reccds, recins):
    """Check the conference identifier expected for each record."""
    cds_id = reccds.conference_id()
    assert cds_id == 1181092
    ins_id = recins.conference_id()
    assert ins_id == 980401
def test_conference_key(reccds, recins):
    """Check the conference key expected for each record."""
    cds_key = reccds.conference_key()
    assert cds_key == "rome20101206"
    ins_key = recins.conference_key()
    assert ins_key == "C10-12-06"
def test_conference_title(reccds, recins):
    """Check the conference title expected for each record."""
    cds_title = reccds.conference_title()
    assert cds_title == (
        "Symposium on Prospects in the Physics of Discrete Symmetries"
    )
    ins_title = recins.conference_title()
    assert ins_title == (
        "2nd Symposium on Prospects in the Physics of Discrete Symmetries"
    )
def test_conference_town(reccds, recins):
    """Both records must report ``Rome`` as the conference town."""
    expected = "Rome"
    cds_town = reccds.conference_town()
    assert cds_town == expected
    ins_town = recins.conference_town()
    assert ins_town == expected
def test_conference_url(reccds, recins):
    """Both records must report the same conference home page URL."""
    expected = "http://www.roma1.infn.it/discrete10"
    cds_url = reccds.conference_url()
    assert cds_url == expected
    ins_url = recins.conference_url()
    assert ins_url == expected
def test_conference_year(reccds, recins):
    """Both records must report ``2010`` as the conference year."""
    expected = "2010"
    cds_year = reccds.conference_year()
    assert cds_year == expected
    ins_year = recins.conference_year()
    assert ins_year == expected
# def test_reference_conference_id(reccds):
# from pprint import pprint
# pprint(reccds["meeting_name"])
# pprint(recins["meeting_name"])
# assert reccds.reference_conference_id() == "1181092"
#
#
# def test_reference_conference_key(reccds):
# assert reccds.reference_conference_key() == "rome20101206"
#
#
# def test_reference_conference_proceeding(reccds):
# assert reccds.reference_conference_proceeding() == "1313736"
#
#
# def test_reference_conference_talk(reccds):
# assert reccds.reference_conference_talk() == "1313736"
......@@ -39,38 +39,6 @@ def test_collaboration(record):
assert record.collaboration() == ""
def test_conference_country(record):
    """The conference country of the record must be ``Italy``."""
    country = record.conference_country()
    assert country == "Italy"
def test_conference_dates(record):
    """The conference dates of the record must be ``6 - 11 Dec 2010``."""
    dates = record.conference_dates()
    assert dates == "6 - 11 Dec 2010"
def test_conference_key(record):
    """The conference key of the record must be ``rome20101206``."""
    key = record.conference_key()
    assert key == "rome20101206"
def test_conference_location(record):
    """The conference location of the record must be ``Rome, Italy``."""
    location = record.conference_location()
    assert location == "Rome, Italy"
def test_conference_title(record):
    """Check the conference title returned by the record."""
    title = record.conference_title()
    assert title == (
        "Symposium on Prospects in the Physics of Discrete Symmetries"
    )
def test_conference_town(record):
    """The conference town of the record must be ``Rome``."""
    town = record.conference_town()
    assert town == "Rome"
def test_conference_url(record):
    """Check the conference home page URL returned by the record."""
    url = record.conference_url()
    assert url == "http://www.roma1.infn.it/discrete10"
def test_conference_year(record):
    """The conference year of the record must be ``2010``."""
    year = record.conference_year()
    assert year == "2010"
def test_first_author(record):
    """The first author of the record must be ``Leroy, Olivier``."""
    author = record.first_author()
    assert author == "Leroy, Olivier"
......@@ -116,22 +84,6 @@ def test_submitted(record):
assert record.submitted() == ["05 Jan 2012"]
def test_reference_conference_id(record):
    """The referenced conference id must be the string ``1181092``."""
    conf_id = record.reference_conference_id()
    assert conf_id == "1181092"
def test_reference_conference_key(record):
    """The referenced conference key must be ``rome20101206``."""
    conf_key = record.reference_conference_key()
    assert conf_key == "rome20101206"
def test_reference_conference_proceeding(record):
    """The referenced proceeding id must be the string ``1313736``."""
    proceeding_id = record.reference_conference_proceeding()
    assert proceeding_id == "1313736"
def test_reference_conference_talk(record):
    """The referenced talk id must be the string ``1313736``."""
    talk_id = record.reference_conference_talk()
    assert talk_id == "1313736"
def test_report_number(record):
    """Check the comma-separated report numbers returned by the record."""
    numbers = record.report_number()
    assert numbers == "CERN-LHCb-PROC-2012-001, LHCb-PROC-2012-001"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment