Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
limbra
limbra
Commits
a0dd169b
Commit
a0dd169b
authored
Jan 15, 2021
by
LE GAC Renaud
Browse files
Update RecordCdsConfPaper and tests to use ConfMixin
parent
655a173d
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
57 additions
and
509 deletions
+57
-509
modules/store_tools/__init__.py
modules/store_tools/__init__.py
+1
-4
modules/store_tools/confmixin.py
modules/store_tools/confmixin.py
+4
-4
modules/store_tools/factory.py
modules/store_tools/factory.py
+5
-249
modules/store_tools/recordcdsconfpaper.py
modules/store_tools/recordcdsconfpaper.py
+8
-154
tests/basis/test_02_factory_tools.py
tests/basis/test_02_factory_tools.py
+8
-79
tests/basis/test_06_RecordCdsConfPaper.py
tests/basis/test_06_RecordCdsConfPaper.py
+19
-11
tests/basis/test_07_RecordHepConfPaper.py
tests/basis/test_07_RecordHepConfPaper.py
+12
-8
No files found.
modules/store_tools/__init__.py
View file @
a0dd169b
...
...
@@ -33,10 +33,7 @@ from .exception import (CheckException,
StoreException
,
ToolException
)
from
.factory
import
(
add_conference_data
,
build_record
,
build_store
,
get_conference_data
)
from
.factory
import
build_record
,
build_store
from
.inspirehepstore
import
InspirehepStore
from
.publicationinfomixin
import
PublicationInfoMixin
from
.cdsstore
import
CdsStore
...
...
modules/store_tools/confmixin.py
View file @
a0dd169b
...
...
@@ -92,11 +92,11 @@ class ConfMixin(object):
* empty string when it is not defined
"""
for
elt
in
self
[
"publication_info"
]:
if
"cnum"
in
elt
:
return
elt
[
"cnum"
]
conference
=
self
.
conference
if
conference
is
None
:
return
""
return
""
return
conference
.
get
(
"cnum"
,
""
)
def
conference_location
(
self
):
"""The conference location.
...
...
modules/store_tools/factory.py
View file @
a0dd169b
...
...
@@ -6,17 +6,12 @@ from .base import (CDS,
is_conference
,
is_institute
,
is_thesis
,
MSG_INV_CONF
,
MSG_INV_CONF_KEY
,
MSG_NO_CONF
,
MSG_NO_SHELF
,
REG_CONF
)
MSG_NO_SHELF
)
from
datetime
import
datetime
from
.exception
import
RecordException
,
StoreException
from
.cdsstore
import
CdsStore
from
.inspirehepstore
import
InspirehepStore
,
SHELFS
from
store_tools
.recordcdsconfpaper
import
RecordCdsConfPaper
from
.recordcdsconfpaper
import
RecordCdsConfPaper
from
.recordhepconfpaper
import
RecordHepConfPaper
from
.recordheppubli
import
RecordHepPubli
from
.recordhepinst
import
RecordHepInst
...
...
@@ -34,138 +29,6 @@ MSG_ERROR_INST = \
MSG_FAIL_UPCAST
=
"Failed to upcast the JSON record"
def _format_conference_dates(start, end):
    """Build the human readable date range of a conference.

    Args:
        start (str):
            opening date encoded as ``YYYY-MM-DD``.
        end (str):
            closing date encoded as ``YYYY-MM-DD``.

    Returns:
        str:
            ``6-12 Dec 2010`` when both dates have the same month number,
            otherwise ``28 Nov - 3 Dec 2010``.

    Raises:
        ValueError:
            when one of the dates does not follow ``YYYY-MM-DD``.

    """
    ds = datetime.strptime(start, "%Y-%m-%d")
    de = datetime.strptime(end, "%Y-%m-%d")

    if ds.month == de.month:
        return f"{ds.day}-{de.day} " + ds.strftime("%b %Y")

    # The day number is read from the ``day`` attribute rather than from the
    # '%-d' directive: the latter is a platform specific extension of
    # strftime which is not available everywhere.
    return f"{ds.day} {ds.strftime('%b')} - {de.day} {de.strftime('%b %Y')}"


def add_conference_data(recjson):
    """Add the conference data to the recjson.

    The record is modified in place and nothing is returned.

    Note:
        Encoding of conference information depends on the store.
        It adds the following fields and subfields::

            +---------------+-----------------------------------------------+
            | field         | subfield                                      |
            +---------------+-----------------------------------------------+
            | meeting_name  | closing_date, coference_code, country, date,  |
            |               | location, opening_date, year                  |
            | meeting_note  | recid, url                                    |
            +---------------+-----------------------------------------------+

        For cds.cern.ch the field ``meeting_name`` is copied as it is from
        the conference record. For inspirehep.net it is rebuilt, as a list
        with a single dictionary, from the conference record.

    Args:
        recjson (dict):
            record data (MarcJSON)

    Note:
        * Fields are not added when there is no conference identifier and
          no conference key in the recjson.
        * Fields are not added either when the retrieval of the conference
          data raises a StoreException.
        * The method CheckAndFix.is_conference will identify that case.

    """
    # ........................................................................
    #
    # Retrieve conference identifier and the host
    #   - the algorithm depends on the store
    #   - for cds use aleph_linking_page
    #   - for inspire use publication_info.cnum
    #
    conf_id, conf_key, host = None, None, None

    if "aleph_linking_page" in recjson:
        di = recjson["aleph_linking_page"]
        conf_id = di["sysno"]
        conf_key = di.get("up_link", None)
        host = "cds.cern.ch"

    elif "publication_info" in recjson:
        data = recjson["publication_info"]
        data = (data if isinstance(data, list) else [data])

        # the first entry with a cnum wins
        for di in data:
            if "cnum" in di:
                conf_key = di["cnum"]
                host = "inspirehep.net"
                break

    if conf_id is None and conf_key is None:
        return

    # ........................................................................
    #
    # Get conference data
    #   - the identifier is preferred to the key
    #
    if conf_id is not None:
        conf_id = (conf_id if isinstance(conf_id, int) else int(conf_id))
        kwargs = dict(conf_id=conf_id)

    else:
        kwargs = dict(key=conf_key)

    try:
        confjson = get_conference_data(host, **kwargs)

    except StoreException:
        # best effort: the record is left untouched
        return

    # ........................................................................
    #
    # Add conference data to the recjson (cds.cern.ch)
    #
    if host in CDS:

        # extract the conference url
        #   - information is in confjson[url]
        #   - in most of the cases it is a dictionary
        #   - when it is a list take the first entry which is for the home
        #     page while the second one is for the proceeding (cds 2270940)
        #   - in other cases the url is not defined (cds 2258914)
        #   - an empty list is handled as an undefined url
        confurl = ""
        if "url" in confjson:
            obj = confjson["url"]
            if isinstance(obj, dict):
                confurl = obj["url"]
            elif obj:
                confurl = obj[0]["url"]

        recjson["meeting_name"] = confjson["meeting_name"]
        recjson["meeting_note"] = {"recid": confjson["recid"],
                                   "url": confurl}

    # ........................................................................
    #
    # Add conference data to the recjson (inspirehep.net)
    #
    elif host in INS:

        # location of the conference (first address with a country)
        address = \
            [el for el in confjson["addresses"] if el.get("country")][0]

        # date of the conference 6-12 Dec 2010
        start, end = confjson["opening_date"], confjson["closing_date"]
        sdate = _format_conference_dates(start, end)

        # URL of the conference (take the first value)
        urls = confjson.get("urls")
        if urls is None:
            url = ""
        elif isinstance(urls, list) and len(urls) > 0:
            url = urls[0]["value"]
        else:
            url = "???"

        # add
        # NOTE: the spelling "coference_code" is the one used for that
        # subfield everywhere in this code, do not fix it here only.
        recjson["meeting_name"] = [{
            "closing_date": end,
            "coference_code": confjson["cnum"],
            "country": address["country_code"],
            "date": sdate,
            "location": f"{address['cities'][0]}, {address['country']}",
            "meeting": confjson["titles"][0]["title"],
            "opening_date": start,
            "year": confjson["opening_date"][:4]}]

        recjson["meeting_note"] = {"recid": confjson["control_number"],
                                   "url": url}
def
build_record
(
recjson
,
shelf
=
None
):
"""Transform a JSON object into a record
...
...
@@ -185,8 +48,9 @@ def build_record(recjson, shelf=None):
Return
Record:
either RecordCdsConfPaper, RecordHepConfPaper, RecodHepPubli, RecordHepInst,
RecordHepThesis, RecordHepInst, RecordCdsPubli or RecordCdsThesis
either RecordCdsConfPaper, RecordHepConfPaper, RecodHepPubli,
RecordHepInst, RecordHepThesis, RecordHepInst, RecordCdsPubli
or RecordCdsThesis
Raises:
RecordException
...
...
@@ -198,7 +62,6 @@ def build_record(recjson, shelf=None):
#
if
shelf
is
None
:
if
is_conference
(
recjson
):
add_conference_data
(
recjson
)
upcast_record
=
RecordCdsConfPaper
(
recjson
)
elif
is_institute
(
recjson
):
...
...
@@ -268,110 +131,3 @@ def build_store(host=None, shelf=None):
raise
StoreException
(
MSG_NO_SHELF
%
(
shelf
,
host
))
return
store
def get_conference_data(host, conf_id=None, key=None):
    """Get the conference data identified by its id or key.

    Args:
        host (str):
            name of the store. It has to belong either to ``CDS`` or to
            ``INS``, e.g. ``cds.cern.ch`` or ``inspirehep.net``.

        conf_id (int):
            the conference identifier in the store.
            This is the preferred way.

        key (str): the conference key in the store.

    Returns:
        dict:
            The conference data (MarcJSON). ``None`` is returned when
            neither *conf_id* nor *key* is given, or when *host* belongs
            neither to ``CDS`` nor to ``INS``.

    Raises:
        StoreException:
            - conference record with a wrong identifier
            - conference key with an invalid pattern (inspirehep.net)
            - conference not found

    """
    store = build_store(host, shelf="conferences")

    # ........................................................................
    #
    # search by id in cds.cern.ch
    #
    if conf_id is not None and host in CDS:
        recjson = store.get_record(conf_id)

        # check that the store returns the requested record
        if recjson["recid"] != conf_id:
            raise StoreException(MSG_INV_CONF)

        return recjson

    # ........................................................................
    #
    # search by key in cds.cern.ch
    #   - scan the candidates and keep the first one with the right key
    #
    if key is not None and host in CDS:
        ids = store.get_ids(p=key)
        for conf_id in ids:
            recjson = store.get_record(conf_id)
            if match_conference_key(recjson, key):
                return recjson

        raise StoreException(MSG_NO_CONF)

    # ........................................................................
    #
    # search by id in inspirehep.net
    #
    if conf_id is not None and host in INS:
        return store.get_record(conf_id)

    # ........................................................................
    #
    # search by key in inspirehep.net
    #
    if key is not None and host in INS:

        # the separator "/" is replaced by "-" before checking the pattern
        key = key.replace("/", "-")
        if not REG_CONF.match(key):
            raise StoreException(MSG_INV_CONF_KEY)

        obj = store.search(q=f"cnum:{key}")

        # an empty answer (IndexError) means that the conference is not
        # found, as well as an answer with an unexpected structure
        try:
            recjson = obj[0]["metadata"]

        except (IndexError, KeyError, TypeError) as err:
            raise StoreException(MSG_NO_CONF) from err

        # a record without cnum cannot be the requested conference
        if recjson.get("cnum") != key:
            raise StoreException(MSG_NO_CONF)

        return recjson
def match_conference_key(recjson, conf_key):
    """Return ``True`` when the record corresponds to a conference identified
    by its key.

    Args:
        recjson (dict):
            record formatted MarcJSON. The field ``meeting_name`` is
            either a list of dictionaries or a single dictionary.

        conf_key (str):
            conference key

    Returns:
        bool:
            ``True`` when one entry of ``meeting_name`` has the subfield
            ``coference_code`` equal to *conf_key*, ``False`` otherwise,
            in particular when the field ``meeting_name`` is missing.

    """
    if "meeting_name" not in recjson:
        return False

    # a single entry can be encoded as a dictionary instead of a list
    meetings = recjson["meeting_name"]
    if isinstance(meetings, dict):
        meetings = [meetings]

    # NOTE: "coference_code" is the spelling used for that subfield
    # everywhere in this code, it is not a typo to be fixed here.
    subfield = "coference_code"

    return any(subfield in di and di[subfield] == conf_key
               for di in meetings)
modules/store_tools/recordcdsconfpaper.py
View file @
a0dd169b
...
...
@@ -3,15 +3,15 @@
"""
import
re
from
.base
import
REG_CONF
,
REG_YEAR
,
T4
,
T6
from
.base
import
T4
,
T6
from
.cdsstore
import
CdsStore
from
plugin_dbui
import
CLEAN_SPACES
from
.confmixin
import
ConfMixin
from
.recordcdspubli
import
RecordCdsPubli
REX_DATE8
=
re
.
compile
(
r
"(\d{4})(\d{2})(\d{2})"
)
class
RecordCdsConfPaper
(
RecordCdsPubli
):
class
RecordCdsConfPaper
(
RecordCdsPubli
,
ConfMixin
):
"""The record describing a conference talk or a proceeding.
Attributes:
...
...
@@ -64,7 +64,7 @@ class RecordCdsConfPaper(RecordCdsPubli):
logger
.
debug
(
f
"
{
T6
}
search by conference by id
{
conf_id
}
"
)
recjson
=
store
.
get_record
(
conf_id
)
if
recjson
[
"recid"
]
!=
conf_id
:
if
recjson
[
"recid"
]
!=
int
(
conf_id
)
:
logger
.
debug
(
f
"
{
T6
}
failed to retrieve conference by id"
)
if
recjson
.
get
(
"meeting_name"
,
None
)
is
None
:
...
...
@@ -108,6 +108,7 @@ class RecordCdsConfPaper(RecordCdsPubli):
return
city
,
country
=
data
.
get
(
"location"
,
","
).
split
(
","
)
url
=
recjson
.
get
(
"url"
,
{}).
get
(
"url"
,
None
)
dct
=
{
"addresses"
:
[{
...
...
@@ -115,9 +116,10 @@ class RecordCdsConfPaper(RecordCdsPubli):
"country"
:
country
.
strip
()}],
"cnum"
:
data
.
get
(
"coference_code"
),
"closing_date"
:
data
.
get
(
"closing_date"
,
None
),
"control_number"
:
recjson
[
"recid"
],
"opening_date"
:
data
.
get
(
"opening_date"
,
None
),
"titles"
:
[{
"
valu
e"
:
data
.
get
(
"meeting"
,
None
)}],
"urls"
:
[
recjson
.
get
(
"url"
,
{}).
get
(
"url"
,
None
)]
,
"titles"
:
[{
"
titl
e"
:
data
.
get
(
"meeting"
,
None
)}],
"urls"
:
(
None
if
url
is
None
else
[{
"value"
:
url
}])
,
"year"
:
data
.
get
(
"year"
,
None
)}
# date format issue YYYYMMDD to YYYY-MM-DD
...
...
@@ -130,151 +132,3 @@ class RecordCdsConfPaper(RecordCdsPubli):
#
# Append conference data
self
.
conference
=
dct
def conference_country(self):
    """Return the country in which the conference was held.

    The country is the text located after the last comma of the string
    returned by ``conference_location``.

    Returns:
        str:
            the filter *CLEAN_SPACES* is applied.
            An empty string is returned when the location is empty.

    """
    # NOTE:
    #   * the location is used since it is defined for both the cds and
    #     the inspire store
    #
    #   * the subfield country contains the country code (IT, FR, ..)
    #     and it is only defined for cds
    #
    where = self.conference_location()
    if len(where) > 0:
        return CLEAN_SPACES(where.rsplit(",", 1)[-1])

    return ""
def conference_dates(self):
    """Return the dates of the conference.

    The value is read from the subfield ``date`` of the field
    ``meeting_name``.

    Returns:
        str:
            the usual pattern is ``5-6 March 2012`` but it can vary
            between records and between stores since it is not
            standardised. The value given by ``_get`` is returned
            unchanged when it is not a non-empty list.

    """
    dates = self._get("meeting_name", "date")

    # several values: assume that the first one is the correct one
    if isinstance(dates, list) and len(dates) > 0:
        return dates[0]

    return dates
def conference_id(self):
    """Return the identifier of the conference in the store.

    It is read from the subfield ``recid`` of the field ``meeting_note``.

    Returns:
        int or None:
            ``None`` when the field or the subfield is missing.

    """
    if "meeting_note" in self:
        return self["meeting_note"].get("recid")

    return None
def conference_key(self):
    """The conference key used in the store.

    The algorithm depends on the store:

        * CDS: subfield ``up_link`` of the field ``aleph_linking_page``
        * INSPIRE: column ``cnum`` of the field ``publication_info``

    Returns:
        str:
            empty string when not defined, namely when none of the two
            fields is present, when the subfield ``up_link`` is missing
            or when the number of matching ``cnum`` differs from one.

    """
    # default value, also used when none of the fields is present
    value = ""

    # CDS
    if "aleph_linking_page" in self:
        value = self["aleph_linking_page"].get("up_link", "")

    # INSPIRE
    # NOTE(review): the access pattern (.cnum.str.match, .iloc) suggests
    # that publication_info is a pandas DataFrame here -- to be confirmed.
    elif "publication_info" in self:
        df = self["publication_info"]

        # the explicit comparison to True is kept on purpose: it builds
        # a boolean mask in which missing values are set to False
        cnums = df[df.cnum.str.match(REG_CONF.pattern) == True].cnum

        # ambiguous when more than one conference key is found
        if len(cnums) == 1:
            value = cnums.iloc[0]

    return value
def conference_location(self):
    """Return the location of the conference.

    It is read from the subfield ``location`` of the field
    ``meeting_name``.

    Returns:
        str:
            - the pattern is ``town, country``
            - empty string when more than one location is found
            - empty string when not defined

            The filter *CLEAN_SPACES* is applied in all cases.

    """
    found = self._get("meeting_name", "location", force_list=True)

    # only an unambiguous location is kept
    if len(found) == 1:
        return CLEAN_SPACES(found[0])

    return CLEAN_SPACES("")
def conference_title(self):
    """Return the title of the conference.

    It is read from the subfield ``meeting`` of the field
    ``meeting_name``.

    Returns:
        str:
            the filter *CLEAN_SPACES* is applied.

    """
    title = self._get("meeting_name", "meeting")

    # several values: assume that the first one is the correct one
    if isinstance(title, list):
        title = title[0]

    return CLEAN_SPACES(title)
def conference_town(self):
    """Return the town in which the conference was held.

    The town is the text located before the first comma of the string
    returned by ``conference_location``.

    Returns:
        str:
            the filter *CLEAN_SPACES* is applied.
            An empty string is returned when the location is empty.

    """
    where = self.conference_location()
    if len(where) > 0:
        return CLEAN_SPACES(where.partition(",")[0])

    return ""
def conference_url(self):
    """Return the URL of the conference home page.

    It is read from the subfield ``url`` of the field ``meeting_note``.

    Returns:
        str:
            empty string when the field or the subfield is missing.

    """
    if "meeting_note" in self:
        return self["meeting_note"].get("url", "")

    return ""
def conference_year(self):
    """Return the year of the conference.

    The year is searched, with the regular expression *REG_YEAR*,
    in the string returned by ``conference_dates``.

    Returns:
        str:
            the first group of the match or an empty string when
            the regular expression does not match.

    """
    found = REG_YEAR.search(self.conference_dates())

    return found.group(1) if found else ""
tests/basis/test_02_factory_tools.py
View file @
a0dd169b
...
...
@@ -5,10 +5,8 @@
* Test tools to get / add conference data.
"""
from
store_tools
import
(
add_conference_data
,
build_record
,
from
store_tools
import
(
build_record
,
build_store
,
get_conference_data
,
is_conference
,
is_institute
,
is_thesis
,
...
...
@@ -24,75 +22,9 @@ from store_tools import (add_conference_data,
# ............................................................................
#
# Conference
proceeding and talk
# Conference
(is_conference, build_record)
#
def test_get_conference_data_cds_02001():
    """Retrieve the same conference from cds.cern.ch by id and by key.

    The map of identifiers and keys is the following:

        +------------------+--------------+-----------|
        |                  | cds          | inspire   |
        +------------------+--------------+-----------|
        | proceeding recid | 1411352      | 1089237   |
        | conference recid | 1181092      | 980401    |
        | conference key   | rome20101206 | C10-12-06 |
        +------------------+--------------+-----------|

    """
    expected_id, expected_key = 1181092, "rome20101206"

    def check(confjson):
        assert confjson["recid"] == expected_id
        assert confjson["meeting_name"][0]["coference_code"] == expected_key

    # by id
    by_id = get_conference_data("cds.cern.ch", conf_id=1181092)
    check(by_id)

    # by key
    by_key = get_conference_data("cds.cern.ch", key="rome20101206")
    check(by_key)

    # both ways give the same record
    assert by_id == by_key
# v1.4.0 remove obsolete test_get_conference_data_ins_02002
def test_add_conference_data_cds_02003():
    """Add the conference data to the proceeding 1411352 of cds.cern.ch."""
    cds = build_store("cds.cern.ch")
    proceeding = cds.get_record(1411352)

    add_conference_data(proceeding)

    # both fields are added
    for field in ("meeting_name", "meeting_note"):
        assert field in proceeding

    meeting = proceeding["meeting_name"][0]
    assert meeting["coference_code"] == "rome20101206"

    note = proceeding["meeting_note"]
    assert note["url"] == "http://www.roma1.infn.it/discrete10"
    assert note["recid"] == 1181092
def test_add_conference_data_cds_02004():
    """Check the conference URL for peculiar records of cds.cern.ch."""
    # ........................................................................
    #
    # EXCEPTION
    #
    cds = build_store("cds.cern.ch")

    cases = (
        # no conference URL
        (2258914, ""),
        # several conference URLs (home page, proceeding)
        (2270940, "http://indico.ihep.ac.cn/event/5221/overview"))

    for recid, expected_url in cases:
        proceeding = cds.get_record(recid)
        add_conference_data(proceeding)
        assert proceeding["meeting_note"]["url"] == expected_url
# v1.4.0 remove obsolete test_add_conference_data_ins_02005
def
test_conference_cds_02006
():
def
test_is_conference_cds_02001
():
store
=
build_store
(
"cds.cern.ch"
)
recjson
=
store
.
get_record
(
1411352
)
...
...
@@ -105,7 +37,7 @@ def test_conference_cds_02006():
assert
isinstance
(
record
,
RecordCdsConfPaper
)
def
test_conference_ins_0200
7
():
def
test_
is_
conference_ins_0200
2
():
store
=
build_store
(
"inspirehep.net"
,
shelf
=
"literature"
)
recjson
=
store
.
get_record
(
1276938
)
...
...
@@ -118,7 +50,7 @@ def test_conference_ins_02007():
assert
isinstance
(
record
,
RecordHepConfPaper
)