Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
927ce661
Commit
927ce661
authored
Jun 29, 2017
by
LE GAC Renaud
Browse files
Migrate Proceeding.
parent
7dc22382
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
112 additions
and
25 deletions
+112
-25
modules/harvest_tools/proceedings.py
modules/harvest_tools/proceedings.py
+11
-9
modules/invenio_tools/factory.py
modules/invenio_tools/factory.py
+18
-14
modules/invenio_tools/record.py
modules/invenio_tools/record.py
+3
-0
tests/basis/test_02_record_factory.py
tests/basis/test_02_record_factory.py
+17
-0
tests/scan/test_Article.py
tests/scan/test_Article.py
+2
-2
tests/scan/test_Proceeding.py
tests/scan/test_Proceeding.py
+61
-0
No files found.
modules/harvest_tools/proceedings.py
View file @
927ce661
...
...
@@ -18,10 +18,12 @@ class Proceedings(Automaton):
"""Check the content of the proceeding in order to fix non conformities.
Args:
record (RecordConf): record describing a proceeding.
record (RecordConf):
record describing a proceeding.
Returns:
bool: ``False`` when a non conformity is found and can not be
bool:
``False`` when a non conformity is found and can not be
corrected.
"""
...
...
@@ -34,11 +36,9 @@ class Proceedings(Automaton):
try
:
self
.
check
.
is_conference
(
record
)
self
.
check
.
country
(
record
)
self
.
check
.
conference_date
(
record
,
self
.
harvester
.
host
)
self
.
check
.
conference_date
(
record
)
self
.
check
.
clean_erratum
(
record
)
self
.
check
.
submitted
(
record
)
self
.
check
.
year
(
record
)
self
.
check
.
format_editor
(
record
)
self
.
check
.
publisher
(
record
)
...
...
@@ -62,10 +62,12 @@ class Proceedings(Automaton):
"""Insert a conference proceeding in the database.
Args:
record (RecordConf): record describing a proceeding.
record (RecordConf):
record describing a proceeding.
Returns:
int: one when the record is inserted / updated in the database
int:
one when the record is inserted / updated in the database
zero otherwise.
"""
...
...
@@ -75,7 +77,7 @@ class Proceedings(Automaton):
# protection against proceeding not published in a journal
if
not
year
:
year
=
record
.
year
()
year
=
record
.
conference_
year
()
# alias
authors
=
record
.
authors
()
...
...
@@ -83,7 +85,7 @@ class Proceedings(Automaton):
pages
=
record
.
paper_pages
()
preprint
=
record
.
preprint_number
()
report_numbers
=
record
.
report_number
()
submitted
=
record
.
submitted
()
[
0
]
submitted
=
record
.
submitted
()
title
=
record
.
title
()
url
=
record
.
paper_url
()
volume
=
record
.
paper_volume
()
...
...
modules/invenio_tools/factory.py
View file @
927ce661
...
...
@@ -83,17 +83,18 @@ def add_conference_data(recjson):
"""
# ........................................................................
#
# Retrieve conference identifier
# Retrieve conference identifier
and the host
# - the algorithm depends on the store
# - for cds use aleph_linking_page
# - for inspire use publication_info.cnum
#
conf_id
,
conf_key
=
None
,
None
conf_id
,
conf_key
,
host
=
None
,
None
,
None
if
u
"aleph_linking_page"
in
recjson
:
di
=
recjson
[
u
"aleph_linking_page"
]
conf_id
=
di
[
u
"sysno"
]
conf_key
=
di
[
u
"up_link"
]
host
=
"cds.cern.ch"
elif
u
"publication_info"
in
recjson
:
data
=
recjson
[
u
"publication_info"
]
...
...
@@ -102,6 +103,7 @@ def add_conference_data(recjson):
for
di
in
data
:
if
u
"cnum"
in
di
:
conf_key
=
di
[
u
"cnum"
]
host
=
"inspirehep.net"
break
if
conf_id
is
None
and
conf_key
is
None
:
...
...
@@ -112,15 +114,6 @@ def add_conference_data(recjson):
# Get conference data
#
# extract the host name
if
u
"oai"
in
recjson
:
oai
=
recjson
[
u
"oai"
][
u
"value"
]
elif
u
"FIXME_OAI"
in
recjson
:
oai
=
recjson
[
u
"FIXME_OAI"
][
u
"id"
]
host
=
REG_OAI
.
match
(
oai
).
group
(
1
)
# get the data
if
conf_id
is
not
None
:
conf_id
=
(
conf_id
if
isinstance
(
conf_id
,
int
)
else
int
(
conf_id
))
...
...
@@ -129,14 +122,25 @@ def add_conference_data(recjson):
else
:
confjson
=
get_conference_data
(
host
,
key
=
conf_key
)
#
# extract the conference url
# * information is in confjson[url]
# * in most cases it is a dictionary
# * it happens that it is a list. The first entry is for the conference
# home page while the second one is for the proceeding (cds 2270940)
# - in other cases the url is not defined (cds 2258914)
confurl
=
u
""
if
u
"url"
in
confjson
:
obj
=
confjson
[
u
"url"
]
confurl
=
(
obj
[
u
"url"
]
if
isinstance
(
obj
,
dict
)
else
obj
[
0
][
u
"url"
])
# ........................................................................
#
# Add conference data to the recjson
#
recjson
[
u
"meeting_name"
]
=
confjson
[
u
"meeting_name"
]
recjson
[
u
"meeting_note"
]
=
{
u
"recid"
:
confjson
[
u
"recid"
],
u
"url"
:
confjson
[
u
"url"
][
u
"url"
]}
recjson
[
u
"meeting_note"
]
=
{
u
"recid"
:
confjson
[
u
"recid"
],
u
"url"
:
confurl
}
def
build_record
(
recjson
):
...
...
modules/invenio_tools/record.py
View file @
927ce661
...
...
@@ -295,6 +295,9 @@ class Record(dict):
It is an empty string when not defined
"""
if
u
"system_control_number"
not
in
self
:
return
u
""
data
=
self
[
u
"system_control_number"
]
data
=
(
data
if
isinstance
(
data
,
list
)
else
[
data
])
...
...
tests/basis/test_02_record_factory.py
View file @
927ce661
...
...
@@ -125,6 +125,23 @@ def test_add_conference_data():
assert
recjson
[
"meeting_note"
][
"recid"
]
==
980401
# ........................................................................
#
# EXCEPTION
#
store
=
InvenioStore
(
"cds.cern.ch"
)
# no conference URL
recjson
=
store
.
get_record
(
2258914
)
add_conference_data
(
recjson
)
assert
recjson
[
"meeting_note"
][
"url"
]
==
""
# several conference URLs (home page, proceeding)
recjson
=
store
.
get_record
(
2270940
)
add_conference_data
(
recjson
)
assert
recjson
[
"meeting_note"
][
"url"
]
==
\
"http://indico.ihep.ac.cn/event/5221/overview"
def
test_conference_cds
():
...
...
tests/scan/test_
01_
Article.py
→
tests/scan/test_Article.py
View file @
927ce661
# -*- coding: utf-8 -*-
"""test_
01_
Article
"""test_Article
* Harvester is Article
* Harvester is Article
s
* Store is cds.cern.ch
* LHCb ACL for the current year
* Check that all error messages are expected
...
...
tests/scan/test_Proceeding.py
0 → 100644
View file @
927ce661
# -*- coding: utf-8 -*-
"""test_Proceeding
* Harvester is Proceedings
* Store is cds.cern.ch
* LHCb ACTI for the current year
* Check that all error messages are expected
"""
import
pytest
from
gluon
import
current
from
harvest_tools.proceedings
import
Proceedings
from
harvest_tools.factory
import
build_harvester_tool
from
test_tools
import
messages
@pytest.fixture(scope="module")
def harvester_messages():
    """Provide the reference harvester messages for the tests of this module.

    The fixture is module-scoped, so ``test_tools.messages`` is called only
    once for all the tests of the module requesting it.

    Returns:
        the object returned by ``test_tools.messages()``.

    """
    expected = messages()
    return expected
def test_lhcb_acti(harvester_messages):
    """Run the proceedings harvester on LHCb proceedings of the current year.

    The harvester is built in dry-run and debug mode, then pointed at the
    ``cds.cern.ch`` store with the collection
    ``LHCb Conference Proceedings``. The test succeeds when every message
    found in the harvester logs belongs to ``harvester_messages``.

    Beyond the check itself, this test is handy to:

        * debug a harvester
        * profile its performance to see where the time is spent
        * compare different implementations to measure improvements
        * ...

    Args:
        harvester_messages:
            value provided by the module-scoped fixture of the same name.

    """
    # Database identifiers. They only make sense when records are
    # actually inserted in the database.
    lhcb_team = 7  # LHCb
    lhcb_project = 8  # LHCb
    acti_category = 7  # ACTI

    # Harvest the current year in order to test different cases.
    this_year = current.request.now.year

    # build the harvester
    tool = build_harvester_tool(
        current.db,
        lhcb_team,
        lhcb_project,
        "proceedings",
        acti_category,
        year_start=str(this_year),
        year_end="",
        dry_run=True,
        debug=True)

    assert isinstance(tool, Proceedings)

    # run the harvester
    tool.process_url("cds.cern.ch", "LHCb Conference Proceedings")

    # Analyse the log.
    # The number of proceedings cannot be checked since it evolves within
    # a year. Only test that there are no unexpected messages.
    logged = {entry.txt for entry in tool.logs}
    assert logged.issubset(harvester_messages)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment