Commit 927ce661 authored by LE GAC Renaud
Browse files

Migrate Proceeding.

parent 7dc22382
......@@ -18,10 +18,12 @@ class Proceedings(Automaton):
"""Check the content of the proceeding in order to fix non conformities.
Args:
record (RecordConf): record describing a proceeding.
record (RecordConf):
record describing a proceeding.
Returns:
bool: ``False`` when a non conformity is found and can not be
bool:
``False`` when a non conformity is found and can not be
corrected.
"""
......@@ -34,11 +36,9 @@ class Proceedings(Automaton):
try:
self.check.is_conference(record)
self.check.country(record)
self.check.conference_date(record, self.harvester.host)
self.check.conference_date(record)
self.check.clean_erratum(record)
self.check.submitted(record)
self.check.year(record)
self.check.format_editor(record)
self.check.publisher(record)
......@@ -62,10 +62,12 @@ class Proceedings(Automaton):
"""Insert a conference proceeding in the database.
Args:
record (RecordConf): record describing a proceeding.
record (RecordConf):
record describing a proceeding.
Returns:
int: one when the record is inserted / updated in the database
int:
one when the record is inserted / updated in the database
zero otherwise.
"""
......@@ -75,7 +77,7 @@ class Proceedings(Automaton):
# protection against proceeding not published in a journal
if not year:
year = record.year()
year = record.conference_year()
# alias
authors = record.authors()
......@@ -83,7 +85,7 @@ class Proceedings(Automaton):
pages = record.paper_pages()
preprint = record.preprint_number()
report_numbers = record.report_number()
submitted = record.submitted()[0]
submitted = record.submitted()
title = record.title()
url = record.paper_url()
volume = record.paper_volume()
......
......@@ -83,17 +83,18 @@ def add_conference_data(recjson):
"""
# ........................................................................
#
# Retrieve conference identifier
# Retrieve conference identifier and the host
# - the algorithm depends on the store
# - for cds use aleph_linking_page
# - for inspire use publication_info.cnum
#
conf_id, conf_key = None, None
conf_id, conf_key, host = None, None, None
if u"aleph_linking_page" in recjson:
di = recjson[u"aleph_linking_page"]
conf_id = di[u"sysno"]
conf_key = di[u"up_link"]
host = "cds.cern.ch"
elif u"publication_info" in recjson:
data = recjson[u"publication_info"]
......@@ -102,6 +103,7 @@ def add_conference_data(recjson):
for di in data:
if u"cnum" in di:
conf_key = di[u"cnum"]
host = "inspirehep.net"
break
if conf_id is None and conf_key is None:
......@@ -112,15 +114,6 @@ def add_conference_data(recjson):
# Get conference data
#
# extract the host name
if u"oai" in recjson:
oai = recjson[u"oai"][u"value"]
elif u"FIXME_OAI" in recjson:
oai = recjson[u"FIXME_OAI"][u"id"]
host = REG_OAI.match(oai).group(1)
# get the data
if conf_id is not None:
conf_id = (conf_id if isinstance(conf_id, int) else int(conf_id))
......@@ -129,14 +122,25 @@ def add_conference_data(recjson):
else:
confjson = get_conference_data(host, key=conf_key)
#
# extract the conference url
# * information is in confjson[url]
# * in most cases it is a dictionary
# * it happens that it is a list. The first entry is for the conference
# home page while the second one is for the proceeding (cds 2270940)
# - in other cases the url is not defined (cds 2258914)
confurl = u""
if u"url" in confjson:
obj = confjson[u"url"]
confurl = (obj[u"url"] if isinstance(obj, dict) else obj[0][u"url"])
# ........................................................................
#
# Add conference data to the recjson
#
recjson[u"meeting_name"] = confjson[u"meeting_name"]
recjson[u"meeting_note"] = {
u"recid": confjson[u"recid"],
u"url": confjson[u"url"][u"url"]}
recjson[u"meeting_note"] = {u"recid": confjson[u"recid"], u"url": confurl}
def build_record(recjson):
......
......@@ -295,6 +295,9 @@ class Record(dict):
It is an empty string when not defined
"""
if u"system_control_number" not in self:
return u""
data = self[u"system_control_number"]
data = (data if isinstance(data, list) else [data])
......
......@@ -125,6 +125,23 @@ def test_add_conference_data():
assert recjson["meeting_note"]["recid"] == 980401
# ........................................................................
#
# EXCEPTION
#
store = InvenioStore("cds.cern.ch")
# no conference URL
recjson = store.get_record(2258914)
add_conference_data(recjson)
assert recjson["meeting_note"]["url"] == ""
# several conference URLs (home page, proceeding)
recjson = store.get_record(2270940)
add_conference_data(recjson)
assert recjson["meeting_note"]["url"] == \
"http://indico.ihep.ac.cn/event/5221/overview"
def test_conference_cds():
......
# -*- coding: utf-8 -*-
"""test_01_Article
"""test_Article
* Harvester is Article
* Harvester is Articles
* Store is cds.cern.ch
* LHCb ACL for the current year
* Check that all error messages are expected
......
# -*- coding: utf-8 -*-
"""test_Proceeding
* Harvester is Proceedings
* Store is cds.cern.ch
* LHCb ACTI for the current year
* Check that all error messages are expected
"""
import pytest
from gluon import current
from harvest_tools.proceedings import Proceedings
from harvest_tools.factory import build_harvester_tool
from test_tools import messages
@pytest.fixture(scope="module")
def harvester_messages():
    """Supply the reference harvester messages to the tests of this module.

    The fixture is declared with ``scope="module"``.

    Returns:
        the object returned by ``test_tools.messages()``.

    """
    expected = messages()
    return expected
def test_lhcb_acti(harvester_messages):
    """Harvest the LHCb proceedings of the current year from cds.cern.ch.

    The harvester tool is built with ``dry_run=True`` and ``debug=True``.
    The test only checks that every logged message belongs to
    *harvester_messages*.

    Beyond the check itself, the test is handy to:

        * debug a harvester
        * profile it in order to see where the time is spent
        * compare different implementations and measure improvements
        * ...

    Args:
        harvester_messages:
            expected messages supplied by the fixture of the same name.

    """
    # The identifiers below only make sense when records are inserted in
    # the database. The current year is selected to exercise different cases.
    lhcb_team = 7
    lhcb_project = 8
    acti_category = 7

    database = current.db
    this_year = str(current.request.now.year)

    # build the harvester
    harvester = build_harvester_tool(
        database,
        lhcb_team,
        lhcb_project,
        "proceedings",
        acti_category,
        year_start=this_year,
        year_end="",
        dry_run=True,
        debug=True)

    assert isinstance(harvester, Proceedings)

    # run the harvester
    harvester.process_url("cds.cern.ch", "LHCb Conference Proceedings")

    # The number of proceedings evolves within a year, so it cannot be
    # checked. Only verify that no unexpected message was logged.
    logged = {entry.txt for entry in harvester.logs}
    assert logged.issubset(harvester_messages)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment