Commit 29725268 authored by LE GAC Renaud
Browse files

Migrate Talks.

parent 927ce661
......@@ -658,7 +658,7 @@ class CheckAndFix(object):
record.my_authors = value
def is_conference(self, record):
"""Check that the record described a conference talk / proceeding.
"""Check that the record contains conference data.
Args:
record (RecordPubli):
......@@ -675,6 +675,9 @@ class CheckAndFix(object):
if not isinstance(record, RecordConf):
raise CheckException(MSG_NO_CONF)
if u"meeting_name" not in record:
raise CheckException(MSG_NO_CONF)
def is_thesis(self, record):
"""Check that the record described a thesis.
......
......@@ -18,10 +18,12 @@ class Talks(Automaton):
"""Check the content of the talk in order to fix non conformities.
Args:
record (RecordConf): record describing a conference.
record (RecordConf):
record describing a conference.
Returns:
bool: ``False`` when a non conformity is found and can not be
bool:
``False`` when a non conformity is found and can not be
corrected.
"""
......@@ -34,10 +36,9 @@ class Talks(Automaton):
try:
self.check.is_conference(record)
self.check.country(record)
self.check.conference_date(record, self.harvester.host)
self.check.conference_date(record)
self.check.submitted(record)
self.check.year(record)
self.check.format_authors(record, fmt="F. Last")
self.check.get_my_authors(record, sort=True)
......@@ -57,23 +58,25 @@ class Talks(Automaton):
"""Insert a conference talk in the database.
Args:
record (RecordConf): record describing a conference.
record (RecordConf):
record describing a conference.
Returns:
int: one when the record is inserted / updated in the database
int:
one when the record is inserted / updated in the database
zero otherwise.
"""
# alias
oai_url = record.oai_url()
year = record.year()
year = record.conference_year()
# alias for the conference information
conference_dates = record.conference_dates()
conference_title = record.conference_title()
first_author = record.first_author()
id_country = self.search_country(record.conference_country())
submitted = record.submitted()[0]
submitted = record.submitted()
title = record.title()
# get the collaboration identifier
......
......@@ -69,17 +69,17 @@ def add_conference_data(recjson):
+---------------+-----------------------------------------------+
| meeting_name | closing_date, coference_code, country, date, |
| | location, opening_date, year |
| meeting | recid, url |
| meeting_note | recid, url |
+---------------+-----------------------------------------------+
Args:
recjson (dict): record data (MarcJSON)
Raise:
CdsException:
- no conference identifier and key in the recjson
- conference recjson found but with a wrong identifier
- conference not found
Note:
* Fields are not added when there is no conference identifier and
no conference key in the recjson.
* The method CheckAndFix.is_conference will identify that case.
"""
# ........................................................................
#
......@@ -107,7 +107,7 @@ def add_conference_data(recjson):
break
if conf_id is None and conf_key is None:
raise CdsException(MSG_NO_CONF_ID_KEY)
return
# ........................................................................
#
......
......@@ -20,6 +20,7 @@
"""
import pytest
from harvest_tools.checkandfix import CheckAndFix
from harvest_tools.exception import CheckException
from invenio_tools import load_record
......@@ -42,6 +43,13 @@ def test_is_conference(svc, reccds, recins):
assert svc.is_conference(reccds) is None
assert svc.is_conference(recins) is None
# test exception
# the publication cds2242595 is a talk without conference data
#
reccds2 = load_record("cds.cern.ch", 2242595)
with pytest.raises(CheckException):
svc.is_conference(reccds2)
def test_country(svc, reccds, recins):
assert svc.country(reccds) is None
......
# -*- coding: utf-8 -*-
"""test_Talk
* Harvester is Talks
* Store is cds.cern.ch
* LHCb COM for the current year
* Check that all error messages are expected
"""
import pytest
from gluon import current
from harvest_tools.talks import Talks
from harvest_tools.factory import build_harvester_tool
from test_tools import messages
@pytest.fixture(scope="module")
def harvester_messages():
    """Provide the reference messages, built once per test module.

    Returns:
        the object returned by ``test_tools.messages()``.

    """
    expected = messages()
    return expected
def test_lhcb_com(harvester_messages):
    """Run the talks harvester on the LHCb talks of the current year.

    The harvester is built in dry-run and debug mode, ``"LHCb Talks"`` is
    processed on ``cds.cern.ch`` and every message found in the log has to
    belong to ``harvester_messages``.

    Beyond the check itself, this test is handy to:

        * debug a harvester
        * profile it in order to see where the time is spent
        * compare several implementations and measure the improvements
        * ...

    Args:
        harvester_messages:
            the messages which are allowed to appear in the log.

    """
    # The identifiers below only matter when records are inserted in the
    # database. The current year is selected in order to meet different cases.
    database = current.db
    id_team, id_project, id_category = 7, 8, 9  # LHCb, LHCb, COM
    this_year = current.request.now.year

    # instantiate the harvester
    harvester = build_harvester_tool(
        database,
        id_team,
        id_project,
        "talks",
        id_category,
        year_start=str(this_year),
        year_end="",
        dry_run=True,
        debug=True)

    assert isinstance(harvester, Talks)

    # harvest
    harvester.process_url("cds.cern.ch", "LHCb Talks")

    # Inspect the log.
    # The number of talks cannot be checked since it evolves within a year,
    # therefore only verify that no unexpected message has been logged.
    logged = {entry.txt for entry in harvester.logs}
    assert logged.issubset(harvester_messages)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment