Commit e4010f30 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Add tests for InvenioStore and Articles automaton.

parent d48ec9af
......@@ -530,6 +530,7 @@
'Registration successful': 'Registration successful',
'Regular expression defining the name of our institute.': 'Expression régulière definissant le nom de votre laboratoire.',
'Reject': 'Rejeter',
'Reject %s is not defined': "Rejeté %s n'est pas défini",
'Reject article is not published': "Rejeté l'article n'est pas publié",
'Reject collaboration is not well formed': 'Rejeté la collaboration est mal formatté',
'Reject collaborations is not defined': "Rejeté la collaboration n'est pas définie",
......@@ -543,6 +544,7 @@
'Reject no author(s)': "Rejeté pas d'autheur(s)",
'Reject no authors': "Rejeté pas d'auteurs",
'Reject no authors of my institute': "Rejeté pas d'auteurs de mon laboratoire",
'Reject no conference date': 'Rejeté pas de dates pour la conférence',
'Reject no conference information': "Rejeté pas d'information sur la conférence",
'Reject no CPPM authors': "Rejeté pas d'auteurs du CPPM",
'Reject no OAI identifier': "Rejeté pas d'identifiant OAI",
......@@ -562,6 +564,7 @@
'Reject to many first author': 'Rejeté trop de premier autheur',
'Reject to many submit date': 'Rejeté plusieurs date de soumission',
'Reject to many year': 'Rejeté plusieurs année',
'Reject too many %s synonyms': 'Rejeté trop de synonymes %s',
'Reject too many collaborations synonyms': 'Rejeté synonyme de collaboration défini plusieurs fois',
'Reject too many countries synonyms': 'Rejeté synonyme de pays défini plusieurs fois',
'Reject too many publishers synonyms': 'Rejeté synonyme de revue défini plusieurs fois',
......
# -*- coding: utf-8 -*-
"""test_harvest_article
"""
from gluon import current
import pytest
from harvest_tools.articles import (
Articles,
MSG_NO_EDITOR,
MSG_TRANSFORM_PREPRINT)
from harvest_tools.base import (
MSG_FIX_ORIGIN,
MSG_IN_DB,
MSG_LOAD,
MSG_NO_ENTRY,
MSG_TOOMANY_SYNONYM)
from harvest_tools.checkandfix import (
MSG_NO_AUTHOR,
MSG_NO_CONF_DATE,
MSG_NO_DATE,
MSG_NO_MY_AUTHOR,
MSG_NO_REF,
MSG_NO_YEAR,
MSG_TEMPORARY_RECORD,
MSG_TO_MANY_DATE,
MSG_TO_MANY_FAUTHOR,
MSG_TO_MANY_YEAR,
MSG_WELL_FORMED_CONF_DATES,
MSG_WELL_FORMED_DATE,
MSG_WELL_FORMED_EDITOR)
from harvest_tools.factory import build_harvester_tool
from harvest_tools.preprints import MSG_PREPRINT_NO_NUMBER
from harvest_tools.reports import MSG_REPORT_NO_NUMBER
from harvest_tools.thesis import MSG_NO_THESIS
@pytest.fixture(scope="module")
def messages():
    """Build the set of translated messages accepted by the tests.

    Every ``MSG_*`` constant imported by this module is passed through the
    translator ``current.T`` and the results are collected in a set.

    Returns:
        set: the translated messages.

    """
    translate = current.T

    # all the message templates imported at the top of the module
    templates = (
        MSG_NO_EDITOR,
        MSG_TRANSFORM_PREPRINT,
        MSG_FIX_ORIGIN,
        MSG_IN_DB,
        MSG_LOAD,
        MSG_NO_ENTRY,
        MSG_TOOMANY_SYNONYM,
        MSG_NO_AUTHOR,
        MSG_NO_CONF_DATE,
        MSG_NO_DATE,
        MSG_NO_MY_AUTHOR,
        MSG_NO_REF,
        MSG_NO_YEAR,
        MSG_TEMPORARY_RECORD,
        MSG_TO_MANY_DATE,
        MSG_TO_MANY_FAUTHOR,
        MSG_TO_MANY_YEAR,
        MSG_WELL_FORMED_CONF_DATES,
        MSG_WELL_FORMED_DATE,
        MSG_WELL_FORMED_EDITOR,
        MSG_PREPRINT_NO_NUMBER,
        MSG_REPORT_NO_NUMBER,
        MSG_NO_THESIS,
    )

    return {translate(template) for template in templates}
def test_lhcb_acl(messages):
    """Harvest the LHCb articles of the current year in dry-run mode.

    Beyond checking the log, this test is handy to:

        * debug a harvester
        * profile it to see where the time is spent
        * compare different implementations to measure improvements
        * ...

    Args:
        messages (set): translated messages allowed in the harvester log,
            supplied by the ``messages`` fixture.

    """
    # The identifiers only make sense when records are inserted in the
    # database. The current year is selected to exercise different cases.
    database = current.db
    team_id = 7         # LHCb
    project_id = 8      # LHCb
    category_id = 2     # ACL
    this_year = current.request.now.year

    # build the harvester
    harvester = build_harvester_tool(
        database,
        team_id,
        project_id,
        "articles",
        category_id,
        year_start=str(this_year),
        year_end="",
        dry_run=True,
        debug=False)

    assert isinstance(harvester, Articles)

    # run the harvester
    harvester.process_url("cds.cern.ch", "LHCb Papers")

    # Analyse the log.
    # The number of articles cannot be checked since it evolves during the
    # year, so only verify that no unexpected message was logged.
    logged = {entry.txt for entry in harvester.logs}
    assert logged.issubset(messages)
# -*- coding: utf-8 -*-
"""test_inveniostore
"""
from invenio_tools.inveniostore import InvenioStore
def test_get_ids():
    """Compare the record ids returned by cds.cern.ch with a reference list.

    The store is queried for the "LHCb Papers" collection with the year
    field set to 2015, and the sorted result has to match the 60 reference
    identifiers exactly.

    """
    expected_ids = [
        1750838, 1755550, 1951383, 1951424, 1955544, 1966993,
        1967222, 1967422, 1968989, 1969197, 1970675, 1970690,
        1972201, 1975522, 1975714, 1978281, 1978798, 1981106,
        1983198, 1987883, 1996441, 2000543, 2002385, 2003252,
        2003792, 2003793, 2003794, 2004586, 2004591, 2005510,
        2007377, 2011387, 2012165, 2012990, 2014715, 2014733,
        2014836, 2016239, 2016711, 2019534, 2019536, 2020686,
        2021262, 2029609, 2029820, 2030417, 2033887, 2033891,
        2038937, 2040342, 2045144, 2047219, 2048426, 2048427,
        2048812, 2049870, 2055598, 2057916, 2059561, 2060452]

    invenio = InvenioStore("cds.cern.ch")
    found_ids = invenio.get_ids(
        f1="year",
        p1="2015",
        cc="LHCb Papers",
        m1="r",
        so="d",
        sf="year")

    # the order of the returned ids is not relied upon, sort before comparing
    found_ids.sort()

    assert len(found_ids) == 60
    assert found_ids == expected_ids
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment