Commit 4dc48966 authored by LE GAC Renaud
Browse files

Migrate Notes.

parent 5a665e1a
......@@ -18,10 +18,12 @@ class Notes(Automaton):
"""Check the content of the note in order to fix non conformities.
Args:
record (RecordPubli): record describing a note
record (RecordPubli):
record describing a note
Returns:
bool: ``False`` when a non conformity is found and can not be
bool:
``False`` when a non conformity is found and can not be
corrected.
"""
......@@ -33,7 +35,6 @@ class Notes(Automaton):
try:
self.check.submitted(record)
self.check.year(record)
self.check.format_authors(record, fmt="F. Last")
self.check.get_my_authors(record, sort=True)
......@@ -53,10 +54,12 @@ class Notes(Automaton):
"""Insert a public note in the database.
Args:
record (RecordPubli): record describing a note.
record (RecordPubli):
record describing a note.
Returns:
int: one when the record is inserted / updated in the database
int:
one when the record is inserted / updated in the database
zero otherwise.
"""
......@@ -64,7 +67,7 @@ class Notes(Automaton):
first_author = record.first_author()
oai_url = record.oai_url()
title = record.title()
year = record.year()
year = record.submitted()[0:4]
# get existing notes
fields = dict(first_author=first_author,
......
......@@ -244,14 +244,16 @@ class RecordPubli(Record):
data = (data if isinstance(data, list) else [data])
df = DataFrame(data)
columns = df.columns
# protection -- list of year, e.g. [2014, 2014] (cds 1951625)
df["year"] = \
df.year.apply(
lambda x: (", ".join(set(x)) if isinstance(x, list) else x))
if "year" in columns:
df["year"] = \
df.year.apply(
lambda x:
(", ".join(set(x)) if isinstance(x, list) else x))
# erratum -- sort by year and volume
columns = df.columns
if set(["year", "volume"]).issubset(columns):
df = df.sort_values(["year", "volume"])
......
# -*- coding: utf-8 -*-
"""test_Note
* Harvester is Preprints
* Store is cds.cern.ch
* LHCb AP for the current year
* Check that all error messages are expected
"""
import pytest
from gluon import current
from harvest_tools.notes import Notes
from harvest_tools.factory import build_harvester_tool
from test_tools import messages
@pytest.fixture(scope="module")
def harvester_messages():
    """Provide the reference messages returned by ``messages()``.

    The fixture has module scope: ``messages()`` is evaluated once and the
    same object is handed to every test of this module requesting it.

    Returns:
        the object returned by ``test_tools.messages``. The tests of this
        module use it as the collection of log messages that a harvester
        is allowed to emit.

    """
    expected = messages()
    return expected
def test_lhcb_ap2(harvester_messages):
    """Harvest the LHCb notes starting from the current year.

    The harvester tool is built for the ``notes`` controller with
    ``dry_run=True`` and ``debug=True``, run against ``cds.cern.ch`` and
    its log is compared with the expected messages.

    This test is useful to:

        * debug a harvester
        * profile its performance to see where the time is spent.
        * compare different implementations to measure improvements.
        * ...

    Args:
        harvester_messages:
            collection of the expected log messages (module fixture).

    """
    # These parameters only make sense when records are inserted in the
    # database. The current year is selected in order to test different
    # cases.
    db = current.db

    id_team = 7       # LHCb
    id_project = 8    # LHCb
    id_category = 14  # AP
    year = current.request.now.year

    # build the harvester
    tool = build_harvester_tool(
        db,
        id_team,
        id_project,
        "notes",
        id_category,
        year_start=str(year),
        year_end="",
        dry_run=True,
        debug=True)

    assert isinstance(tool, Notes)

    # run the harvester
    tool.process_url("cds.cern.ch",
                     "LHCb Notes, LHCb Conference Contributions")

    # analyse the log
    # The number of harvested records cannot be checked since it evolves
    # within a year. Only test that there are no unexpected messages.
    msgs = {el.txt for el in tool.logs}

    # same condition as msgs.issubset(harvester_messages), but the offending
    # messages are kept so that a failure tells which ones are not expected
    unexpected = msgs.difference(harvester_messages)
    assert not unexpected, \
        "unexpected harvester messages: {}".format(unexpected)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment