Commit 4414c211 authored by LE GAC Renaud
Browse files

Reorganise the tests section.

parent 13b0f83e
......@@ -45,6 +45,7 @@ MONTHS = {"Jan": "01",
"Nov": "11",
"Dec": "12"}
MSG_FAUTHOR_COLLABORATION = "Reject first author is a Collaboration"
MSG_NO_AUTHOR = "Reject no author(s)"
MSG_NO_CONF_DATE = "Reject no conference date"
MSG_NO_DATE = "Reject no submission date"
......@@ -64,10 +65,11 @@ REG_CONF_DATES_2 = \
REG_DOI = re.compile(r"\d+\.\d+/([a-zA-Z]+)\.(\d+)\.(\w+)")
REG_WELL_FORMED_CONF_DATES_1 = re.compile("\d{2} - \d{2} [A-Z][a-z]{2} \d{4}")
REG_WELL_FORMED_CONF_DATES_1 = \
re.compile("\d{1,2}-\d{1,2} [A-Z][a-z]{2} \d{4}")
REG_WELL_FORMED_CONF_DATES_2 = \
re.compile("\d{2} [A-Z][a-z]{2} - \d{2} [A-Z][a-z]{2} \d{4}")
re.compile("\d{1,2} [A-Z][a-z]{2} - \d{1,2} [A-Z][a-z]{2} \d{4}")
UNIVERSITY = "University"
......@@ -326,7 +328,9 @@ class CheckAndFix(object):
return val
def authors(self, record):
"""Check that author fields are defined.
"""Check that:
* author fields are defined.
* first author is not like ATLAS Collaboration
Args:
record (RecordPubli):
......@@ -338,11 +342,14 @@ class CheckAndFix(object):
"""
if self.dbg:
print "\t\tCheck authors"
print("\t\tCheck authors")
if not record.is_authors():
raise CheckException(MSG_NO_AUTHOR)
if "collaboration" in record.first_author().lower():
raise CheckException(MSG_FAUTHOR_COLLABORATION)
def collaboration(self, record):
"""Check synonyms for collaboration by using by the proper value.
......@@ -481,16 +488,16 @@ class CheckAndFix(object):
opening, closing = self._get_conference_dates(record)
if opening.month == closing.month:
val = "%02i - %02i %s %i" % (opening.day,
closing.day,
opening.strftime("%b"),
opening.year)
val = "%i-%i %s %i" % (opening.day,
closing.day,
opening.strftime("%b"),
opening.year)
else:
val = "%02i %s - %02i %s %i" % (opening.day,
opening.strftime("%b"),
closing.day,
closing.strftime("%b"),
opening.year)
val = "%i %s - %i %s %i" % (opening.day,
opening.strftime("%b"),
closing.day,
closing.strftime("%b"),
opening.year)
meeting = record[u"meeting_name"]
meeting = (meeting[0] if isinstance(meeting, list) else meeting)
......@@ -628,20 +635,33 @@ class CheckAndFix(object):
values = record[u"dissertation_note"][u"university"]
# CPPM -- fix the name of Aix-Marseille university
if self._get_reg_institute().find("CPPM") != -1:
affiliations = record.first_author_institutes()
if "CPPM" in affiliations:
# name of the university depends on the year
year = re.search(r"(\d{4})", record.these_defense()).group(1)
year = REG_YEAR.search(record.these_defense()).group(1)
if int(year) < 2012:
university = "Université de la Méditerrannée Aix-Marseille II"
university = \
u"Université de la Méditerrannée Aix-Marseille II"
else:
university = "Aix Marseille Université"
values = (university if "Marseille" in values else values)
# single affiliation
affiliations = affiliations.split("|")
if len(affiliations) == 1:
values = university
# multiple affiliation are separated by "|"
else:
li = [el for el in affiliations if "CPPM" in el]
if len(li) == 1:
values = values.replace(li[0], university)
# Other -- replace U. by University
else:
university = current.T(UNIVERSITY).decode("utf8")
values.replace('U.', university)
university = current.T(UNIVERSITY).decode("utf8")
values = values.replace('U.', university)
record[u"dissertation_note"][u"university"] = values
......@@ -900,7 +920,7 @@ class CheckAndFix(object):
raise ToolException(MSG_UNKNOWN_PUBLISHER)
abbreviation = db.publishers[dbid].abbreviation
if abbreviation != val:
if abbreviation != val:
record[u"publication_info"].loc[0, "title"] = abbreviation
# convert ToolException to CheckException
......
......@@ -247,12 +247,12 @@ class Record(dict):
# standard case
value = self._get(field, subfield)
# in some case OAI is a list
# in some case OAI is a list (e.g. cds1513204)
# select the OAI corresponding to the record identifier.
if isinstance(value, list):
myid = self.id()
for el in value:
if el.endswith(myid):
if el.endswith(str(myid)):
return el
return ""
......
""" invenio_tools.recordinst
"""
<<<<<<< HEAD
from .base import is_institute
from .exception import RecordException
from .record import Record
=======
from base import is_institute, REG_OAI
from exception import RecordException
from record import Record
>>>>>>> Migrate RecordInst.
MSG_INVALID_HOST = "Invalid record host"
MSG_INVALID_RECORD = "Invalid record, it is not describing an institute"
......@@ -58,10 +51,6 @@ class RecordInst(Record):
"""
def __init__(self, recjson):
host = REG_OAI.match(recjson[u"FIXME_OAI"][u"id"]).group(1)
if host != "inspirehep.net":
raise RecordException(MSG_INVALID_HOST)
if not is_institute(recjson):
raise RecordException(MSG_INVALID_RECORD)
......
......@@ -178,6 +178,10 @@ class RecordPubli(Record):
columns = df.columns
df = df.drop(columns.difference(refcols), axis="columns")
# protection against duplicated entries, e.g. twice the first author
if set(["last_name", "first_name"]).issubset(df.columns):
df = df.drop_duplicates(["last_name", "first_name"])
# protection -- affiliation not defined
if "affiliation" not in columns:
dfa = DataFrame([""]*len(df), columns=["affiliation"])
......@@ -286,7 +290,8 @@ class RecordPubli(Record):
Returns:
list:
the list is empty when authors are not defined.
* name are unique
* the list is empty when authors are not defined.
"""
df = self[u"authors"]
......
......@@ -21,6 +21,7 @@ from harvest_tools.base import (
MSG_TOOMANY_SYNONYM)
from harvest_tools.checkandfix import (
MSG_FAUTHOR_COLLABORATION,
MSG_NO_AUTHOR,
MSG_NO_CONF_DATE,
MSG_NO_DATE,
......@@ -54,34 +55,34 @@ def messages():
T = current.T
set_msgs = {
T(MSG_NO_EDITOR),
T(MSG_TRANSFORM_PREPRINT),
T(MSG_FAUTHOR_COLLABORATION),
T(MSG_FIX_ORIGIN),
T(MSG_IN_DB),
T(MSG_INSERT_FAIL),
T(MSG_INV_CONF),
T(MSG_INV_CONF_KEY),
T(MSG_INSERT_FAIL),
T(MSG_LOAD),
T(MSG_NO_AUTHOR),
T(MSG_NO_CONF),
T(MSG_NO_CONF_DATE),
T(MSG_NO_CONF_ID_KEY),
T(MSG_NO_DATE),
T(MSG_NO_COUNTRY),
T(MSG_NO_EDITOR),
T(MSG_NO_ENTRY % "collaborations"),
T(MSG_NO_ENTRY % "countries"),
T(MSG_NO_ENTRY % "publishers"),
T(MSG_TOOMANY_SYNONYM),
T(MSG_NO_AUTHOR),
T(MSG_NO_CONF),
T(MSG_NO_CONF_DATE),
T(MSG_NO_DATE),
T(MSG_NO_MY_AUTHOR),
T(MSG_NO_PUBLISHER),
T(MSG_NO_REF),
T(MSG_PREPRINT_IS_PAPER),
T(MSG_PREPRINT_IS_CONFERENCE),
T(MSG_PREPRINT_IS_THESIS),
T(MSG_PREPRINT_NO_NUMBER),
T(MSG_PREPRINT_IS_PAPER),
T(MSG_PREPRINT_IS_THESIS),
T(MSG_REPORT_NO_NUMBER),
T(MSG_TEMPORARY_RECORD),
T(MSG_TOOMANY_SYNONYM),
T(MSG_TRANSFORM_PREPRINT),
T(MSG_UNKNOWN_COLLABORATION),
T(MSG_UNKNOWN_COUNTRY),
T(MSG_UNKNOWN_PUBLISHER),
......
......@@ -13,6 +13,7 @@ Test all methods of the Record class for a given article:
"""
import pytest
from invenio_tools.inveniostore import InvenioStore
from invenio_tools.record import Record
......
......@@ -24,6 +24,7 @@ Note:
"""
import pandas as pd
import pytest
from invenio_tools import load_record
......@@ -98,6 +99,7 @@ def test_reformat_author(reccds):
authors = reccds.authors_as_list()
assert len(authors) == reccds["number_of_authors"]
assert authors[0] == u"R. Aaij"
assert authors[1] == u"B. Adeva"
assert authors[12] == u"A. A. Alves Jr"
......@@ -172,12 +174,16 @@ def test_is_with_erratum(reccds):
assert not reccds.is_with_erratum()
def test_paper_info(reccds):
def test_paper_info(reccds, recins):
assert reccds.paper_editor() == "JINST"
assert reccds.paper_pages() == "P12005"
assert reccds.paper_volume() == "9"
assert reccds.paper_year() == "2014"
assert recins.paper_editor() == "JINST"
assert recins.paper_pages() == "P12005"
assert recins.paper_volume() == "9"
assert recins.paper_year() == "2014"
def test_paper_reference(reccds):
assert reccds.paper_reference() == "JINST 9 2014 P12005"
......
......@@ -21,6 +21,7 @@ Note:
"""
import pytest
from invenio_tools import load_record
......
......@@ -20,6 +20,7 @@ Note:
"""
import pytest
from invenio_tools import load_record
......
......@@ -5,7 +5,8 @@ Test specific methods of the RecordInst class for CPPM
"""
import pytest
from invenio_tools import load_record
from invenio_tools import load_record, RecordException, RecordInst
@pytest.fixture(scope="module")
......@@ -13,6 +14,18 @@ def record():
return load_record("inspirehep.net", 902989)
def test_exception_host():
    """Building a RecordInst from the cds.cern.ch record 1951625 has to
    raise a RecordException.

    """
    cds_record = load_record("cds.cern.ch", 1951625)

    with pytest.raises(RecordException):
        RecordInst(cds_record)
def test_exception_record():
    """Building a RecordInst from the inspirehep.net record 1319638 has to
    raise a RecordException.

    """
    ins_record = load_record("inspirehep.net", 1319638)

    with pytest.raises(RecordException):
        RecordInst(ins_record)
def test_future_identifer(record):
assert record.future_identifier() == u'CPPM, Marseille'
......
......@@ -13,6 +13,8 @@
import pytest
import requests
from gluon import current
from harvest_tools.base import search_synonym
from harvest_tools.checkandfix import CheckAndFix, CheckException
from invenio_tools import load_record
......@@ -91,5 +93,29 @@ def test_my_affiliation(svc, reccds):
svc.my_affiliation(recna62, id_project=8, id_team=7)
def test_search_synonym():
    """Check that search_synonym returns the id 2 for the collaboration
    of two inspirehep.net records.

    * 1342250: collaboration ANTARES, TANAMI (defined as synonym in the db)
    * 718872: collaboration ANTARES (defined as synonym in the db)

    """
    db = current.db

    for rec_id in (1342250, 718872):
        record = load_record("inspirehep.net", rec_id)

        colid = search_synonym(
            db.collaborations,
            "collaboration",
            record.collaboration())

        assert colid == 2
def test_collaboration(svc, reccds):
assert svc.collaboration(reccds) is None
......@@ -5,15 +5,16 @@
"""
import pytest
from gluon import current
from harvest_tools.automaton import Automaton, ToolException
from harvest_tools.automaton import Automaton
from harvest_tools.msgcollection import MsgCollection
from invenio_tools.inveniostore import InvenioStore
@pytest.fixture(scope="module")
def svc():
# article for lhcb and forthe current year
# article for lhcb and for the current year
return Automaton(current.db,
7,
8,
......@@ -24,10 +25,49 @@ def svc():
debug=True)
def test__is_record_in_db(svc):
    """The records cds1389907 and ins939619 describe the same LHCb paper.

    * the paper is loaded in the database from cds.cern.ch.
    * primary and secondary OAI are defined

    The lookup by host and record identifier and the lookup by OAI URL
    have to return the same non-zero value.

    Args:
        svc (Automaton): the module-scoped automaton fixture.

    """
    db = current.db
    # Python 2 print statement emitting an empty line in the test output
    print

    # check that the record is in the database
    query = \
        db.publications.origin.contains("http://cds.cern.ch/record/1389907")
    row = db(query).select().first()

    # first() yields None when nothing matches; fail with a clear assertion
    # instead of an AttributeError on row.origin
    assert row is not None

    assert row.origin == \
        "http://cds.cern.ch/record/1389907, " \
        "http://inspirehep.net/record/939619"

    # lookup via the host and the record identifier
    rec_id_1 = svc._is_record_in_db(
        "Luminosity measurement",
        host="cds.cern.ch",
        rec_id=1389907, )

    assert rec_id_1 != 0

    # lookup via the URL of the secondary OAI
    rec_id_2 = svc._is_record_in_db(
        "Luminosity measurement",
        oai_url="http://inspirehep.net/record/939619")

    assert rec_id_2 != 0
    assert rec_id_1 == rec_id_2
def test_process_recid(svc):
"""Test the deepest method to retrieve a record.
"""
# reset
svc.collection_logs = []
svc.logs = []
# mimic high level stage process_collection and process_url
collection = u"LHCb Papers"
......
......@@ -14,6 +14,7 @@
"""
import pytest
from harvest_tools.checkandfix import CheckAndFix
from invenio_tools import load_record
......
......@@ -19,6 +19,7 @@
"""
import pytest
from harvest_tools.checkandfix import CheckAndFix
from harvest_tools.exception import CheckException
from invenio_tools import load_record
......@@ -60,11 +61,11 @@ def test_conference_date(svc, reccds, recins):
assert reccds.conference_dates() == "6 - 11 Dec 2010"
svc.conference_date(reccds)
assert reccds.conference_dates() == "06 - 11 Dec 2010"
assert reccds.conference_dates() == "6-11 Dec 2010"
assert recins.conference_dates() == "6-11 Dec 2010"
svc.conference_date(recins)
assert recins.conference_dates() == "06 - 11 Dec 2010"
assert recins.conference_dates() == "6-11 Dec 2010"
def test_submitted(svc, reccds, recins):
......
......@@ -12,6 +12,7 @@
"""
import pytest
from harvest_tools.checkandfix import CheckAndFix
from invenio_tools import load_record
......@@ -57,4 +58,4 @@ def test_format_universities(svc, reccds):
svc.format_universities(reccds2)
assert reccds2[u"dissertation_note"][u"university"] == \
u"Aix Marseille Université"
u"Shandong Université & Aix Marseille Université"
# -*- coding: utf-8 -*-
"""test_01_acl
* collection of article with exception
"""
import pytest
from harvest_tools.checkandfix import CheckAndFix
from invenio_tools import load_record
@pytest.fixture(scope="module")
def svc():
    """Provide one CheckAndFix instance shared by the tests of the module."""
    checker = CheckAndFix()
    return checker
def test_acl_cds2234042(svc):
    """Check the submission date of the record cds 2234042.

    * The field ``prepublication`` is usually a dictionary.
    * For the publication 2234042 it is a list.
    * Protection added in Record.submitted and CheckAndFix.submitted.

    Args:
        svc (CheckAndFix): the module-scoped fixture. It is used as is,
            instead of being shadowed by a second local instance.

    """
    reccds = load_record("cds.cern.ch", 2234042)
    assert reccds.submitted() == "18 Nov 2016"

    # the fix works in place and rewrites the date as YYYY-MM-DD
    assert svc.submitted(reccds) is None
    assert reccds.submitted() == "2016-11-18"
"""test_reject_on_oai
"""
from gluon import current
from harvest_tools import Automaton
def test_is_record_in_db():
    """The records cds 1389907 and ins 939619 describe the same LHCb paper.

    It is in the database and was loaded from cds.cern.ch.
    The lookup by host and record identifier and the lookup by OAI URL
    have to return the same non-zero value.

    """
    db = current.db

    # check that the record is in the database
    query = \
        db.publications.origin.contains("http://cds.cern.ch/record/1389907")
    row = db(query).select().first()

    # first() yields None when nothing matches; fail with a clear assertion
    # instead of an AttributeError on row.origin
    assert row is not None

    assert row.origin == \
        "http://cds.cern.ch/record/1389907, " \
        "http://inspirehep.net/record/939619"

    atm = Automaton(db, 7, 8, "articles", 2)

    # lookup via the host and the record identifier
    rec_id_1 = atm._is_record_in_db(
        "Luminosity measurement",
        host="cds.cern.ch",
        rec_id=1389907, )

    assert rec_id_1 != 0

    # lookup via the URL of the secondary OAI
    rec_id_2 = atm._is_record_in_db(
        "Luminosity measurement",
        oai_url="http://inspirehep.net/record/939619")

    assert rec_id_2 != 0
    assert rec_id_1 == rec_id_2
\ No newline at end of file
"""ARTICLE
http://cds.cern.ch/record/1753190.
Note:
* Paper is published but there are error in the paper reference
* Page volume is not defined in 773v
* Page number is wrong in 773c 75 instead of 158
The CheckAndFix correction are applied.
Only the changes are checked
"""
import copy
import pytest
from harvest_tools import CheckAndFix
from invenio_tools import load_record
@pytest.fixture(scope="module")
def record():
    """Load the record 1753190 from cds.cern.ch, once per module."""
    rec = load_record('cds.cern.ch', 1753190)
    return rec
@pytest.fixture(scope="module")
def recordfix(record):
    """Return a deep copy of the record on which the paper reference
    correction of CheckAndFix has been applied.

    The copy keeps the ``record`` fixture untouched.

    """
    fixed = copy.deepcopy(record)
    CheckAndFix().paper_reference(fixed)
    return fixed
def test_paper_editor(record, recordfix):
    """The editor is the same for the raw and for the corrected record."""
    for rec in (record, recordfix):
        assert rec.paper_editor() == "Eur. Phys. J. C"
def test_paper_page(record, recordfix):
    """The pages are the same for the raw and for the corrected record."""
    # 191119 fix in CDS
    for rec in (record, recordfix):
        assert rec.paper_pages() == "158"
def test_paper_volume(record, recordfix):
    """The volume is the same for the raw and for the corrected record."""
    # 191119 fix in CDS
    for rec in (record, recordfix):
        assert rec.paper_volume() == "C75"
def test_paper_reference(record, recordfix):
    """The full reference is the same for the raw and for the corrected
    record.

    """
    # 191119 fix in CDS
    for rec in (record, recordfix):
        assert rec.paper_reference() == "Eur. Phys. J. C C75 2015 158"
def test_paper_year(record, recordfix):
    """The year is the same for the raw and for the corrected record."""
    for rec in (record, recordfix):
        assert rec.paper_year() == "2015"