Commit 56512435 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Migrate CheckAndFix: methods required by the harvester Article.

parent 61979a02
......@@ -538,7 +538,7 @@
'Reject conference dates is not well formed': 'Rejecté les dates de la conférence dates sont mal formatté',
'Reject countries is not defined': "Rejeté le pays n'est pas définie",
'Reject editor is not well formed': "Rejeté l'éditeur est mal formatté",
'Reject incomplete paper reference': 'Rejeté la référence du papier est incomplète',
'Reject incomplete paper reference. Check ': 'Rejeté la référence du papier est incomplète. Vérifiez ',
'Reject invalid country': 'Rejeté pays inconnu',
'Reject invalid publisher': "Rejeté l'abbréviatioin de la revue n'est pas valide",
'Reject no %s authors': "Rejeté pas d'autheur(s) du %s",
......
This diff is collapsed.
""" invenio_tools.recordpubli
"""
import numpy as np
import re
......@@ -527,22 +528,27 @@ class RecordPubli(Record):
return True
def is_published(self):
"""``True`` is the record is published and contains a complet set
of publication infromation (title, volume, year and pagination).
"""``True`` if the record is published and contains a full set
of publication information (title, volume, year and pagination).
Returns:
bool:
"""
df = self[u"publication_info"]
query = \
(df.title.str.len() > 0) \
& (df.volume.str.len() > 0) \
& (df.year.str.len() > 0) \
& (df.pagination.str.len() > 0)
# NOTE
# * df.columns are title, volume, year and pagination
# * df can contain one or more rows due to erratum.
# * assume that the first row is the oldest one and corresponds to
# the first publication
# * the row contains empty string when the record is not published.
# * iloc[0] returns a Series whose index holds the column names
#
columns = (self[u"publication_info"].iloc[0]
.replace("", np.nan)
.dropna()
.index)
return len(df[query]) > 0
return len(columns.intersection(PAPER_REFERENCE_KEYS)) == 4
def is_with_erratum(self):
"""``True`` when the record contains erratum data.
......@@ -774,7 +780,7 @@ class RecordPubli(Record):
Returns:
unicode:
* formats are "YYYY-MM", "YYYY-MM-DD", "DD MMM YYYY", *etc.*
* Empty sring when not defined.
* Empty string when not defined.
"""
return self._get(u"prepublication", u"date")
......
......@@ -160,6 +160,13 @@ def test_collaboration(reccds):
def test_is_published(reccds):
    """Check ``is_published`` on a complete and on a faulty paper reference.

    The shared CDS record must be reported as published, whereas the record
    1753190 of cds.cern.ch must not.
    """
    published = reccds.is_published()
    assert published

    # The paper 1753190 is published, but its paper reference is faulty:
    # the correct reference is Eur. Phys. J. C 75 (2015) 158, whereas in the
    # record the volume is not defined and the pagination is wrong (75).
    faulty = load_record("cds.cern.ch", 1753190)
    assert not faulty.is_published()
def test_is_with_erratum(reccds):
assert not reccds.is_with_erratum()
......
......@@ -80,22 +80,3 @@ def test_conference_url(reccds, recins):
def test_conference_year(reccds, recins):
    """Both records are expected to report "2010" as the conference year."""
    # same check on the CDS record first, then on the INSPIRE one
    for record in (reccds, recins):
        assert record.conference_year() == "2010"
# def test_reference_conference_id(reccds):
# from pprint import pprint
# pprint(reccds["meeting_name"])
# pprint(recins["meeting_name"])
# assert reccds.reference_conference_id() == "1181092"
#
#
# def test_reference_conference_key(reccds):
# assert reccds.reference_conference_key() == "rome20101206"
#
#
# def test_reference_conference_proceeding(reccds):
# assert reccds.reference_conference_proceeding() == "1313736"
#
#
# def test_reference_conference_talk(reccds):
# assert reccds.reference_conference_talk() == "1313736"
# -*- coding: utf-8 -*-
"""test_10_CheckAndFix_article
* Test CheckAndFix methods for article:
- format_editor
- publisher
- paper_reference
- submitted
- year
- format_author
- get_my_authors
* Same article in cds.cern.ch and inspirehep.net
Phys. Rev. D 95 (2017) 052005
"""
import pytest
from harvest_tools.checkandfix import CheckAndFix
from harvest_tools.exception import ToolException
from invenio_tools import load_record
@pytest.fixture(scope="module")
def reccds():
    """Record 2242641 loaded from cds.cern.ch.

    The fixture has the module scope: the same record object is handed to
    every test of this module.
    """
    host, rec_id = "cds.cern.ch", 2242641
    return load_record(host, rec_id)
@pytest.fixture(scope="module")
def recins():
    """Record 1509922 loaded from inspirehep.net.

    The fixture has the module scope: the same record object is handed to
    every test of this module.
    """
    host, rec_id = "inspirehep.net", 1509922
    return load_record(host, rec_id)
@pytest.fixture(scope="module")
def svc():
    """``CheckAndFix`` instance, built without argument, shared by the tests
    of this module (module scope).
    """
    checker = CheckAndFix()
    return checker
def test_format_editor(svc, reccds, recins):
    """Check the editor and the volume before and after ``format_editor``.

    * The CDS record is expected to be the same before and after the call.
    * The INSPIRE record starts with "Phys.Rev." / "D95" and is expected to
      end with the same values as the CDS one.
    """
    final_editor, final_volume = "Phys. Rev. D", "95"

    # cds: values are identical before ...
    assert reccds.paper_editor() == final_editor
    assert reccds.paper_volume() == final_volume

    svc.format_editor(reccds)

    # ... and after the formatting
    assert reccds.paper_editor() == final_editor
    assert reccds.paper_volume() == final_volume

    # inspire: the volume carries the letter "D" before the formatting
    assert recins.paper_editor() == "Phys.Rev."
    assert recins.paper_volume() == "D95"

    svc.format_editor(recins)

    # the letter is now in the editor
    assert recins.paper_editor() == final_editor
    assert recins.paper_volume() == final_volume
def test_publisher(svc, reccds):
    """``publisher`` is expected to return ``None`` for the CDS record."""
    outcome = svc.publisher(reccds)
    assert outcome is None
def test_paper_reference(svc, reccds):
    """Check ``paper_reference`` on a recoverable and on a faulty record.

    * The title and the volume of the CDS record are blanked; the paper
      reference is expected to be back after the call.
    * The record 1753190 of cds.cern.ch is expected to raise a
      ``ToolException``.
    """
    # Recovery procedure using the DOI:
    # keep the reference, then remove the publisher and volume information
    expected = reccds.paper_reference()

    pubinfo = reccds["publication_info"]
    pubinfo.loc[0, ["title", "volume"]] = ["", ""]

    svc.paper_reference(reccds)
    assert reccds.paper_reference() == expected

    # The paper 1753190 is published, but its paper reference is faulty:
    # the correct reference is Eur. Phys. J. C 75 (2015) 158, whereas in the
    # record the volume is not defined and the pagination is wrong (75).
    # It is not possible to recover it from the doi data.
    faulty = load_record("cds.cern.ch", 1753190)

    with pytest.raises(ToolException):
        svc.paper_reference(faulty)
def test_submitted(svc, reccds, recins):
    """Check the submission date before and after ``submitted``.

    Three input forms are exercised on the CDS record: "19 Jan 2017",
    "19 01 2017" and "2017".
    """
    iso_day = "2017-01-19"

    # values as loaded
    assert reccds.submitted() == "19 Jan 2017"
    assert recins.submitted() == iso_day

    svc.submitted(reccds)
    # NOTE(review): the value returned by the next call is discarded; it
    # looks redundant with the assert which follows -- confirm before
    # removing it.
    reccds.submitted()
    assert reccds.submitted() == iso_day

    # the case 19 01 2017
    reccds[u"prepublication"][u"date"] = "19 01 2017"
    svc.submitted(reccds)
    assert reccds.submitted() == iso_day

    # the case 2017
    reccds[u"prepublication"][u"date"] = "2017"
    svc.submitted(reccds)
    assert reccds.submitted() == "2017-01"
def test_format_authors(svc, reccds):
    """Check a sample of authors before and after ``format_authors``.

    The authors at the positions 0, 1, 344 and -1 are compared, first as
    loaded ("Last, First") and then after formatting with ``fmt="F. Last"``.
    """
    positions = (0, 1, 344, -1)

    # as loaded
    loaded = reccds.authors_as_list()
    assert len(loaded) == reccds["number_of_authors"]

    loaded_names = (u"Aaij, Roel",
                    u"Adeva, Bernardo",
                    u"Koopman, Rose",
                    u"Zucchelli, Stefano")

    for pos, name in zip(positions, loaded_names):
        assert loaded[pos] == name

    # after formatting
    svc.format_authors(reccds, fmt="F. Last")
    formatted = reccds.authors_as_list()

    formatted_names = (u"R. Aaij",
                       u"B. Adeva",
                       u"R. Koopman",
                       u"S. Zucchelli")

    for pos, name in zip(positions, formatted_names):
        assert formatted[pos] == name
def test_get_my_authors(svc, reccds):
    """Check ``get_my_authors`` on the CDS record.

    The call is expected to return ``None``; the authors are then read from
    the attribute ``my_authors`` of the record and compared, once encoded in
    UTF-8, to the expected "|" separated list.
    """
    expected = ("J. Arnau Romeu|E. Aslanides|J. Cogan|"
                "K. De Bruyn|R. Le Gac|O. Leroy|"
                "G. Mancinelli|M. Martin|A. Mordà|"
                "J. Serrano|A. Tayduganov|A. Tsaregorodtsev")

    svc.format_authors(reccds, fmt="F. Last")

    outcome = svc.get_my_authors(reccds, sep=u"|", sort=True)
    assert outcome is None

    encoded = reccds.my_authors.encode("utf-8")
    assert encoded == expected
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment