Commit eb80c771 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Migrate CheckAndFix: the base methods required by the class Automaton.

parent a4170897
...@@ -138,41 +138,42 @@ class CheckAndFix(object): ...@@ -138,41 +138,42 @@ class CheckAndFix(object):
"""Get the rescue list for my authors. """Get the rescue list for my authors.
Args: Args:
record (RecordPubli): record describing a publication. record (RecordPubli):
id_project (int): identifier of the project in the database. record describing a publication.
id_team (int): identifier of the team in the database.
id_project (int):
identifier of the project in the database.
id_team (int):
identifier of the team in the database.
Returns: Returns:
list: empty when not defined list:
empty when not defined
""" """
year = record.year() year = record.submitted()
# try to recover year when not defined # try to recover year when not defined
if not year: if not year:
# published article, proceeding # published article, proceeding
if "773" in record and "y" in record["773"]: if record[u"publication_info"].year.iloc[0] != "":
year = record["773"]["y"] year = record[u"publication_info"].year.iloc[0]
# start date of a conference # start date of a conference
elif "111" in record and "x" in record["111"]: elif record._get(u"meeting_name", u"opening_date") != u"":
year = record["111"]["x"] year = record._get(u"meeting_name", u"opening_date")
# end date of a conference # end date of a conference
elif "111" in record and "z" in record["111"]: elif record._get(u"meeting_name", u"closing_date") != u"":
year = record["111"]["z"] year = record._get(u"meeting_name", u"closing_date")
# submitted date
elif "269" in record and "c" in record["269"]:
year = record["269"]["c"]
else: else:
return [] return []
# #
# NOTE # protection
# keep in mind that the CheckAndfix mechanism is not yet run # submitted and paper year are protected against erratum, but ...
# therefore year can be a list due to erratum, ...
# #
if isinstance(year, list): if isinstance(year, list):
year.sort() year.sort()
...@@ -197,7 +198,7 @@ class CheckAndFix(object): ...@@ -197,7 +198,7 @@ class CheckAndFix(object):
id_teams=id_team) id_teams=id_team)
if row: if row:
self.__reference = row['authors'].split(', ') self.__reference = row['authors'].strip("\n"). split(', ')
else: else:
self.__reference = [] self.__reference = []
...@@ -315,24 +316,23 @@ class CheckAndFix(object): ...@@ -315,24 +316,23 @@ class CheckAndFix(object):
else: else:
raise CheckException(MSG_NO_REF) raise CheckException(MSG_NO_REF)
def authors(self, record): @staticmethod
def authors(record):
"""Check that author fields are defined. """Check that author fields are defined.
Args: Args:
record (RecordPubli): record describing a publication. record (RecordPubli):
record describing a publication.
Raises: Raises:
CheckException: when there is no authors or more than CheckException:
one *first author*. when there is no authors.
""" """
if not record.is_authors(): if not record.is_authors():
raise CheckException(MSG_NO_AUTHOR) raise CheckException(MSG_NO_AUTHOR)
if len(record["100"]) > 1:
raise CheckException(MSG_TO_MANY_FAUTHOR)
def clean_erratum(self, record): def clean_erratum(self, record):
"""Clean record with erratum by removing them. """Clean record with erratum by removing them.
...@@ -361,10 +361,12 @@ class CheckAndFix(object): ...@@ -361,10 +361,12 @@ class CheckAndFix(object):
Have a look to the synonyms when the collaboration is not well formed. Have a look to the synonyms when the collaboration is not well formed.
Args: Args:
record (RecordPubli): record describing a publication. record (RecordPubli):
record describing a publication.
Raises: Raises:
CheckException: when the collaboration value is defined CheckException:
when the collaboration value is defined
nor entered as a synonym. nor entered as a synonym.
""" """
...@@ -376,6 +378,7 @@ class CheckAndFix(object): ...@@ -376,6 +378,7 @@ class CheckAndFix(object):
try: try:
search_synonym(db.collaborations, "collaboration", val) search_synonym(db.collaborations, "collaboration", val)
except ToolException as e: except ToolException as e:
raise CheckException(*e.args) raise CheckException(*e.args)
...@@ -454,10 +457,12 @@ class CheckAndFix(object): ...@@ -454,10 +457,12 @@ class CheckAndFix(object):
to new one. to new one.
Args: Args:
record (RecordPubli): record describing a publication. record (RecordPubli):
record describing a publication.
Returns: Returns:
bool: ``True`` when a record is found in the database with bool:
``True`` when a record is found in the database with
the bad OAI. the bad OAI.
""" """
...@@ -684,11 +689,18 @@ class CheckAndFix(object): ...@@ -684,11 +689,18 @@ class CheckAndFix(object):
It is based on the author rescue list stored in the database. It is based on the author rescue list stored in the database.
Args: Args:
record (RecordPubli): record describing a publication. record (RecordPubli):
id_project (int): identifier of the project in the database record describing a publication.
id_team (int): identifier of the team in the database
id_project (int):
identifier of the project in the database
id_team (int):
identifier of the team in the database
fmt_rescue (str): fmt_rescue (str):
the format for the authors used in the rescue list the format for the authors used in the rescue list
sort (bool): sort (bool):
sort authors by family name when true otherwise use the sort authors by family name when true otherwise use the
order of authors at the creation of the record order of authors at the creation of the record
...@@ -720,12 +732,12 @@ class CheckAndFix(object): ...@@ -720,12 +732,12 @@ class CheckAndFix(object):
record.reformat_authors(fmt_rescue) record.reformat_authors(fmt_rescue)
if sort: if sort:
authors = (record["700"][["last_name", "fmt_name"]] authors = (record[u"authors"][["last_name", "fmt_name"]]
.sort_values(by="last_name") .sort_values(by="last_name")
.fmt_name) .fmt_name)
else: else:
authors = (record["700"].fmt_name authors = (record[u"authors"].fmt_name
.sort_index()) .sort_index())
# go back to the origin formatting # go back to the origin formatting
...@@ -926,9 +938,15 @@ class CheckAndFix(object): ...@@ -926,9 +938,15 @@ class CheckAndFix(object):
CheckException: when the record is marked temporary CheckException: when the record is marked temporary
""" """
# found on INSPIREHEP (see record 1317573) # INSPIREHEP
if "500" in record and "a" in record["500"]: # Can be found by using the XML syntax:
if record["500"]["a"] == "*Temporary record*": # http://inspirehep.net/search?500__a="*Temporary record*"
#
# or the corresponding JSON field:
# http://inspirehep.net/comment="*Temporary record*"
#
if u"comment" in record:
if record[u"comment"] == u"*Temporary record*":
raise CheckException(MSG_TEMPORARY_RECORD) raise CheckException(MSG_TEMPORARY_RECORD)
def year(self, record): def year(self, record):
......
...@@ -204,7 +204,7 @@ class RecordPubli(Record): ...@@ -204,7 +204,7 @@ class RecordPubli(Record):
"""Convert publication_info into DataFrame: """Convert publication_info into DataFrame:
Note: Note:
* the field is a list when there are eratum * the field is a list when there are erratum
* in some case the subfield year is a list (cds 1951625) * in some case the subfield year is a list (cds 1951625)
publication information are stored in DataFrame with the publication information are stored in DataFrame with the
......
# -*- coding: utf-8 -*-
"""test_08_CheckAndFix_base
* Test CheckAndFix methods required by the Automaton base class.
- constructor
- is_bad_oai
- temporary_record
- authors
- my_affiliation
- collaboration
"""
import pytest
import requests
from harvest_tools.checkandfix import CheckAndFix, CheckException
from invenio_tools import load_record
@pytest.fixture(scope="module")
def reccds():
    """Load record 1951625 from cds.cern.ch once for the whole module."""
    cds_record = load_record("cds.cern.ch", 1951625)
    return cds_record
@pytest.fixture(scope="module")
def svc():
    """Build the CheckAndFix instance shared by the tests of this module."""
    checker = CheckAndFix()
    return checker
def test_constructor(svc):
    """Check the institute pattern exposed as ``svc.reg_institute``."""
    # Raw literals keep the backslashes of the escaped parentheses verbatim
    # instead of relying on Python passing an unknown escape sequence
    # through (which triggers a warning on recent interpreters).
    expected = (
        r"^Marseille, CPPM|"
        r"^CPPM, Marseille|"
        r"^Centre de Physique des Particules de Marseille \(CPPM\)"
    )
    assert svc.reg_institute == expected
def test_is_bad_oai(svc, reccds):
    """The reference CDS record must not be reported as using a bad OAI."""
    bad_oai = svc.is_bad_oai_used(reccds)
    assert not bad_oai
def test_temporary_record(svc, reccds):
    """Check the detection of temporary records.

    The reference CDS record is accepted (the method returns ``None``),
    while a record flagged ``*Temporary record*`` on inspirehep.net is
    expected to raise a CheckException.
    """
    assert svc.temporary_record(reccds) is None

    # look for some temporary records in inspirehep.net
    # using the MarcXML syntax
    # get a list of recids
    payload = {"p": "500__a:'*Temporary record*'", "of": "id", "rg": 10}

    # a timeout prevents the test from hanging forever on a stalled server
    r = requests.get("http://inspirehep.net/search",
                     params=payload,
                     timeout=30)

    # report an HTTP failure as such rather than as a JSON decoding error
    r.raise_for_status()
    li = r.json()
    assert li, "no temporary record found on inspirehep.net"

    # try with the oldest one to avoid issue with missing information, etc.
    recins = load_record("inspirehep.net", li[-1])

    # only the call under test is allowed to raise the expected exception
    with pytest.raises(CheckException):
        svc.temporary_record(recins)
def test_authors(svc, reccds):
    """The author check returns nothing for the reference CDS record."""
    outcome = svc.authors(reccds)
    assert outcome is None
def test__get_author_rescue_list(svc, reccds):
    """Compare the rescue list for project 8 / team 7 with the known one."""
    rescue_list = svc._get_author_rescue_list(reccds, 8, 7)

    expected = [
        "C. Adrover",
        "S. Akar",
        "E. Aslanides",
        "J. Cogan",
        "W. Kanso",
        "R. Le Gac",
        "O. Leroy",
        "G. Mancinelli",
        "E. Maurice",
        "A. Morda",
        "A. Mordà",
        "M. Perrin-Terrin",
        "M. Sapunov",
        "J. Serrano",
        "A. Tsaregorodtsev",
    ]

    assert rescue_list == expected
def test_my_affiliation(svc, reccds):
    """Check the affiliation found for project 8 / team 7.

    The reference CDS record yields the CPPM affiliation, whereas the NA62
    paper is expected to raise a CheckException.
    """
    affiliation = svc.my_affiliation(reccds, 8, 7)
    assert affiliation == "Marseille, CPPM"

    # a paper from NA62 -- no CPPM author
    na62_record = load_record("cds.cern.ch", 1434415)
    with pytest.raises(CheckException):
        svc.my_affiliation(na62_record, id_project=8, id_team=7)
def test_collaboration(svc, reccds):
    """The collaboration check returns nothing for the reference CDS record."""
    outcome = svc.collaboration(reccds)
    assert outcome is None
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment