Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit eb80c771 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Migrate CheckAndFix: the base methods required by the class Automaton.

parent a4170897
......@@ -138,41 +138,42 @@ class CheckAndFix(object):
"""Get the rescue list for my authors.
Args:
record (RecordPubli): record describing a publication.
id_project (int): identifier of the project in the database.
id_team (int): identifier of the team in the database.
record (RecordPubli):
record describing a publication.
id_project (int):
identifier of the project in the database.
id_team (int):
identifier of the team in the database.
Returns:
list: empty when not defined
list:
empty when not defined
"""
year = record.year()
year = record.submitted()
# try to recover year when not defined
if not year:
# published article, proceeding
if "773" in record and "y" in record["773"]:
year = record["773"]["y"]
if record[u"publication_info"].year.iloc[0] != "":
year = record[u"publication_info"].year.iloc[0]
# start date of a conference
elif "111" in record and "x" in record["111"]:
year = record["111"]["x"]
elif record._get(u"meeting_name", u"opening_date") != u"":
year = record._get(u"meeting_name", u"opening_date")
# end date of a conference
elif "111" in record and "z" in record["111"]:
year = record["111"]["z"]
# submitted date
elif "269" in record and "c" in record["269"]:
year = record["269"]["c"]
elif record._get(u"meeting_name", u"closing_date") != u"":
year = record._get(u"meeting_name", u"closing_date")
else:
return []
#
# NOTE
# keep in mind that the CheckAndfix mechanism is not yet run
# therefore year can be a list due to erratum, ...
# protection
# submitted and paper year are protected against erratum, but ...
#
if isinstance(year, list):
year.sort()
......@@ -197,7 +198,7 @@ class CheckAndFix(object):
id_teams=id_team)
if row:
self.__reference = row['authors'].split(', ')
self.__reference = row['authors'].strip("\n"). split(', ')
else:
self.__reference = []
......@@ -315,24 +316,23 @@ class CheckAndFix(object):
else:
raise CheckException(MSG_NO_REF)
def authors(self, record):
@staticmethod
def authors(record):
"""Check that author fields are defined.
Args:
record (RecordPubli): record describing a publication.
record (RecordPubli):
record describing a publication.
Raises:
CheckException: when there is no authors or more than
one *first author*.
CheckException:
when there are no authors.
"""
if not record.is_authors():
raise CheckException(MSG_NO_AUTHOR)
if len(record["100"]) > 1:
raise CheckException(MSG_TO_MANY_FAUTHOR)
def clean_erratum(self, record):
"""Clean record with erratum by removing them.
......@@ -361,10 +361,12 @@ class CheckAndFix(object):
Have a look to the synonyms when the collaboration is not well formed.
Args:
record (RecordPubli): record describing a publication.
record (RecordPubli):
record describing a publication.
Raises:
CheckException: when the collaboration value is defined
CheckException:
when the collaboration value is neither defined
nor entered as a synonym.
"""
......@@ -376,6 +378,7 @@ class CheckAndFix(object):
try:
search_synonym(db.collaborations, "collaboration", val)
except ToolException as e:
raise CheckException(*e.args)
......@@ -454,10 +457,12 @@ class CheckAndFix(object):
to new one.
Args:
record (RecordPubli): record describing a publication.
record (RecordPubli):
record describing a publication.
Returns:
bool: ``True`` when a record is found in the database with
bool:
``True`` when a record is found in the database with
the bad OAI.
"""
......@@ -684,11 +689,18 @@ class CheckAndFix(object):
It is based on the author rescue list stored in the database.
Args:
record (RecordPubli): record describing a publication.
id_project (int): identifier of the project in the database
id_team (int): identifier of the team in the database
record (RecordPubli):
record describing a publication.
id_project (int):
identifier of the project in the database
id_team (int):
identifier of the team in the database
fmt_rescue (str):
the format for the authors used in the rescue list
sort (bool):
sort authors by family name when true otherwise use the
order of authors at the creation of the record
......@@ -720,12 +732,12 @@ class CheckAndFix(object):
record.reformat_authors(fmt_rescue)
if sort:
authors = (record["700"][["last_name", "fmt_name"]]
authors = (record[u"authors"][["last_name", "fmt_name"]]
.sort_values(by="last_name")
.fmt_name)
else:
authors = (record["700"].fmt_name
authors = (record[u"authors"].fmt_name
.sort_index())
# go back to the origin formatting
......@@ -926,9 +938,15 @@ class CheckAndFix(object):
CheckException: when the record is marked temporary
"""
# found on INSPIREHEP (see record 1317573)
if "500" in record and "a" in record["500"]:
if record["500"]["a"] == "*Temporary record*":
# INSPIREHEP
# Can be found by using the XML syntax:
# http://inspirehep.net/search?500__a="*Temporary record*"
#
# or the corresponding JSON field:
# http://inspirehep.net/comment="*Temporary record*"
#
if u"comment" in record:
if record[u"comment"] == u"*Temporary record*":
raise CheckException(MSG_TEMPORARY_RECORD)
def year(self, record):
......
......@@ -204,7 +204,7 @@ class RecordPubli(Record):
"""Convert publication_info into DataFrame:
Note:
* the field is a list when there are eratum
* the field is a list when there are erratum
* in some case the subfield year is a list (cds 1951625)
publication information are stored in DataFrame with the
......
# -*- coding: utf-8 -*-
"""test_08_CheckAndFix_base
* Test CheckAndFix methods required by the Automaton base class.
- constructor
- is_bad_oai
- temporary_record
- authors
- my_affiliation
- collaboration
"""
import pytest
import requests
from harvest_tools.checkandfix import CheckAndFix, CheckException
from invenio_tools import load_record
@pytest.fixture(scope="module")
def reccds():
    """Provide the reference publication used by most tests.

    The record 1951625 is loaded from cds.cern.ch once per test module.

    Returns:
        the object returned by ``load_record``.

    """
    host, rec_id = "cds.cern.ch", 1951625
    return load_record(host, rec_id)
@pytest.fixture(scope="module")
def svc():
    """Provide a single ``CheckAndFix`` instance shared by the module.

    Returns:
        CheckAndFix:
            instance built with the default constructor arguments.

    """
    checker = CheckAndFix()
    return checker
def test_constructor(svc):
    """Check the institute regular expression exposed by ``CheckAndFix``.

    Args:
        svc (CheckAndFix):
            instance provided by the ``svc`` fixture.

    """
    # The last alternative contains escaped parentheses: it is written as
    # a raw string so that the backslashes are not treated as (invalid)
    # string escape sequences. The resulting value is unchanged.
    expected = (
        "^Marseille, CPPM|"
        "^CPPM, Marseille|"
        r"^Centre de Physique des Particules de Marseille \(CPPM\)"
    )

    assert svc.reg_institute == expected
def test_is_bad_oai(svc, reccds):
    """Check that the reference record is not reported with a bad OAI.

    Args:
        svc (CheckAndFix):
            instance provided by the ``svc`` fixture.
        reccds:
            record provided by the ``reccds`` fixture.

    """
    is_bad = svc.is_bad_oai_used(reccds)
    assert not is_bad
def test_temporary_record(svc, reccds):
    """Check the detection of temporary records.

    The reference CDS record must pass the check silently, while a record
    returned by the inspirehep.net search on the *Temporary record* note
    must raise a ``CheckException``.

    Note:
        This test requires network access to inspirehep.net.

    Args:
        svc (CheckAndFix):
            instance provided by the ``svc`` fixture.
        reccds:
            record provided by the ``reccds`` fixture.

    """
    assert svc.temporary_record(reccds) is None

    # look for some temporary records in inspirehep.net
    # using the MarcXML syntax
    # get a list of recids
    payload = {"p": "500__a:'*Temporary record*'", "of": "id", "rg": 10}

    # a timeout prevents the test suite from hanging on a dead server and
    # an HTTP error is reported as such rather than as a JSON decoding issue
    r = requests.get("http://inspirehep.net/search",
                     params=payload,
                     timeout=30)
    r.raise_for_status()

    li = r.json()
    assert li, "no temporary record found on inspirehep.net"

    # try with the oldest one to avoid issue with missing information, etc.
    # The record is loaded outside the raises block so that only the call
    # to temporary_record can satisfy the expected exception.
    recins = load_record("inspirehep.net", li[-1])

    with pytest.raises(CheckException):
        svc.temporary_record(recins)
def test_authors(svc, reccds):
    """Check that the author check passes silently on the reference record.

    Args:
        svc (CheckAndFix):
            instance provided by the ``svc`` fixture.
        reccds:
            record provided by the ``reccds`` fixture.

    """
    outcome = svc.authors(reccds)
    assert outcome is None
def test__get_author_rescue_list(svc, reccds):
    """Check the author rescue list returned for the reference record.

    The list is requested with the positional values 8 and 7, which the
    docstring of the method describes as the project and team identifiers.

    Args:
        svc (CheckAndFix):
            instance provided by the ``svc`` fixture.
        reccds:
            record provided by the ``reccds`` fixture.

    """
    expected = [
        "C. Adrover",
        "S. Akar",
        "E. Aslanides",
        "J. Cogan",
        "W. Kanso",
        "R. Le Gac",
        "O. Leroy",
        "G. Mancinelli",
        "E. Maurice",
        "A. Morda",
        "A. Mordà",
        "M. Perrin-Terrin",
        "M. Sapunov",
        "J. Serrano",
        "A. Tsaregorodtsev",
    ]

    rescue_list = svc._get_author_rescue_list(reccds, 8, 7)
    assert rescue_list == expected
def test_my_affiliation(svc, reccds):
    """Check the affiliation lookup with and without a matching author.

    The reference record must yield ``"Marseille, CPPM"``, while the
    NA62 record 1434415 must raise a ``CheckException``.

    Args:
        svc (CheckAndFix):
            instance provided by the ``svc`` fixture.
        reccds:
            record provided by the ``reccds`` fixture.

    """
    affiliation = svc.my_affiliation(reccds, 8, 7)
    assert affiliation == "Marseille, CPPM"

    # a paper from NA62 -- no CPPM author
    host, rec_id = "cds.cern.ch", 1434415
    recna62 = load_record(host, rec_id)

    with pytest.raises(CheckException):
        svc.my_affiliation(recna62, id_project=8, id_team=7)
def test_collaboration(svc, reccds):
    """Check that the collaboration check passes on the reference record.

    Args:
        svc (CheckAndFix):
            instance provided by the ``svc`` fixture.
        reccds:
            record provided by the ``reccds`` fixture.

    """
    outcome = svc.collaboration(reccds)
    assert outcome is None
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment