Commit f7cdbbb0 authored by LE GAC Renaud
Browse files

Add test_11_harvest_tools_base.py

parent 7d5c1287
......@@ -9,8 +9,9 @@ from .base import (DRY_RUN,
MSG_IN_DB,
MSG_LOAD,
family_name_fr,
get_rex_institute,
learn_my_authors,
get_rex_institute)
order_oais)
from .automaton import Automaton
from .articles import Articles
......
......@@ -30,28 +30,7 @@ def family_name_fr(full_name):
family name
"""
return full_name[full_name.find(' ') + 1:]
def order_oais(oais):
    """Return the OAI identifiers ordered as ``cds, inspirehep``.

    Args:
        oais (str):
            record identifiers in the stores, separated by a comma.
            ``None`` is accepted.

    Returns:
        str:
            * an empty string when *oais* is ``None``;
            * *oais* unchanged when it does not contain exactly one comma
              or when ``REX_OAI_CDS.match`` accepts it;
            * otherwise the two identifiers, stripped and swapped.

    """
    if oais is None:
        return ""

    # only a pair of identifiers which is not yet in the cds-first order
    # has to be swapped
    if oais.count(",") == 1 and not REX_OAI_CDS.match(oais):
        first, second = [part.strip() for part in oais.split(",")]
        return f"{second}, {first}"

    return oais
return full_name[full_name.rfind('. ') + 2:]
def filter_logs(logs):
......@@ -87,6 +66,53 @@ def filter_logs(logs):
return [logs[tpl[0]] for tpl in fltr.items() if tpl[1] is False]
def get_rex_institute(db, app):
    r"""Get the regular expression defining the affiliation of my institute.

    It is obtained by concatenating the affiliation keys found in the
    table ``affiliation_keys``. An affiliation key can contain characters
    like ``(``, ``)`` or ``&``. They are replaced by ``\(`` *etc*.

    The expression is built only once: it is stored in
    ``app.reg_institute`` and that value is returned by subsequent calls
    as long as it is not empty.

    Args:
        db (pydal.DAL):
            database connection

        app (gluon.storage.Storage):
            namespace defining the application

    Returns:
        str:
            the alternatives, one per affiliation key, separated by ``|``.
            It is an empty string when there is no affiliation key.

    """
    # use the value cached in the application namespace, if any
    reg_institute = app.reg_institute
    if reg_institute:
        return reg_institute

    # Protect special characters in a single pass.
    # Raw strings are mandatory here: "\(" in a plain literal is an
    # invalid escape sequence which triggers a warning at compile time.
    # NOTE(review): other metacharacters (".", "*", "[", "|", ...) are left
    # untouched, as before -- confirm that no affiliation key contains them.
    protect = str.maketrans({
        "(": r"\(",
        ")": r"\)",
        "&": r"\&",
        "$": r"\$",
        "+": r"\+",
        "?": r"\?",
    })

    # add start and end of string (or a "|" separator) for an exact match
    patterns = [
        r"(^|\|){}($|\|)".format(row.key_u.translate(protect))
        for row in db(db.affiliation_keys.id > 0).iterselect()
    ]

    app.reg_institute = reg_institute = "|".join(patterns)

    return reg_institute
def learn_my_authors(db,
authors=None,
id_project=None,
......@@ -156,48 +182,23 @@ def learn_my_authors(db,
db.my_authors[row.id] = dict(authors=', '.join(database_authors))
def get_rex_institute(db, app):
"""Get the regular expression defining the affiliation of my institute.
It is obtained by concatenating the affiliation keys.
Affiliation key can contains character like ``(``, ``)`` or ``&``.
They are replaced by ``\(`` *etc*.
def order_oais(oais):
"""Order OAIS string as cds, inspirehep
Args:
db (pydal.DAL):
database connection
app (gluon.storage.Storage):
namespace defining the application
oais (str):
record identifier in stores
Returns:
str:
str
"""
# alias
reg_institute = app.reg_institute
# regular expression for the affiliation keys
# protect special character
# add start and end of string for an exact match
if not reg_institute:
lst = []
for row in db(db.affiliation_keys.id > 0).iterselect():
val = row.key_u
val = (val
.replace("(", "\(")
.replace(")", "\)")
.replace("&", "\&")
.replace("$", "\$")
.replace("+", "\+")
.replace("?", "\?"))
val = r"(^|\|){}($|\|)" .format(val)
if oais is None:
return ""
lst.append(val)
if oais.count(",") != 1 or REX_OAI_CDS.match(oais):
return oais
app.reg_institute = reg_institute = r"|".join(lst)
u, v = (el.strip() for el in oais.split(","))
return f"{v}, {u}"
return reg_institute
"""test_11_CheckAndFix_base
* Test CheckAndFix methods required by the Automaton base class.
- constructor
- is_bad_oai_used
- temporary_record
- authors
- my_affiliation
- collaboration
"""
import pytest
import requests
from gluon import current
from harvest_tools.base import search_synonym
from harvest_tools.checkandfix import CheckAndFix, CheckException
from store_tools import load_record
@pytest.fixture(scope="module")
def reccds():
    """Provide the record 1951625 of cds.cern.ch, loaded once per module."""
    record = load_record("cds.cern.ch", 1951625)
    return record
@pytest.fixture(scope="module")
def recins():
    """Provide the literature record 1319638 of inspirehep.net,
    loaded once per module.

    """
    record = load_record("inspirehep.net", 1319638, shelf="literature")
    return record
@pytest.fixture(scope="module")
def svc():
    """Provide a ``CheckAndFix`` instance shared by the tests of the module."""
    checker = CheckAndFix()
    return checker
def test_constructor_11001(svc):
    """Check the ``reg_institute`` attribute of the service."""
    # one alternative per affiliation key, already protected for the regex
    affiliation_keys = (
        r"Marseille, CPPM",
        r"CPPM, Marseille",
        r"Centre de Physique des Particules de Marseille \(CPPM\)",
        r"Aix Marseille Univ, CNRS/IN2P3, CPPM, Marseille, France",
    )

    expected = "|".join(
        r"(^|\|)" + key + r"($|\|)" for key in affiliation_keys)

    assert svc.reg_institute == expected
# ............................................................................
#
# cds.cern.ch record
#
def test_is_oai_cds_11010(svc, reccds):
    """``is_oai`` has to be truthy for the cds.cern.ch record."""
    status = svc.is_oai(reccds)
    assert status
def test_is_bad_oai_cds_11011(svc, reccds):
    """``is_bad_oai_used`` has to be falsy for the cds.cern.ch record."""
    status = svc.is_bad_oai_used(reccds)
    assert not status
# v1.4.0 inhibits the temporary record check with the new inspirehep API (March 2020)
#
# def test_temporary_record_cds_11012(svc, reccds):
#
# assert svc.temporary_record(reccds) is None
#
# # look for some temporary records in inspirehep.net
# # using the MarcXML syntax
# # get a list of recids
# payload = {"p": "500__a:'*Temporary record*'", "of": "id", "rg": 10}
#
# r = requests.get("https://old.inspirehep.net/search", params=payload)
# li = r.json()
#
# # try with the oldest one to avoid issue with missing information, etc.
# with pytest.raises(CheckException):
# recins = load_record("inspirehep.net", li[-1], shelf="literature")
# svc.temporary_record(recins)
def test_authors_cds_11013(svc, reccds):
    """``authors`` has to return ``None`` for the cds.cern.ch record."""
    result = svc.authors(reccds)
    assert result is None
def test__get_author_rescue_list_cds_11014(svc, reccds):
    """Check the author rescue list obtained with the cds.cern.ch record
    for the arguments ``8`` and ``7``.

    """
    expected = [
        "C. Adrover",
        "S. Akar",
        "E. Aslanides",
        "J. Cogan",
        "W. Kanso",
        "R. Le Gac",
        "O. Leroy",
        "G. Mancinelli",
        "E. Maurice",
        "A. Morda",
        "A. Mordà",
        "M. Perrin-Terrin",
        "M. Sapunov",
        "J. Serrano",
        "A. Tsaregorodtsev",
    ]

    rescue_list = svc._get_author_rescue_list(reccds, 8, 7)

    assert rescue_list == expected
def test_my_affiliation_cds_11015(svc, reccds):
    """Check ``my_affiliation`` with the cds.cern.ch record, and that
    it raises ``CheckException`` for a record without CPPM author.

    """
    affiliation = svc.my_affiliation(reccds, 8, 7)
    assert affiliation == "Marseille, CPPM"

    # a paper from NA62 -- no CPPM author
    record_na62 = load_record("cds.cern.ch", 1434415)

    with pytest.raises(CheckException):
        svc.my_affiliation(record_na62, id_project=8, id_team=7)
def test_collaboration_cds_11016(svc, reccds):
    """``collaboration`` has to return ``None`` for the cds.cern.ch record."""
    result = svc.collaboration(reccds)
    assert result is None
# ............................................................................
#
# inspirehep.net record (March 2020 onward)
#
def test_is_oai_ins_11020(svc, recins):
    """``is_oai`` has to be truthy for the inspirehep.net record."""
    status = svc.is_oai(recins)
    assert status
def test_is_bad_oai_ins_11021(svc, recins):
    """``is_bad_oai_used`` has to be falsy for the inspirehep.net record."""
    status = svc.is_bad_oai_used(recins)
    assert not status
def test_authors_ins_11023(svc, recins):
    """``authors`` has to return ``None`` for the inspirehep.net record."""
    result = svc.authors(recins)
    assert result is None
def test__get_author_rescue_list_ins_11024(svc, recins):
    """Check the author rescue list obtained with the inspirehep.net record
    for the arguments ``8`` and ``7``.

    """
    expected = [
        "C. Adrover",
        "S. Akar",
        "E. Aslanides",
        "J. Cogan",
        "W. Kanso",
        "R. Le Gac",
        "O. Leroy",
        "G. Mancinelli",
        "E. Maurice",
        "A. Morda",
        "A. Mordà",
        "M. Perrin-Terrin",
        "M. Sapunov",
        "J. Serrano",
        "A. Tsaregorodtsev",
    ]

    rescue_list = svc._get_author_rescue_list(recins, 8, 7)

    assert rescue_list == expected
def test_my_affiliation_ins_11025(svc, recins):
    """Check ``my_affiliation`` with the inspirehep.net record, and that
    it raises ``CheckException`` for a record without CPPM author.

    """
    affiliation = svc.my_affiliation(recins, 8, 7)
    assert affiliation == "Marseille, CPPM"

    # a paper from NA62 -- no CPPM author
    record_na62 = load_record("cds.cern.ch", 1434415)

    with pytest.raises(CheckException):
        svc.my_affiliation(record_na62, id_project=8, id_team=7)
def test_collaboration_ins_11026(svc, recins):
    """``collaboration`` has to return ``None`` for the inspirehep.net
    record.

    """
    result = svc.collaboration(recins)
    assert result is None
# ............................................................................
#
# others
#
def test_search_synonym_11030():
    """Check that ``search_synonym`` returns the identifier ``2`` for the
    collaboration of two inspirehep.net records.

    """
    db = current.db

    # 1342250: collaboration ANTARES, TANAMI (defined as synonym in the db)
    #  718872: collaboration ANTARES (defined as synonym in the db)
    for recid in (1342250, 718872):
        record = load_record("inspirehep.net", recid, shelf="literature")

        colid = search_synonym(
            db.collaborations,
            "collaboration",
            record.collaboration())

        assert colid == 2
"""test_11_harvest_tools_base
"""
import pytest
from gluon import current
from harvest_tools import (family_name_fr,
get_rex_institute,
order_oais)
from store_tools import load_record, search_synonym
@pytest.fixture(scope="module")
def reccds():
    """Provide the record 1951625 of cds.cern.ch, loaded once per module."""
    record = load_record("cds.cern.ch", 1951625)
    return record
@pytest.fixture(scope="module")
def recins():
    """Provide the literature record 1319638 of inspirehep.net,
    loaded once per module.

    """
    record = load_record("inspirehep.net", 1319638, shelf="literature")
    return record
def test_family_name_11001(reccds):
    """Check that ``family_name_fr`` applied to the formatted name of each
    author gives the ``last_name`` of the authors table.

    """
    # author names formatted as "F. Last"
    reccds.check_format_authors(fmt="F. Last")

    for author in reccds.df_authors.itertuples():
        assert author.last_name == family_name_fr(author.fmt_name)
def test_get_rex_institue_11002():
    """Check the regular expression returned by ``get_rex_institute``."""
    # one alternative per affiliation key, already protected for the regex
    affiliation_keys = (
        r"Marseille, CPPM",
        r"CPPM, Marseille",
        r"Centre de Physique des Particules de Marseille \(CPPM\)",
        r"Aix Marseille Univ, CNRS/IN2P3, CPPM, Marseille, France",
    )

    expected = "|".join(
        r"(^|\|)" + key + r"($|\|)" for key in affiliation_keys)

    val = get_rex_institute(current.db, current.app)

    assert val == expected
def test_order_oais_11003(reccds, recins):
    """Check that ``order_oais`` gives the same string, cds identifier
    first, for the cds.cern.ch and the inspirehep.net records.

    """
    expected = "oai:cds.cern.ch:1951625, oai:inspirehep.net:1319638"

    for record in (reccds, recins):
        assert order_oais(record.oai()) == expected
# ............................................................................
#
# others
#
def test_search_synonym_11030():
    """Check that ``search_synonym`` returns the identifier ``2`` for the
    collaboration of two inspirehep.net records.

    """
    db = current.db

    # 1342250: collaboration ANTARES, TANAMI (defined as synonym in the db)
    #  718872: collaboration ANTARES (defined as synonym in the db)
    for recid in (1342250, 718872):
        record = load_record("inspirehep.net", recid, shelf="literature")

        colid = search_synonym(
            db.collaborations,
            "collaboration",
            record.collaboration())

        assert colid == 2
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment