Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
f7cdbbb0
Commit
f7cdbbb0
authored
Jan 20, 2021
by
LE GAC Renaud
Browse files
Add test_11_harvest_tools_base.py
parent
7d5c1287
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
135 additions
and
242 deletions
+135
-242
modules/harvest_tools/__init__.py
modules/harvest_tools/__init__.py
+2
-1
modules/harvest_tools/base.py
modules/harvest_tools/base.py
+59
-58
tests/basis/test_11_CheckAndFix_base.py.off
tests/basis/test_11_CheckAndFix_base.py.off
+0
-183
tests/basis/test_11_harvest_tools_base.py
tests/basis/test_11_harvest_tools_base.py
+74
-0
No files found.
modules/harvest_tools/__init__.py
View file @
f7cdbbb0
...
...
@@ -9,8 +9,9 @@ from .base import (DRY_RUN,
MSG_IN_DB
,
MSG_LOAD
,
family_name_fr
,
get_rex_institute
,
learn_my_authors
,
get_rex_institute
)
order_oais
)
from
.automaton
import
Automaton
from
.articles
import
Articles
...
...
modules/harvest_tools/base.py
View file @
f7cdbbb0
...
...
@@ -30,28 +30,7 @@ def family_name_fr(full_name):
family name
"""
return
full_name
[
full_name
.
find
(
' '
)
+
1
:]
def order_oais(oais):
    """Return the OAIS string with its two identifiers reordered.

    The string is returned unchanged when it does not contain exactly
    one comma or when ``REX_OAI_CDS`` matches it. Otherwise the two
    comma-separated identifiers are stripped and swapped.

    Args:
        oais (str):
            record identifier in stores

    Returns:
        str:
            empty string when ``oais`` is ``None``

    """
    if oais is None:
        return ""

    parts = oais.split(",")

    # anything but exactly two identifiers is left untouched
    if len(parts) != 2:
        return oais

    # presumably the cds identifier already comes first -- verify against
    # the definition of REX_OAI_CDS
    if REX_OAI_CDS.match(oais):
        return oais

    first, second = (part.strip() for part in parts)
    return f"{second}, {first}"
return
full_name
[
full_name
.
rfind
(
'. '
)
+
2
:]
def
filter_logs
(
logs
):
...
...
@@ -87,6 +66,53 @@ def filter_logs(logs):
return
[
logs
[
tpl
[
0
]]
for
tpl
in
fltr
.
items
()
if
tpl
[
1
]
is
False
]
def get_rex_institute(db, app):
    r"""Get the regular expression defining the affiliation of my institute.

    It is obtained by concatenating the affiliation keys with ``|``.
    Affiliation keys can contain characters like ``(``, ``)``, ``&``,
    ``$``, ``+`` or ``?``. They are replaced by ``\(`` *etc*.

    The expression is computed only when ``app.reg_institute`` is empty.
    It is then stored in ``app.reg_institute`` for the next calls.

    Args:
        db (pydal.DAL):
            database connection
        app (gluon.storage.Storage):
            namespace defining the application

    Returns:
        str:
            the regular expression

    """
    # alias -- a value computed earlier is reused as is
    reg_institute = app.reg_institute

    if not reg_institute:
        # protect special characters in a single pass; the backslash is
        # written explicitly to avoid invalid escape sequences like "\("
        protect = str.maketrans({char: "\\" + char for char in "()&$+?"})

        # regular expression for the affiliation keys
        # add start and end of string for an exact match
        reg_institute = "|".join(
            r"(^|\|){}($|\|)".format(row.key_u.translate(protect))
            for row in db(db.affiliation_keys.id > 0).iterselect())

        app.reg_institute = reg_institute

    return reg_institute
def
learn_my_authors
(
db
,
authors
=
None
,
id_project
=
None
,
...
...
@@ -156,48 +182,23 @@ def learn_my_authors(db,
db
.
my_authors
[
row
.
id
]
=
dict
(
authors
=
', '
.
join
(
database_authors
))
def
get_rex_institute
(
db
,
app
):
"""Get the regular expression defining the affiliation of my institute.
It is obtained by concatenating the affiliation keys.
Affiliation key can contains character like ``(``, ``)`` or ``&``.
They are replaced by ``\(`` *etc*.
def
order_oais
(
oais
):
"""Order OAIS string as cds, inspirehep
Args:
db (pydal.DAL):
database connection
app (gluon.storage.Storage):
namespace defining the application
oais (str):
record identifier in stores
Returns:
str
:
str
"""
# alias
reg_institute
=
app
.
reg_institute
# regular expression for the affiliation keys
# protect special character
# add start and end of string for an exact match
if
not
reg_institute
:
lst
=
[]
for
row
in
db
(
db
.
affiliation_keys
.
id
>
0
).
iterselect
():
val
=
row
.
key_u
val
=
(
val
.
replace
(
"("
,
"\("
)
.
replace
(
")"
,
"\)"
)
.
replace
(
"&"
,
"\&"
)
.
replace
(
"$"
,
"\$"
)
.
replace
(
"+"
,
"\+"
)
.
replace
(
"?"
,
"\?"
))
val
=
r
"(^|\|){}($|\|)"
.
format
(
val
)
if
oais
is
None
:
return
""
lst
.
append
(
val
)
if
oais
.
count
(
","
)
!=
1
or
REX_OAI_CDS
.
match
(
oais
):
return
oais
app
.
reg_institute
=
reg_institute
=
r
"|"
.
join
(
lst
)
u
,
v
=
(
el
.
strip
()
for
el
in
oais
.
split
(
","
))
return
f
"
{
v
}
,
{
u
}
"
return
reg_institute
tests/basis/test_11_CheckAndFix_base.py.off
deleted
100644 → 0
View file @
7d5c1287
"""test_11_CheckAndFix_base
* Test CheckAndFix methods required by the Automaton base class.
- constructor
- is_bad_aoi
- temporary_record
- authors
- my_affiliation
- collaboration
"""
import pytest
import requests
from gluon import current
from harvest_tools.base import search_synonym
from harvest_tools.checkandfix import CheckAndFix, CheckException
from store_tools import load_record
@pytest.fixture(scope="module")
def reccds():
    """Provide the cds.cern.ch record 1951625, loaded once per module."""
    record = load_record("cds.cern.ch", 1951625)
    return record
@pytest.fixture(scope="module")
def recins():
    """Provide the inspirehep.net record 1319638, loaded once per module."""
    record = load_record("inspirehep.net", 1319638, shelf="literature")
    return record
@pytest.fixture(scope="module")
def svc():
    """Provide a CheckAndFix instance shared by the tests of this module."""
    checker = CheckAndFix()
    return checker
def test_constructor_11001(svc):
    """Check the ``reg_institute`` attribute of the CheckAndFix instance."""
    expected = (
        r"(^|\|)Marseille, CPPM($|\|)|"
        r"(^|\|)CPPM, Marseille($|\|)|"
        r"(^|\|)Centre de Physique des Particules de Marseille \(CPPM\)($|\|)|"
        r"(^|\|)Aix Marseille Univ, CNRS/IN2P3, CPPM, Marseille, France($|\|)"
    )

    assert svc.reg_institute == expected
# ............................................................................
#
# cds.cern.ch record
#
def test_is_oai_cds_11010(svc, reccds):
    """Check that ``is_oai`` is truthy for the cds.cern.ch record."""
    outcome = svc.is_oai(reccds)
    assert outcome
def test_is_bad_oai_cds_11011(svc, reccds):
    """Check that ``is_bad_oai_used`` is falsy for the cds.cern.ch record."""
    outcome = svc.is_bad_oai_used(reccds)
    assert not outcome
# v1.4.0 inhibit temporary record with the new inspirehep API (March 20)
#
# def test_temporary_record_cds_11012(svc, reccds):
#
# assert svc.temporary_record(reccds) is None
#
# # look for some temporarily record in inspirehep.net
# # using the MarcXML syntax
# # get a list of recids
# payload = {"p": "500__a:'*Temporary record*'", "of": "id", "rg": 10}
#
# r = requests.get("https://old.inspirehep.net/search", params=payload)
# li = r.json()
#
# # try with the oldest one to avoid issue with missing information, etc.
# with pytest.raises(CheckException):
# recins = load_record("inspirehep.net", li[-1], shelf="literature")
# svc.temporary_record(recins)
def test_authors_cds_11013(svc, reccds):
    """Check that ``authors`` returns None for the cds.cern.ch record."""
    outcome = svc.authors(reccds)
    assert outcome is None
def test__get_author_rescue_list_cds_11014(svc, reccds):
    """Check the author rescue list obtained for the cds.cern.ch record."""
    expected = [
        "C. Adrover",
        "S. Akar",
        "E. Aslanides",
        "J. Cogan",
        "W. Kanso",
        "R. Le Gac",
        "O. Leroy",
        "G. Mancinelli",
        "E. Maurice",
        "A. Morda",
        "A. Mordà",
        "M. Perrin-Terrin",
        "M. Sapunov",
        "J. Serrano",
        "A. Tsaregorodtsev",
    ]

    assert svc._get_author_rescue_list(reccds, 8, 7) == expected
def test_my_affiliation_cds_11015(svc, reccds):
    """Check ``my_affiliation`` on the cds.cern.ch record.

    A second record is expected to raise ``CheckException``.

    """
    affiliation = svc.my_affiliation(reccds, 8, 7)
    assert affiliation == "Marseille, CPPM"

    # a paper from NA62 -- no CPPM author
    recna62 = load_record("cds.cern.ch", 1434415)

    with pytest.raises(CheckException):
        svc.my_affiliation(recna62, id_project=8, id_team=7)
def test_collaboration_cds_11016(svc, reccds):
    """Check that ``collaboration`` returns None for the cds.cern.ch record."""
    outcome = svc.collaboration(reccds)
    assert outcome is None
# ............................................................................
#
# inspirehep.net record (March 2020 onward)
#
def test_is_oai_ins_11020(svc, recins):
    """Check that ``is_oai`` is truthy for the inspirehep.net record."""
    outcome = svc.is_oai(recins)
    assert outcome
def test_is_bad_oai_ins_11021(svc, recins):
    """Check that ``is_bad_oai_used`` is falsy for the inspirehep.net record."""
    outcome = svc.is_bad_oai_used(recins)
    assert not outcome
def test_authors_ins_11023(svc, recins):
    """Check that ``authors`` returns None for the inspirehep.net record."""
    outcome = svc.authors(recins)
    assert outcome is None
def test__get_author_rescue_list_ins_11024(svc, recins):
    """Check the author rescue list obtained for the inspirehep.net record."""
    expected = [
        "C. Adrover",
        "S. Akar",
        "E. Aslanides",
        "J. Cogan",
        "W. Kanso",
        "R. Le Gac",
        "O. Leroy",
        "G. Mancinelli",
        "E. Maurice",
        "A. Morda",
        "A. Mordà",
        "M. Perrin-Terrin",
        "M. Sapunov",
        "J. Serrano",
        "A. Tsaregorodtsev",
    ]

    assert svc._get_author_rescue_list(recins, 8, 7) == expected
def test_my_affiliation_ins_11025(svc, recins):
    """Check ``my_affiliation`` on the inspirehep.net record.

    A second record is expected to raise ``CheckException``.

    """
    affiliation = svc.my_affiliation(recins, 8, 7)
    assert affiliation == "Marseille, CPPM"

    # a paper from NA62 -- no CPPM author
    recna62 = load_record("cds.cern.ch", 1434415)

    with pytest.raises(CheckException):
        svc.my_affiliation(recna62, id_project=8, id_team=7)
def test_collaboration_ins_11026(svc, recins):
    """Check that ``collaboration`` returns None for the inspirehep.net record."""
    outcome = svc.collaboration(recins)
    assert outcome is None
# ............................................................................
#
# others
#
def test_search_synonym_11030():
    """Check that both records resolve to the collaboration with id 2."""
    collaborations = current.db.collaborations

    # 1342250: collaboration ANTARES, TANAMI (defined as synonym in the db)
    # 718872: collaboration = ANTARES (defined as synonym in the db)
    for recid in (1342250, 718872):
        record = load_record("inspirehep.net", recid, shelf="literature")
        colid = search_synonym(
            collaborations, "collaboration", record.collaboration())

        assert colid == 2
tests/basis/test_11_harvest_tools_base.py
0 → 100644
View file @
f7cdbbb0
"""test_11_harvest_tools_base
"""
import
pytest
from
gluon
import
current
from
harvest_tools
import
(
family_name_fr
,
get_rex_institute
,
order_oais
)
from
store_tools
import
load_record
,
search_synonym
@pytest.fixture(scope="module")
def reccds():
    """Provide the cds.cern.ch record 1951625, loaded once per module."""
    record = load_record("cds.cern.ch", 1951625)
    return record
@pytest.fixture(scope="module")
def recins():
    """Provide the inspirehep.net record 1319638, loaded once per module."""
    record = load_record("inspirehep.net", 1319638, shelf="literature")
    return record
def test_family_name_11001(reccds):
    """Check ``family_name_fr`` against the last name of each author.

    Author names are first formatted as "F. Last".

    """
    reccds.check_format_authors(fmt="F. Last")

    for author in reccds.df_authors.itertuples():
        assert author.last_name == family_name_fr(author.fmt_name)
def test_get_rex_institue_11002():
    """Check the regular expression returned by ``get_rex_institute``."""
    expected = (
        r"(^|\|)Marseille, CPPM($|\|)|"
        r"(^|\|)CPPM, Marseille($|\|)|"
        r"(^|\|)Centre de Physique des Particules de Marseille \(CPPM\)($|\|)|"
        r"(^|\|)Aix Marseille Univ, CNRS/IN2P3, CPPM, Marseille, France($|\|)"
    )

    assert get_rex_institute(current.db, current.app) == expected
def test_order_oais_11003(reccds, recins):
    """Check that both records give the same ordered OAIS string."""
    expected = "oai:cds.cern.ch:1951625, oai:inspirehep.net:1319638"

    for record in (reccds, recins):
        assert order_oais(record.oai()) == expected
# ............................................................................
#
# others
#
def test_search_synonym_11030():
    """Check that both records resolve to the collaboration with id 2."""
    collaborations = current.db.collaborations

    # 1342250: collaboration ANTARES, TANAMI (defined as synonym in the db)
    # 718872: collaboration = ANTARES (defined as synonym in the db)
    for recid in (1342250, 718872):
        record = load_record("inspirehep.net", recid, shelf="literature")
        colid = search_synonym(
            collaborations, "collaboration", record.collaboration())

        assert colid == 2
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment