Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
1885637c
Commit
1885637c
authored
Oct 07, 2015
by
LE GAC Renaud
Browse files
Move the function search_synonym in harvester.base and test it.
parent
9f00fca0
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
104 additions
and
91 deletions
+104
-91
docs/api/generated/harvest_tools.base.search_synonym.rst
docs/api/generated/harvest_tools.base.search_synonym.rst
+6
-0
docs/api/harvester.rst
docs/api/harvester.rst
+1
-1
modules/harvest_tools/__init__.py
modules/harvest_tools/__init__.py
+2
-1
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+9
-60
modules/harvest_tools/base.py
modules/harvest_tools/base.py
+63
-0
tests/harvester/CheckAndFix/test_com_cds1559714_fix.py
tests/harvester/CheckAndFix/test_com_cds1559714_fix.py
+0
-28
tests/harvester/test_basic_functions.py
tests/harvester/test_basic_functions.py
+23
-1
No files found.
docs/api/generated/harvest_tools.base.search_synonym.rst
0 → 100644
View file @
1885637c
harvest_tools.base.search_synonym
=================================
.. currentmodule:: harvest_tools.base
.. autofunction:: search_synonym
\ No newline at end of file
docs/api/harvester.rst
View file @
1885637c
...
...
@@ -48,7 +48,7 @@ Helper functions
~base.family_name_fr
~base.format_author_fr
~base.learn_my_authors
~
automaton
.search_synonym
~
base
.search_synonym
Logger
^^^^^^
...
...
modules/harvest_tools/__init__.py
View file @
1885637c
...
...
@@ -6,9 +6,10 @@ and to push them in the database.
from
base
import
(
DRY_RUN
,
family_name_fr
,
format_author_fr
,
search_synonym
,
ToolException
)
from
automaton
import
Automaton
,
search_synonym
from
automaton
import
Automaton
from
articles
import
Articles
from
factory
import
build_harvester_tool
,
get_harvester_tool
from
msg
import
Msg
...
...
modules/harvest_tools/automaton.py
View file @
1885637c
...
...
@@ -6,7 +6,10 @@ import re
import
traceback
from
base
import
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
ToolException
from
base
import
(
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
search_synonym
,
ToolException
)
from
gluon.storage
import
Storage
from
invenio_tools
import
(
CheckAndFix
,
InvenioStore
,
...
...
@@ -14,74 +17,18 @@ from invenio_tools import (CheckAndFix,
OAI_URL
)
from
msg
import
Msg
from
msgcollection
import
MsgCollection
from
plugin_dbui
import
CALLBACK_ERRORS
,
get_id
,
UNDEF_ID
from
plugin_dbui
import
CALLBACK_ERRORS
,
get_id
MSG_NO_CAT
=
'Select a "category" !!!'
MSG_NO_PROJECT
=
'Select a "project" !!!'
MSG_NO_TEAM
=
'Select a "team" !!!'
MSG_TOOMANY_SYNONYM
=
"Reject too many %s synonyms."
MSG_NSERT_FAIL
=
"Fail to insert the new record in the database."
MSG_NO_OAI
=
"Reject no OAI identifier"
MSG_WELL_FORM_OAI
=
"Reject OAI is not well formed"
def
search_synonym
(
table
,
fieldname
,
value
,
create
=
False
):
"""Get the database identifier for the record having the database field
or the synonyms field matching the value.
Note:
The database table must have a field name *synonyms*.
It is a string containing values separated by a comma.
Args:
table (gluon.DAL.Table): database table.
fieldname (unicode): field of the database table
identified by its name.
value (unicode): value to be matched.
create(bool): create a new entry in the database table when
it is ``True``
Returns:
int:
* the id of the database record.
* UNDEF_ID if value is not defined.
Raises:
ToolException: when more than one synonym is found.
"""
if
not
value
:
return
UNDEF_ID
db
=
table
.
_db
kwargs
=
{}
kwargs
[
fieldname
]
=
value
id_rec
=
get_id
(
table
,
**
kwargs
)
if
id_rec
is
not
None
:
return
id_rec
# nothing found, have a look to the synonyms field
query
=
table
.
synonyms
.
contains
(
value
)
setrows
=
db
(
query
)
# no synonym found, create the entry
ncount
=
setrows
.
count
()
if
ncount
==
0
and
create
:
return
table
.
insert
(
**
kwargs
)
# one synonym found
elif
ncount
==
1
:
return
setrows
.
select
(
table
.
id
).
first
().
id
# more than one synonyms - don't know how to choose
else
:
msg
=
MSG_TOOMANY_SYNONYM
%
table
.
_tablename
raise
ToolException
(
msg
)
class
Automaton
(
object
):
"""Base class to search and process publications:
...
...
@@ -649,7 +596,8 @@ class Automaton(object):
* UNDEF_ID if value is not defined.
Raises:
ToolException: when more than one synonym is found.
ToolException: when more than one synonym is found or when
the country is not defined.
"""
return
search_synonym
(
self
.
db
.
countries
,
"country"
,
value
)
...
...
@@ -666,7 +614,8 @@ class Automaton(object):
* UNDEF_ID if value is not defined.
Raises:
ToolException: when more than one synonym is found.
ToolException: when more than one synonym is found or when
the publisher is not defined.
"""
return
search_synonym
(
self
.
db
.
publishers
,
"abbreviation"
,
value
)
modules/harvest_tools/base.py
View file @
1885637c
...
...
@@ -3,6 +3,8 @@
"""
from
invenio_tools
import
REG_AUTHOR
from
plugin_dbui
import
get_id
,
UNDEF_ID
DRY_RUN
=
"dry run"
...
...
@@ -10,6 +12,8 @@ MSG_CRASH = "Crash: %s"
MSG_FIX_ORIGIN
=
"Fixed the origin field"
MSG_IN_DB
=
"Already in the database"
MSG_LOAD
=
"Load in the database"
MSG_NO_ENTRY
=
"Reject %s is not defined."
MSG_TOOMANY_SYNONYM
=
"Reject too many %s synonyms."
def
family_name_fr
(
full_name
):
...
...
@@ -146,5 +150,64 @@ def learn_my_authors(db,
db
.
my_authors
[
row
.
id
]
=
dict
(
authors
=
', '
.
join
(
database_authors
))
def
search_synonym
(
table
,
fieldname
,
value
,
create
=
False
):
"""Get the database identifier for the record having the database field
or the synonyms field matching the value.
Note:
The database table must have a field name *synonyms*.
It is a string containing values separated by a comma.
Args:
table (gluon.DAL.Table): database table.
fieldname (unicode): field of the database table
identified by its name.
value (unicode): value to be matched.
create(bool): create a new entry in the database table when
it is ``True``
Returns:
int:
* the id of the database record.
* UNDEF_ID if value is not defined.
Raises:
ToolException: when more than one synonym is found, or when no entry matches and ``create`` is ``False``.
"""
if
not
value
:
return
UNDEF_ID
db
=
table
.
_db
kwargs
=
{}
kwargs
[
fieldname
]
=
value
id_rec
=
get_id
(
table
,
**
kwargs
)
if
id_rec
is
not
None
:
return
id_rec
# nothing found, have a look at the synonyms field
query
=
table
.
synonyms
.
contains
(
value
)
setrows
=
db
(
query
)
# no synonym found, create the entry
ncount
=
setrows
.
count
()
if
ncount
==
0
:
if
create
:
return
table
.
insert
(
**
kwargs
)
else
:
msg
=
MSG_NO_ENTRY
%
table
.
_tablename
raise
ToolException
(
msg
)
# one synonym found
elif
ncount
==
1
:
return
setrows
.
select
(
table
.
id
).
first
().
id
# more than one synonyms - don't know how to choose
else
:
msg
=
MSG_TOOMANY_SYNONYM
%
table
.
_tablename
raise
ToolException
(
msg
)
class
ToolException
(
Exception
):
pass
tests/harvester/CheckAndFix/test_com_cds1559714_fix.py
deleted
100644 → 0
View file @
9f00fca0
# -*- coding: utf-8 -*-
"""TALK
http://cds.cern.ch/record/1559714
Rare Decays of Heavy Mesons
26th International Symposium on Lepton Photon Interactions
at High Energies, San Francisco, CA, USA, 24 - 29 Jun 2013
No corrections are applied to the record.
Allow to test the brute force decoding with its mistakes.
Note:
* Country is not well defined (USA)
"""
import
pytest
from
invenio_tools
import
CheckAndFix
,
CheckException
,
load_record
def
test_country_exception
():
record
=
load_record
(
'cds.cern.ch'
,
1559714
)
svc
=
CheckAndFix
()
# no exception since the value is defined in the synonyms.
assert
svc
.
country
(
record
)
is
None
tests/harvester/test_basic_functions.py
View file @
1885637c
...
...
@@ -2,10 +2,32 @@
"""test basic harvester functions
"""
from
harvest_tools
import
format_author_fr
import
pytest
from
gluon
import
current
from
harvest_tools
import
format_author_fr
,
search_synonym
,
ToolException
from
invenio_tools
import
load_record
def
test_format_author
():
assert
format_author_fr
(
"Aaij, Roel"
)
==
"R. Aaij"
assert
format_author_fr
(
"Le Gac, Renaud"
)
==
"R. Le Gac"
assert
format_author_fr
(
"Bettler, Marc-Olivier"
)
==
"M.-O. Bettler"
def
test_search_synonym
():
db
=
current
.
globalenv
[
'db'
]
# collaboration ANTARES, TANAMI (should not be defined as a synonym)
record
=
load_record
(
"inspirehep.net"
,
1342250
)
with
pytest
.
raises
(
ToolException
):
search_synonym
(
db
.
collaborations
,
"collaboration"
,
record
.
collaboration
())
# collaboration = ANTARES (defined as synonym in the db)
record
=
load_record
(
"inspirehep.net"
,
718872
)
colid
=
search_synonym
(
db
.
collaborations
,
"collaboration"
,
record
.
collaboration
())
assert
colid
==
2
# country = USA (defined as a synonym)
record
=
load_record
(
'cds.cern.ch'
,
1559714
)
country_id
=
search_synonym
(
db
.
countries
,
"country"
,
record
.
conference_country
())
assert
country_id
==
311
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment