Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
56512435
Commit
56512435
authored
Jun 27, 2017
by
LE GAC Renaud
Browse files
Migrate CheckAndFix: methods required by the harvester Article.
parent
61979a02
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
314 additions
and
328 deletions
+314
-328
languages/fr-fr.py
languages/fr-fr.py
+1
-1
modules/harvest_tools/checkandfix.py
modules/harvest_tools/checkandfix.py
+156
-297
modules/invenio_tools/recordpubli.py
modules/invenio_tools/recordpubli.py
+17
-11
tests/basis/test_04_RecordPubli.py
tests/basis/test_04_RecordPubli.py
+7
-0
tests/basis/test_05_RecordConf.py
tests/basis/test_05_RecordConf.py
+0
-19
tests/basis/test_10_CheckAndFix_article.py
tests/basis/test_10_CheckAndFix_article.py
+133
-0
No files found.
languages/fr-fr.py
View file @
56512435
...
...
@@ -538,7 +538,7 @@
'Reject conference dates is not well formed'
:
'Rejecté les dates de la conférence dates sont mal formatté'
,
'Reject countries is not defined'
:
"Rejeté le pays n'est pas définie"
,
'Reject editor is not well formed'
:
"Rejeté l'éditeur est mal formatté"
,
'Reject incomplete paper reference'
:
'Rejeté la référence du papier est incomplète'
,
'Reject incomplete paper reference
. Check
'
:
'Rejeté la référence du papier est incomplète
. Vérifiez
'
,
'Reject invalid country'
:
'Rejeté pays inconnu'
,
'Reject invalid publisher'
:
"Rejeté l'abbréviatioin de la revue n'est pas valide"
,
'Reject no %s authors'
:
"Rejeté pas d'autheur(s) du %s"
,
...
...
modules/harvest_tools/checkandfix.py
View file @
56512435
This diff is collapsed.
Click to expand it.
modules/invenio_tools/recordpubli.py
View file @
56512435
""" invenio_tools.recordpubli
"""
import
numpy
as
np
import
re
...
...
@@ -527,22 +528,27 @@ class RecordPubli(Record):
return
True
def
is_published
(
self
):
"""``True`` is the record is published and contains a
complet
set
of publication inf
r
omation (title, volume, year and pagination).
"""``True`` is the record is published and contains a
full
set
of publication info
r
mation (title, volume, year and pagination).
Returns:
bool:
"""
df
=
self
[
u
"publication_info"
]
query
=
\
(
df
.
title
.
str
.
len
()
>
0
)
\
&
(
df
.
volume
.
str
.
len
()
>
0
)
\
&
(
df
.
year
.
str
.
len
()
>
0
)
\
&
(
df
.
pagination
.
str
.
len
()
>
0
)
# NOTE
# * df.columns are title, volume, year and pagination
# * df can contain one or more rows due to an erratum.
# * assume that the first row is the oldest one and corresponds to
# the first publication
# * the row contains empty string when the record is not published.
# * iloc[0] returns a Series whose index holds the column names
#
columns
=
(
self
[
u
"publication_info"
].
iloc
[
0
]
.
replace
(
""
,
np
.
nan
)
.
dropna
()
.
index
)
return
len
(
df
[
query
])
>
0
return
len
(
columns
.
intersection
(
PAPER_REFERENCE_KEYS
))
==
4
def
is_with_erratum
(
self
):
"""``True`` when the record contains erratum data.
...
...
@@ -774,7 +780,7 @@ class RecordPubli(Record):
Returns:
unicode:
* format are"YYYY-MM", "YYYY-MM-DD", "DD MMM YYYY", *etc.*
* Empty sring when not defined.
* Empty s
t
ring when not defined.
"""
return
self
.
_get
(
u
"prepublication"
,
u
"date"
)
...
...
tests/basis/test_04_RecordPubli.py
View file @
56512435
...
...
@@ -160,6 +160,13 @@ def test_collaboration(reccds):
def test_is_published(reccds):
    """Check ``is_published`` on a complete and on an incomplete reference."""
    published = reccds.is_published()
    assert published

    # The paper below is published, but its paper reference contains errors.
    # The correct reference is Eur. Phys. J. C 75 (2015) 158,
    # whereas the volume is not defined and the pagination is wrong (75).
    incomplete = load_record("cds.cern.ch", 1753190)
    assert not incomplete.is_published()
def
test_is_with_erratum
(
reccds
):
assert
not
reccds
.
is_with_erratum
()
...
...
tests/basis/test_05_RecordConf.py
View file @
56512435
...
...
@@ -80,22 +80,3 @@ def test_conference_url(reccds, recins):
def test_conference_year(reccds, recins):
    """Both records are expected to report "2010" as conference year."""
    for record in (reccds, recins):
        assert record.conference_year() == "2010"
# def test_reference_conference_id(reccds):
# from pprint import pprint
# pprint(reccds["meeting_name"])
# pprint(recins["meeting_name"])
# assert reccds.reference_conference_id() == "1181092"
#
#
# def test_reference_conference_key(reccds):
# assert reccds.reference_conference_key() == "rome20101206"
#
#
# def test_reference_conference_proceeding(reccds):
# assert reccds.reference_conference_proceeding() == "1313736"
#
#
# def test_reference_conference_talk(reccds):
# assert reccds.reference_conference_talk() == "1313736"
tests/basis/test_10_CheckAndFix_article.py
0 → 100644
View file @
56512435
# -*- coding: utf-8 -*-
"""test_10_CheckAndFix_article
* Test CheckAndFix methods for article:
- format_editor
- publisher
- paper_reference
- submitted
- year
- format_author
- get_my_authors
* Same article in cds.cern.ch and inspirehep.net
Phys. Rev. D 95 (2017) 052005
"""
import
pytest
from
harvest_tools.checkandfix
import
CheckAndFix
from
harvest_tools.exception
import
ToolException
from
invenio_tools
import
load_record
@pytest.fixture(scope="module")
def reccds():
    """Module-scoped fixture: record 2242641 loaded from cds.cern.ch.

    NOTE(review): the fixture is shared by every test of the module and
    several tests modify the record in place, so the tests presumably
    depend on their execution order -- confirm before reordering them.

    """
    record = load_record("cds.cern.ch", 2242641)
    return record
@pytest.fixture(scope="module")
def recins():
    """Module-scoped fixture: record 1509922 loaded from inspirehep.net."""
    record = load_record("inspirehep.net", 1509922)
    return record
@pytest.fixture(scope="module")
def svc():
    """Module-scoped fixture: the ``CheckAndFix`` instance under test."""
    service = CheckAndFix()
    return service
def test_format_editor(svc, reccds, recins):
    """Check the editor and volume before and after ``format_editor``."""

    def expect(record, editor, volume):
        # editor first, then volume: same order as the individual checks
        assert record.paper_editor() == editor
        assert record.paper_volume() == volume

    # cds: the values are the same before and after the call
    expect(reccds, "Phys. Rev. D", "95")
    svc.format_editor(reccds)
    expect(reccds, "Phys. Rev. D", "95")

    # inspire: "Phys.Rev." / "D95" becomes "Phys. Rev. D" / "95"
    expect(recins, "Phys.Rev.", "D95")
    svc.format_editor(recins)
    expect(recins, "Phys. Rev. D", "95")
def test_publisher(svc, reccds):
    """``publisher`` is expected to return ``None`` for the cds record."""
    outcome = svc.publisher(reccds)
    assert outcome is None
def test_paper_reference(svc, reccds):
    """Check the recovery of the paper reference and its failure mode."""
    # Recovery procedure using the DOI:
    # keep the original reference, then blank the title (publisher)
    # and the volume of the first publication_info row.
    expected = reccds.paper_reference()

    publication_info = reccds["publication_info"]
    publication_info.loc[0, ["title", "volume"]] = ["", ""]

    svc.paper_reference(reccds)
    assert reccds.paper_reference() == expected

    # The paper below is published, but its paper reference contains errors.
    # The correct reference is Eur. Phys. J. C 75 (2015) 158,
    # whereas the volume is not defined and the pagination is wrong (75).
    # It cannot be recovered from the DOI data.
    broken = load_record("cds.cern.ch", 1753190)

    with pytest.raises(ToolException):
        svc.paper_reference(broken)
def test_submitted(svc, reccds, recins):
    """Check the submission date before and after ``svc.submitted``."""
    # raw values as delivered by the two stores
    assert reccds.submitted() == "19 Jan 2017"
    assert recins.submitted() == "2017-01-19"

    svc.submitted(reccds)
    # NOTE(review): the result of this call is discarded; it looks
    # redundant -- confirm that submitted() has no side effect before
    # removing it.
    reccds.submitted()
    assert reccds.submitted() == "2017-01-19"

    # other input formats: "DD MM YYYY" and a bare year
    cases = (("19 01 2017", "2017-01-19"),
             ("2017", "2017-01"))

    for raw, normalised in cases:
        reccds[u"prepublication"][u"date"] = raw
        svc.submitted(reccds)
        assert reccds.submitted() == normalised
def test_format_authors(svc, reccds):
    """Check author names before and after formatting as "F. Last"."""
    # positions probed in the author list
    probes = (0, 1, 344, -1)

    before = (u"Aaij, Roel",
              u"Adeva, Bernardo",
              u"Koopman, Rose",
              u"Zucchelli, Stefano")

    after = (u"R. Aaij",
             u"B. Adeva",
             u"R. Koopman",
             u"S. Zucchelli")

    names = reccds.authors_as_list()
    assert len(names) == reccds["number_of_authors"]
    for index, expected in zip(probes, before):
        assert names[index] == expected

    svc.format_authors(reccds, fmt="F. Last")

    names = reccds.authors_as_list()
    for index, expected in zip(probes, after):
        assert names[index] == expected
def test_get_my_authors(svc, reccds):
    """Check the ``my_authors`` attribute set by ``get_my_authors``."""
    svc.format_authors(reccds, fmt="F. Last")

    # the method returns nothing; the result is read from the record
    outcome = svc.get_my_authors(reccds, sep=u"|", sort=True)
    assert outcome is None

    found = reccds.my_authors.encode("utf-8")

    expected = ("J. Arnau Romeu|E. Aslanides|J. Cogan|"
                "K. De Bruyn|R. Le Gac|O. Leroy|"
                "G. Mancinelli|M. Martin|A. Mordà|"
                "J. Serrano|A. Tayduganov|A. Tsaregorodtsev")

    assert found == expected
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment