Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
4dc48966
Commit
4dc48966
authored
Jun 29, 2017
by
LE GAC Renaud
Browse files
Migrate Notes.
parent
5a665e1a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
77 additions
and
10 deletions
+77
-10
modules/harvest_tools/notes.py
modules/harvest_tools/notes.py
+9
-6
modules/invenio_tools/recordpubli.py
modules/invenio_tools/recordpubli.py
+6
-4
tests/scan/test_Note.py
tests/scan/test_Note.py
+62
-0
No files found.
modules/harvest_tools/notes.py
View file @
4dc48966
...
...
@@ -18,10 +18,12 @@ class Notes(Automaton):
"""Check the content of the note in order to fix non conformities.
Args:
record (RecordPubli): record describing a note
record (RecordPubli):
record describing a note
Returns:
bool: ``False`` when a non conformity is found and can not be
bool:
``False`` when a non conformity is found and can not be
corrected.
"""
...
...
@@ -33,7 +35,6 @@ class Notes(Automaton):
try
:
self
.
check
.
submitted
(
record
)
self
.
check
.
year
(
record
)
self
.
check
.
format_authors
(
record
,
fmt
=
"F. Last"
)
self
.
check
.
get_my_authors
(
record
,
sort
=
True
)
...
...
@@ -53,10 +54,12 @@ class Notes(Automaton):
"""Insert a public note in the database.
Args:
record (RecordPubli): record describing a note.
record (RecordPubli):
record describing a note.
Returns:
int: one when the record is inserted / updated in the database
int:
one when the record is inserted / updated in the database
zero otherwise.
"""
...
...
@@ -64,7 +67,7 @@ class Notes(Automaton):
first_author
=
record
.
first_author
()
oai_url
=
record
.
oai_url
()
title
=
record
.
title
()
year
=
record
.
year
()
year
=
record
.
submitted
()[
0
:
4
]
# get existing notes
fields
=
dict
(
first_author
=
first_author
,
...
...
modules/invenio_tools/recordpubli.py
View file @
4dc48966
...
...
@@ -244,14 +244,16 @@ class RecordPubli(Record):
data
=
(
data
if
isinstance
(
data
,
list
)
else
[
data
])
df
=
DataFrame
(
data
)
columns
=
df
.
columns
# protection -- list of year, e.g. [2014, 2014] (cds 1951625)
df
[
"year"
]
=
\
df
.
year
.
apply
(
lambda
x
:
(
", "
.
join
(
set
(
x
))
if
isinstance
(
x
,
list
)
else
x
))
if
"year"
in
columns
:
df
[
"year"
]
=
\
df
.
year
.
apply
(
lambda
x
:
(
", "
.
join
(
set
(
x
))
if
isinstance
(
x
,
list
)
else
x
))
# erratum -- sort by year and volume
columns
=
df
.
columns
if
set
([
"year"
,
"volume"
]).
issubset
(
columns
):
df
=
df
.
sort_values
([
"year"
,
"volume"
])
...
...
tests/scan/test_Note.py
0 → 100644
View file @
4dc48966
# -*- coding: utf-8 -*-
"""test_Note
* Harvester is Notes
* Store is cds.cern.ch
* LHCb AP for the current year
* Check that all error messages are expected
"""
import
pytest
from
gluon
import
current
from
harvest_tools.notes
import
Notes
from
harvest_tools.factory
import
build_harvester_tool
from
test_tools
import
messages
@pytest.fixture(scope="module")
def harvester_messages():
    """Provide the reference messages for the tests of this module.

    The fixture is declared with the module scope and simply forwards
    the value built by ``test_tools.messages``.

    Returns:
        the object returned by ``messages()``.

    """
    expected = messages()
    return expected
def test_lhcb_ap2(harvester_messages):
    """Harvest the LHCb notes of the current year in dry-run mode.

    This test is useful to:
        * debug a harvester
        * profile its performance to see where the time is spent.
        * compare different implementations to measure improvements.
        * ...

    Args:
        harvester_messages:
            value of the ``harvester_messages`` fixture. Every message
            logged by the harvester has to belong to it.

    """
    # These parameters only make sense if you are inserting records in the
    # database. Select the current year in order to test different cases.
    db = current.db
    id_team = 7         # LHCb
    id_project = 8      # LHCb
    id_category = 14    # AP
    year = current.request.now.year

    # build the harvester
    tool = build_harvester_tool(db,
                                id_team,
                                id_project,
                                "notes",
                                id_category,
                                year_start=str(year),
                                year_end="",
                                dry_run=True,
                                debug=True)

    assert isinstance(tool, Notes)

    # run the harvester
    tool.process_url("cds.cern.ch",
                     "LHCb Notes, LHCb Conference Contributions")

    # analyse the log
    # The number of records cannot be checked since it evolves within a year.
    # Only test that there are no unexpected messages.
    msgs = {el.txt for el in tool.logs}

    # same condition as msgs.issubset(harvester_messages), but the offending
    # messages are reported when the assertion fails
    unexpected = msgs.difference(harvester_messages)
    assert not unexpected, "unexpected messages: {}".format(unexpected)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment