Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
a26e2255
Commit
a26e2255
authored
Oct 08, 2015
by
LE GAC Renaud
Browse files
Modify the signature of the method Automation.get_record_by_fields.
parent
b47d5d10
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
64 additions
and
60 deletions
+64
-60
modules/harvest_tools/articles.py
modules/harvest_tools/articles.py
+32
-23
modules/harvest_tools/automaton.py
modules/harvest_tools/automaton.py
+20
-13
modules/harvest_tools/notes.py
modules/harvest_tools/notes.py
+2
-4
modules/harvest_tools/preprints.py
modules/harvest_tools/preprints.py
+2
-4
modules/harvest_tools/proceedings.py
modules/harvest_tools/proceedings.py
+2
-4
modules/harvest_tools/reports.py
modules/harvest_tools/reports.py
+2
-4
modules/harvest_tools/talks.py
modules/harvest_tools/talks.py
+2
-4
modules/harvest_tools/thesis.py
modules/harvest_tools/thesis.py
+2
-4
No files found.
modules/harvest_tools/articles.py
View file @
a26e2255
...
...
@@ -91,7 +91,7 @@ class Articles(Automaton):
The other arguments are used to transform the corresponding preprint
into an article.
Args:
Keyword
Args:
oai_url (unicode): the OAI identifier of the article.
id_publisher (int): identifier of the publisher in the database.
my_authors (unicode): authors of my institute separated by a comma.
...
...
@@ -138,33 +138,39 @@ class Articles(Automaton):
return
(
rec_id
,
1
)
def
get_record_by_fields
(
self
,
oai_url
,
year
,
id_publisher
=
None
,
my_authors
=
None
,
oai_url
=
None
,
pages
=
None
,
publication_url
=
None
,
preprint_number
=
None
,
title
=
None
,
volume
=
None
,
year
=
None
):
"""Get the record matching the input fields
volume
=
None
):
"""Get article matching fields values defined
in the keyword arguments.
Note:
Fix the field origin when a match is found.
Note:
Transform a preprint into an article.
This method is required to deal with an article entered by hand and
found later by the harvester.
Args:
oai_url (unicode): the oai_url, *e.g*
``http://cds.cern.ch/record/123456``. The origin field
of the existing database record is updated to **oai_url**
when a match is found.
year (unicode): the year of the publication. It is used
by the search algorithm and by the logger.
Keyword Args:
id_publisher (int): identifier of the publisher in the database.
my_authors (unicode): authors of my institute separated by a comma.
oai_url (unicode): the URL defining the OAI.
pages (unicode): the page reference.
publication_url (unicode): the URL of the publication.
preprint_number (unicode): the preprint number
title (unicode): the title of the publication.
volume (unicode): the volume reference.
year (unicode): the year of publication.
Returns:
tuple: ``(id, status)`` which contains the ``id`` of the record.
...
...
@@ -176,35 +182,40 @@ class Articles(Automaton):
if
self
.
dbg
:
print
"get existing article by fields"
# alias
db
=
self
.
db
id_project
=
self
.
id_project
id_team
=
self
.
id_team
logs
=
self
.
logs
# check against published articles
rec_id
=
get_id
(
db
.
publications
,
id_projects
=
self
.
id_project
,
id_projects
=
id_project
,
id_publishers
=
id_publisher
,
id_teams
=
self
.
id_team
,
id_teams
=
id_team
,
pages
=
pages
,
volume
=
volume
,
year
=
year
)
# fix origin field
if
rec_id
and
not
db
.
publications
[
rec_id
].
origin
:
publication
=
db
.
publications
[
rec_id
]
if
rec_id
and
not
publication
.
origin
:
if
not
self
.
dry_run
:
db
.
publication
s
[
rec_id
]
=
dict
(
origin
=
oai_url
)
publication
=
dict
(
origin
=
oai_url
)
self
.
logs
[
-
1
].
modify
(
MSG_FIX_ORIGIN
,
year
)
logs
[
-
1
].
modify
(
MSG_FIX_ORIGIN
,
year
)
return
(
rec_id
,
1
)
if
rec_id
:
self
.
logs
[
-
1
].
idle
(
MSG_IN_DB
,
year
)
logs
[
-
1
].
idle
(
MSG_IN_DB
,
year
)
return
(
rec_id
,
0
)
# check against published preprint
# a preprint can be identified by its category which is PRE (15)
rec_id
=
get_id
(
db
.
publications
,
id_categories
=
self
.
id_preprint
,
id_projects
=
self
.
id_project
,
id_teams
=
self
.
id_team
,
id_projects
=
id_project
,
id_teams
=
id_team
,
preprint
=
preprint_number
)
if
not
rec_id
:
...
...
@@ -284,15 +295,13 @@ class Articles(Automaton):
fields
=
dict
(
id_publisher
=
id_publisher
,
my_authors
=
my_authors
,
oai_url
=
oai_url
,
pages
=
pages
,
publication_url
=
publication_url
,
preprint_number
=
preprint_number
,
title
=
title
,
volume
=
volume
,
year
=
year
)
volume
=
volume
)
rec_id
,
status
=
self
.
get_record_by_fields
(
**
fields
)
rec_id
,
status
=
self
.
get_record_by_fields
(
oai_url
,
year
,
**
fields
)
if
rec_id
:
return
status
...
...
modules/harvest_tools/automaton.py
View file @
a26e2255
...
...
@@ -343,19 +343,26 @@ class Automaton(object):
return
True
def
get_record_by_fields
(
self
,
**
kwargs
):
def
get_record_by_fields
(
self
,
oai_url
,
year
,
**
kwargs
):
"""Get database record matching fields values defined
in the keyword arguments.
Keyword Args:
oai_url (unicode): *e.g* ``"http://cds.cern.ch/record/123456"``
year (int): the year of the publication.
Note:
Fix the field origin when a match is found.
This method is required to deal with a publication entered by hand
and found later by a harvester.
Note:
The year is only used by the logger.
Args:
oai_url (unicode): the oai_url, *e.g*
``http://cds.cern.ch/record/123456``. The origin field
of the existing database record is updated to **oai_url**
when a match is found.
year (int): the year of the publication. It is used
by the search algorithm and by the logger.
Keyword Args:
kwargs (unicode): a series of key, value pair where the
key is the name of a publications database field.
Returns:
tuple: ``(id, status)`` which contains the ``id`` of the record.
...
...
@@ -371,9 +378,9 @@ class Automaton(object):
db
=
self
.
db
logs
=
self
.
logs
#
origin can't be used fo
r t
he
search
oai_url
=
kwargs
[
"oai_url"
]
del
kwargs
[
"
oai_url"
]
#
add the publication yea
r t
o
search
criteria
if
year
:
kwargs
[
"
year"
]
=
year
# look for an existing record
rec_id
=
get_id
(
db
.
publications
,
**
kwargs
)
...
...
@@ -387,10 +394,10 @@ class Automaton(object):
if
not
self
.
dry_run
:
publication
=
dict
(
origin
=
oai_url
)
logs
[
-
1
].
modify
(
MSG_FIX_ORIGIN
,
kwargs
[
"
year
"
]
)
logs
[
-
1
].
modify
(
MSG_FIX_ORIGIN
,
year
)
return
(
rec_id
,
1
)
logs
[
-
1
].
idle
(
MSG_IN_DB
,
kwargs
[
"
year
"
]
)
logs
[
-
1
].
idle
(
MSG_IN_DB
,
year
)
return
(
rec_id
,
0
)
def
insert_record
(
self
,
record
):
...
...
modules/harvest_tools/notes.py
View file @
a26e2255
...
...
@@ -72,11 +72,9 @@ class Notes(Automaton):
id_categories
=
self
.
id_category
,
id_projects
=
self
.
id_project
,
id_teams
=
self
.
id_team
,
oai_url
=
oai_url
,
title
=
title
,
year
=
year
)
title
=
title
)
rec_id
,
status
=
self
.
get_record_by_fields
(
**
fields
)
rec_id
,
status
=
self
.
get_record_by_fields
(
oai_url
,
year
,
**
fields
)
if
rec_id
:
return
status
...
...
modules/harvest_tools/preprints.py
View file @
a26e2255
...
...
@@ -98,13 +98,11 @@ class Preprints(Automaton):
fields
=
dict
(
first_author
=
first_author
,
id_projects
=
self
.
id_project
,
id_teams
=
self
.
id_team
,
oai_url
=
oai_url
,
preprint
=
preprint
,
submitted
=
submitted
,
title
=
title
,
year
=
year
)
title
=
title
)
rec_id
,
status
=
self
.
get_record_by_fields
(
**
fields
)
rec_id
,
status
=
self
.
get_record_by_fields
(
oai_url
,
year
,
**
fields
)
if
rec_id
:
return
status
...
...
modules/harvest_tools/proceedings.py
View file @
a26e2255
...
...
@@ -104,17 +104,15 @@ class Proceedings(Automaton):
conference_title
=
conference_title
,
first_author
=
first_author
,
id_publishers
=
id_publisher
,
oai_url
=
oai_url
,
preprint
=
preprint
,
pages
=
pages
,
publication_url
=
url
,
report_numbers
=
report_numbers
,
submitted
=
submitted
,
volume
=
volume
,
title
=
title
,
year
=
year
)
title
=
title
)
rec_id
,
status
=
self
.
get_record_by_fields
(
**
fields
)
rec_id
,
status
=
self
.
get_record_by_fields
(
oai_url
,
year
,
**
fields
)
if
rec_id
:
return
status
...
...
modules/harvest_tools/reports.py
View file @
a26e2255
...
...
@@ -96,11 +96,9 @@ class Reports(Automaton):
fields
=
dict
(
id_categories
=
self
.
id_category
,
id_projects
=
self
.
id_project
,
id_teams
=
self
.
id_team
,
oai_url
=
oai_url
,
title
=
title
,
year
=
year
)
title
=
title
)
rec_id
,
status
=
self
.
get_record_by_fields
(
**
fields
)
rec_id
,
status
=
self
.
get_record_by_fields
(
oai_url
,
year
,
**
fields
)
if
rec_id
:
return
status
...
...
modules/harvest_tools/talks.py
View file @
a26e2255
...
...
@@ -83,11 +83,9 @@ class Talks(Automaton):
# get an already published talk
fields
=
dict
(
conference_title
=
conference_title
,
first_author
=
first_author
,
oai_url
=
oai_url
,
title
=
title
,
year
=
year
)
title
=
title
)
rec_id
,
status
=
self
.
get_record_by_fields
(
**
fields
)
rec_id
,
status
=
self
.
get_record_by_fields
(
oai_url
,
year
,
**
fields
)
if
rec_id
:
return
status
...
...
modules/harvest_tools/thesis.py
View file @
a26e2255
...
...
@@ -90,11 +90,9 @@ class Thesis(Automaton):
defense
=
defense_date
,
id_projects
=
self
.
id_project
,
id_teams
=
self
.
id_team
,
oai_url
=
oai_url
,
title
=
title
,
year
=
year
)
title
=
title
)
rec_id
,
status
=
self
.
get_record_by_fields
(
**
fields
)
rec_id
,
status
=
self
.
get_record_by_fields
(
oai_url
,
year
,
**
fields
)
if
rec_id
:
return
status
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment