Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
d4e6f91d
Commit
d4e6f91d
authored
Oct 08, 2015
by
LE GAC Renaud
Browse files
Modify the signature of the method Articles.get_record_by_origin.
parent
a26e2255
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
83 additions
and
63 deletions
+83
-63
modules/harvest_tools/articles.py
modules/harvest_tools/articles.py
+83
-63
No files found.
modules/harvest_tools/articles.py
View file @
d4e6f91d
...
...
@@ -77,66 +77,6 @@ class Articles(Automaton):
return
True
def get_record_by_origin(self,
                         id_publisher=None,
                         my_authors=None,
                         oai_url=None,
                         pages=None,
                         publication_url=None,
                         title=None,
                         volume=None,
                         year=None):
    """Find a publication by its origin and, if it is a preprint,
    turn it into an article.

    The lookup is delegated to ``get_id`` on the ``publications`` table,
    with the ``origin`` field set to ``oai_url``. Every other keyword
    argument is only used when rewriting the preprint.

    Keyword Args:
        id_publisher (int): identifier of the publisher in the database.
        my_authors (unicode): authors of my institute separated by a comma.
        oai_url (unicode): the OAI identifier of the article.
        pages (unicode): the page reference.
        publication_url (unicode): the URL of the publication.
        title (unicode): the title of the publication.
        volume (unicode): the volume reference.
        year (unicode): the year of publication, also passed to the logger.

    Returns:
        tuple: ``(id, status)``.

        * ``(None, 0)`` when no record is found.
        * ``(id, 0)`` when the record exists but is not a preprint.
        * ``(id, 1)`` when the record is a preprint to be transformed.
          The database write is skipped when ``self.dry_run`` is set,
          but the status is still ``1``.

    """
    if self.dbg:
        print("check existing article by origin")

    table = self.db.publications

    found_id = get_id(table, origin=oai_url)
    if not found_id:
        return (None, 0)

    # already stored under a category other than preprint: leave it alone
    if table[found_id].id_categories != self.id_preprint:
        self.logs[-1].idle(MSG_IN_DB, year)
        return (found_id, 0)

    # the record is a preprint: log the transformation ...
    self.logs[-1].modify(MSG_TRANSFORM_PREPRINT, year)

    # ... and apply it unless this is a dry run
    if self.dry_run:
        return (found_id, 1)

    new_values = {"authors_institute": my_authors,
                  "id_categories": self.id_category,
                  "id_publishers": id_publisher,
                  "id_status": UNDEF_ID,
                  "pages": pages,
                  "publication_url": publication_url,
                  "title": title,
                  "volume": volume,
                  "year": year}
    table[found_id] = new_values

    return (found_id, 1)
def
get_record_by_fields
(
self
,
oai_url
,
year
,
...
...
@@ -239,6 +179,85 @@ class Articles(Automaton):
return
(
rec_id
,
1
)
def get_record_by_origin(self,
                         primary_oai_url,
                         year,
                         id_publisher=None,
                         my_authors=None,
                         oai_url=None,
                         pages=None,
                         publication_url=None,
                         title=None,
                         volume=None):
    """Get an existing record using the origin field and its value
    defined in the *primary_oai_url* argument.

    Note:
        This method is required to transform a preprint into an article.
        All the keyword arguments are needed by the transformation.

    Args:
        primary_oai_url (unicode): the *primary* OAI identifier of the
            record. A record matches when its ``origin`` field contains
            this value. An empty value never matches.
        year (unicode): the year of publication. It is passed to the
            logger and written to the record during the transformation.

    Keyword Args:
        id_publisher (int): identifier of the publisher in the database.
        my_authors (unicode): authors of my institute separated by a comma.
        oai_url (unicode): the full oai_url(s) of the article.
        pages (unicode): the page reference.
        publication_url (unicode): the URL of the publication.
        title (unicode): the title of the publication.
        volume (unicode): the volume reference.

    Returns:
        tuple: ``(id, status)`` which contains the ``id`` of the record.
        It is equal to ``None`` when nothing is found.
        The ``status`` is equal to one when the existing record is a
        preprint to be modified into an article, zero otherwise.
        In dry-run mode the status is still one although nothing is
        written to the database.

    """
    if self.dbg:
        print("check existing article by origin")

    # An empty identifier would turn the ``contains`` search below into a
    # match-anything pattern and could transform an unrelated preprint.
    if not primary_oai_url:
        return (None, 0)

    # alias
    db = self.db
    publications = db.publications

    # search by origin, fetching in one query the two columns needed below
    query = publications.origin.contains(primary_oai_url)
    row = db(query).select(publications.id,
                           publications.id_categories).first()

    # first() returns None when the selection is empty
    if row is None:
        return (None, 0)

    rec_id = row.id

    # not a preprint ?
    if row.id_categories != self.id_preprint:
        self.logs[-1].idle(MSG_IN_DB, year)
        return (rec_id, 0)

    # transform a preprint into an article
    self.logs[-1].modify(MSG_TRANSFORM_PREPRINT, year)

    if not self.dry_run:
        publications[rec_id] = dict(authors_institute=my_authors,
                                    id_categories=self.id_category,
                                    id_publishers=id_publisher,
                                    id_status=UNDEF_ID,
                                    oai_url=oai_url,
                                    pages=pages,
                                    publication_url=publication_url,
                                    title=title,
                                    volume=volume,
                                    year=year)

    return (rec_id, 1)
def
insert_record
(
self
,
record
):
"""Insert an article in the database.
...
...
@@ -286,10 +305,11 @@ class Articles(Automaton):
pages
=
pages
,
publication_url
=
publication_url
,
title
=
title
,
volume
=
volume
,
year
=
year
)
volume
=
volume
)
rec_id
,
status
=
self
.
get_record_by_origin
(
**
fields
)
rec_id
,
status
=
self
.
get_record_by_origin
(
record
.
primary_oai_url
(),
year
,
**
fields
)
if
rec_id
:
return
status
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment