Commit a26e2255 authored by LE GAC Renaud
Browse files

Modify the signature of the method Automation.get_record_by_fields.

parent b47d5d10
...@@ -91,7 +91,7 @@ class Articles(Automaton): ...@@ -91,7 +91,7 @@ class Articles(Automaton):
The other arguments are used to transform the corresponding preprint The other arguments are used to transform the corresponding preprint
into an article. into an article.
Args: Keyword Args:
oai_url (unicode): the OAI identifier of the article. oai_url (unicode): the OAI identifier of the article.
id_publisher (int): identifier of the publisher in the database. id_publisher (int): identifier of the publisher in the database.
my_authors (unicode): authors of my institute separated by a comma. my_authors (unicode): authors of my institute separated by a comma.
...@@ -138,33 +138,39 @@ class Articles(Automaton): ...@@ -138,33 +138,39 @@ class Articles(Automaton):
return (rec_id, 1) return (rec_id, 1)
def get_record_by_fields(self, def get_record_by_fields(self,
oai_url,
year,
id_publisher=None, id_publisher=None,
my_authors=None, my_authors=None,
oai_url=None,
pages=None, pages=None,
publication_url=None, publication_url=None,
preprint_number=None, preprint_number=None,
title=None, title=None,
volume=None, volume=None):
year=None): """Get article matching fields values defined
"""Get the record matching the input fields in the keyword arguments.
Note: Note:
Fix the field origin when a match is found. This method is required to deal with an article entered by hand and
found later by the harvester.
Note:
Transform a preprint into article.
Args: Args:
oai_url (unicode): the oai_url, *e.g*
``http://cds.cern.ch/record/123456``. The origin field
of the existing database record is updated to **oai_url**
when a match is found.
year (unicode): the year of the publication. It is used
by the search algorithm and by the logger.
Keyword Args:
id_publisher (int): identifier of the publisher in the database. id_publisher (int): identifier of the publisher in the database.
my_authors (unicode): authors of my institute separated by a comma. my_authors (unicode): authors of my institute separated by a comma.
oai_url (unicode): the URL defining the OAI.
pages (unicode): the page reference. pages (unicode): the page reference.
publication_url (unicode): the URL of the publications publication_url (unicode): the URL of the publications
preprint_number (unicode): the preprint number preprint_number (unicode): the preprint number
title (unicode): the title of the publication. title (unicode): the title of the publication.
volume (unicode): the volume reference. volume (unicode): the volume reference.
year (unicode): the year of publication.
Returns: Returns:
tuple: ``(id, status)`` which contains the ``id`` of the record. tuple: ``(id, status)`` which contains the ``id`` of the record.
...@@ -176,35 +182,40 @@ class Articles(Automaton): ...@@ -176,35 +182,40 @@ class Articles(Automaton):
if self.dbg: if self.dbg:
print "get existing article by fields" print "get existing article by fields"
# alias
db = self.db db = self.db
id_project = self.id_project
id_team = self.id_team
logs = self.logs
# check against published articles # check against published articles
rec_id = get_id(db.publications, rec_id = get_id(db.publications,
id_projects=self.id_project, id_projects=id_project,
id_publishers=id_publisher, id_publishers=id_publisher,
id_teams=self.id_team, id_teams=id_team,
pages=pages, pages=pages,
volume=volume, volume=volume,
year=year) year=year)
# fix origin field # fix origin field
if rec_id and not db.publications[rec_id].origin: publication = db.publications[rec_id]
if rec_id and not publication.origin:
if not self.dry_run: if not self.dry_run:
db.publications[rec_id] = dict(origin=oai_url) publication = dict(origin=oai_url)
self.logs[-1].modify(MSG_FIX_ORIGIN, year) logs[-1].modify(MSG_FIX_ORIGIN, year)
return (rec_id, 1) return (rec_id, 1)
if rec_id: if rec_id:
self.logs[-1].idle(MSG_IN_DB, year) logs[-1].idle(MSG_IN_DB, year)
return (rec_id, 0) return (rec_id, 0)
# check against published preprint # check against published preprint
# a preprint can be identified by its category which is PRE (15) # a preprint can be identified by its category which is PRE (15)
rec_id = get_id(db.publications, rec_id = get_id(db.publications,
id_categories=self.id_preprint, id_categories=self.id_preprint,
id_projects=self.id_project, id_projects=id_project,
id_teams=self.id_team, id_teams=id_team,
preprint=preprint_number) preprint=preprint_number)
if not rec_id: if not rec_id:
...@@ -284,15 +295,13 @@ class Articles(Automaton): ...@@ -284,15 +295,13 @@ class Articles(Automaton):
fields = dict(id_publisher=id_publisher, fields = dict(id_publisher=id_publisher,
my_authors=my_authors, my_authors=my_authors,
oai_url=oai_url,
pages=pages, pages=pages,
publication_url=publication_url, publication_url=publication_url,
preprint_number=preprint_number, preprint_number=preprint_number,
title=title, title=title,
volume=volume, volume=volume)
year=year)
rec_id, status = self.get_record_by_fields(**fields) rec_id, status = self.get_record_by_fields(oai_url, year, **fields)
if rec_id: if rec_id:
return status return status
......
...@@ -343,19 +343,26 @@ class Automaton(object): ...@@ -343,19 +343,26 @@ class Automaton(object):
return True return True
def get_record_by_fields(self, **kwargs): def get_record_by_fields(self, oai_url, year, **kwargs):
"""Get database record matching fields values defined """Get database record matching fields values defined
in the keyword arguments. in the keyword arguments.
Keyword Args:
oai_url (unicode): *e.g* ``"http://cds.cern.ch/record/123456"``
year (int): the year of the publication.
Note: Note:
Fix the field origin when a match is found. This method is required to deal with a publication entered by hand
and found later by a harvester.
Note: Args:
The year is only used by the logger. oai_url (unicode): the oai_url, *e.g*
``http://cds.cern.ch/record/123456``. The origin field
of the existing database record is updated to **oai_url**
when a match is found.
year (int): the year of the publication. It is used
by the search algorithm and by the logger.
Keyword Args:
kwargs (unicode): a series of key, value pair where the
key is the name of a publications database field.
Returns: Returns:
tuple: ``(id, status)`` which contains the ``id`` of the record. tuple: ``(id, status)`` which contains the ``id`` of the record.
...@@ -371,9 +378,9 @@ class Automaton(object): ...@@ -371,9 +378,9 @@ class Automaton(object):
db = self.db db = self.db
logs = self.logs logs = self.logs
# origin can't be used for the search # add the publication year to search criteria
oai_url = kwargs["oai_url"] if year:
del kwargs["oai_url"] kwargs["year"] = year
# look for an existing record # look for an existing record
rec_id = get_id(db.publications, **kwargs) rec_id = get_id(db.publications, **kwargs)
...@@ -387,10 +394,10 @@ class Automaton(object): ...@@ -387,10 +394,10 @@ class Automaton(object):
if not self.dry_run: if not self.dry_run:
publication = dict(origin=oai_url) publication = dict(origin=oai_url)
logs[-1].modify(MSG_FIX_ORIGIN, kwargs["year"]) logs[-1].modify(MSG_FIX_ORIGIN, year)
return (rec_id, 1) return (rec_id, 1)
logs[-1].idle(MSG_IN_DB, kwargs["year"]) logs[-1].idle(MSG_IN_DB, year)
return (rec_id, 0) return (rec_id, 0)
def insert_record(self, record): def insert_record(self, record):
......
...@@ -72,11 +72,9 @@ class Notes(Automaton): ...@@ -72,11 +72,9 @@ class Notes(Automaton):
id_categories=self.id_category, id_categories=self.id_category,
id_projects=self.id_project, id_projects=self.id_project,
id_teams=self.id_team, id_teams=self.id_team,
oai_url=oai_url, title=title)
title=title,
year=year)
rec_id, status = self.get_record_by_fields(**fields) rec_id, status = self.get_record_by_fields(oai_url, year, **fields)
if rec_id: if rec_id:
return status return status
......
...@@ -98,13 +98,11 @@ class Preprints(Automaton): ...@@ -98,13 +98,11 @@ class Preprints(Automaton):
fields = dict(first_author=first_author, fields = dict(first_author=first_author,
id_projects=self.id_project, id_projects=self.id_project,
id_teams=self.id_team, id_teams=self.id_team,
oai_url=oai_url,
preprint=preprint, preprint=preprint,
submitted=submitted, submitted=submitted,
title=title, title=title)
year=year)
rec_id, status = self.get_record_by_fields(**fields) rec_id, status = self.get_record_by_fields(oai_url, year, **fields)
if rec_id: if rec_id:
return status return status
......
...@@ -104,17 +104,15 @@ class Proceedings(Automaton): ...@@ -104,17 +104,15 @@ class Proceedings(Automaton):
conference_title=conference_title, conference_title=conference_title,
first_author=first_author, first_author=first_author,
id_publishers=id_publisher, id_publishers=id_publisher,
oai_url=oai_url,
preprint=preprint, preprint=preprint,
pages=pages, pages=pages,
publication_url=url, publication_url=url,
report_numbers=report_numbers, report_numbers=report_numbers,
submitted=submitted, submitted=submitted,
volume=volume, volume=volume,
title=title, title=title)
year=year)
rec_id, status = self.get_record_by_fields(**fields) rec_id, status = self.get_record_by_fields(oai_url, year, **fields)
if rec_id: if rec_id:
return status return status
......
...@@ -96,11 +96,9 @@ class Reports(Automaton): ...@@ -96,11 +96,9 @@ class Reports(Automaton):
fields = dict(id_categories=self.id_category, fields = dict(id_categories=self.id_category,
id_projects=self.id_project, id_projects=self.id_project,
id_teams=self.id_team, id_teams=self.id_team,
oai_url=oai_url, title=title)
title=title,
year=year)
rec_id, status = self.get_record_by_fields(**fields) rec_id, status = self.get_record_by_fields(oai_url, year, **fields)
if rec_id: if rec_id:
return status return status
......
...@@ -83,11 +83,9 @@ class Talks(Automaton): ...@@ -83,11 +83,9 @@ class Talks(Automaton):
# get an already published talk # get an already published talk
fields = dict(conference_title=conference_title, fields = dict(conference_title=conference_title,
first_author=first_author, first_author=first_author,
oai_url=oai_url, title=title)
title=title,
year=year)
rec_id, status = self.get_record_by_fields(**fields) rec_id, status = self.get_record_by_fields(oai_url, year, **fields)
if rec_id: if rec_id:
return status return status
......
...@@ -90,11 +90,9 @@ class Thesis(Automaton): ...@@ -90,11 +90,9 @@ class Thesis(Automaton):
defense=defense_date, defense=defense_date,
id_projects=self.id_project, id_projects=self.id_project,
id_teams=self.id_team, id_teams=self.id_team,
oai_url=oai_url, title=title)
title=title,
year=year)
rec_id, status = self.get_record_by_fields(**fields) rec_id, status = self.get_record_by_fields(oai_url, year, **fields)
if rec_id: if rec_id:
return status return status
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment