Commit 0cbfd9af authored by LE GAC Renaud
Browse files

Improved algorithm to find cppm authors.

parent 7aeb1ccb
......@@ -71,34 +71,35 @@ def cppm_authors(record, db=None, id_project=None, id_team=None):
@type id_team: int or None
@param id_team:
@rtype: unicode or None
@return: author names separated by ', ' or UNKNOWN or none
@rtype: str
@return: author names separated by comma or an empty string
when there are no authors
"""
# find authors of the institute signing the record
rep = record.find_authors_by_institute(current.app.reg_institute,
family_name_fr)
if rep:
return rep
# nothing found, check that the institute list is defined
# if institutes are not defined, use the rescue list stored in the database
if not record.is_institute_defined():
if db == None or id_team == None:
return UNKNOWN
row = db.cppm_authors(year=record.year(),
id_projects=id_project,
id_teams=id_team)
if not row:
return None
# nothing found, use the rescue list stored in the database
#
# NOTE: the following code covers all the cases:
# institutes not defined or defined only for a fraction of the authors
#
if db == None or id_team == None:
return ''
row = db.cppm_authors(year=record.year(),
id_projects=id_project,
id_teams=id_team)
if not row:
return None
reference = row['authors'].split(', ')
return fix_cppm_authors(record, reference)
reference = row['authors'].split(', ')
return fix_cppm_authors(record, reference)
return None
def family_name_fr(x):
"""Extract the family name when the full name is encoded as C{J. Doe}.
......@@ -143,14 +144,14 @@ def fix_cppm_authors(record, reference):
@type reference: list
@param reference: list of author names
@rtype: unicode
@rtype: str
@return: author names separated by comma or an empty string
when there are no authors
"""
s1 = set(record.authors_as_list())
s2 = set(reference)
# sort according to family name
li = list(s1.intersection(s2))
li.sort(key=family_name_fr)
......@@ -1125,22 +1126,6 @@ class Articles(PublicationsTool):
volume=volume,
year=year)
# In April 2014 the pages field of some inspirehep records was modified;
# the following code handles this case
if not id:
id = get_id(db.publications, id_projects=self.harvester.id_projects,
id_publishers=id_publisher,
id_teams=self.harvester.id_teams,
title=title,
volume=volume,
year=year)
if id:
if self.selector.mode != DRY_RUN:
db.publications[id] = dict(pages=pages)
self.logs[-1].modify(MSG_FIX_PAGE)
# fix origin field
if id and not db.publications[id].origin:
if self.selector.mode != DRY_RUN:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment