Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
0cbfd9af
Commit
0cbfd9af
authored
Jun 13, 2014
by
LE GAC Renaud
Browse files
Improved algorithm to find cppm authors.
parent
7aeb1ccb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
36 deletions
+21
-36
modules/harvest_tools.py
modules/harvest_tools.py
+21
-36
No files found.
modules/harvest_tools.py
View file @
0cbfd9af
...
...
@@ -71,34 +71,35 @@ def cppm_authors(record, db=None, id_project=None, id_team=None):
@type id_team: int or None
@param id_team:
@rtype: unicode or None
@return: author names separated by ', ' or UNKNOWN or none
@rtype: str
@return: author names separated by comma or an empty string
when there are no authors
"""
# find authors of the institute signing the record
rep
=
record
.
find_authors_by_institute
(
current
.
app
.
reg_institute
,
family_name_fr
)
if
rep
:
return
rep
# nothing found, check that the institute list is defined
# if institutes are not defined, use the rescue list stored in the database
if
not
record
.
is_institute_defined
():
if
db
==
None
or
id_team
==
None
:
return
UNKNOWN
row
=
db
.
cppm_authors
(
year
=
record
.
year
(),
id_projects
=
id_project
,
id_teams
=
id_team
)
if
not
row
:
return
None
# nothing found, use the rescue list stored in the database
#
# NOTE: the following code covers all cases:
# institutes not defined or defined only for a fraction of the authors
#
if
db
==
None
or
id_team
==
None
:
return
''
row
=
db
.
cppm_authors
(
year
=
record
.
year
(),
id_projects
=
id_project
,
id_teams
=
id_team
)
if
not
row
:
return
None
reference
=
row
[
'authors'
].
split
(
', '
)
return
fix_cppm_authors
(
record
,
reference
)
reference
=
row
[
'authors'
].
split
(
', '
)
return
fix_cppm_authors
(
record
,
reference
)
return
None
def
family_name_fr
(
x
):
"""Extract the family name when the full name is encoded as C{J. Doe}.
...
...
@@ -143,14 +144,14 @@ def fix_cppm_authors(record, reference):
@type reference: list
@param reference: list of author names
@rtype:
unicode
@rtype:
str
@return: author names separated by comma or an empty string
when there are no authors
"""
s1
=
set
(
record
.
authors_as_list
())
s2
=
set
(
reference
)
# sort according to family name
li
=
list
(
s1
.
intersection
(
s2
))
li
.
sort
(
key
=
family_name_fr
)
...
...
@@ -1125,22 +1126,6 @@ class Articles(PublicationsTool):
volume
=
volume
,
year
=
year
)
# In April 2014 the pages field of some inspirehep records was modified;
# the following code handles this case
if
not
id
:
id
=
get_id
(
db
.
publications
,
id_projects
=
self
.
harvester
.
id_projects
,
id_publishers
=
id_publisher
,
id_teams
=
self
.
harvester
.
id_teams
,
title
=
title
,
volume
=
volume
,
year
=
year
)
if
id
:
if
self
.
selector
.
mode
!=
DRY_RUN
:
db
.
publications
[
id
]
=
dict
(
pages
=
pages
)
self
.
logs
[
-
1
].
modify
(
MSG_FIX_PAGE
)
# fix origin field
if
id
and
not
db
.
publications
[
id
].
origin
:
if
self
.
selector
.
mode
!=
DRY_RUN
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment