Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
10b9b585
Commit
10b9b585
authored
Sep 20, 2015
by
LE GAC Renaud
Browse files
Review protection in Record(s) and CheckAndFix classes.
parent
6dbde3d3
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
81 additions
and
70 deletions
+81
-70
modules/harvest_tools/publicationstool.py
modules/harvest_tools/publicationstool.py
+1
-1
modules/invenio_tools/checkandfix.py
modules/invenio_tools/checkandfix.py
+62
-58
modules/invenio_tools/recordinst.py
modules/invenio_tools/recordinst.py
+10
-7
modules/invenio_tools/recordpubli.py
modules/invenio_tools/recordpubli.py
+4
-2
modules/invenio_tools/recordthesis.py
modules/invenio_tools/recordthesis.py
+4
-2
No files found.
modules/harvest_tools/publicationstool.py
View file @
10b9b585
...
...
@@ -24,7 +24,7 @@ MSG_NO_PROJECT = 'Select a "project" !!!'
MSG_NO_TEAM
=
'Select a "team" !!!'
MSG_NO_OAI
=
"Reject no OAI identifier"
MSG_WELL_FORM
ED
_OAI
=
"Reject OAI is not well formed"
MSG_WELL_FORM_OAI
=
"Reject OAI is not well formed"
class
PublicationsTool
(
object
):
...
...
modules/invenio_tools/checkandfix.py
View file @
10b9b585
...
...
@@ -47,6 +47,7 @@ MONTHS = {u'Jan':'01',
MSG_NO_AUTHOR
=
"Reject no author(s)"
MSG_NO_COUNTRY
=
"Reject invalid country"
MSG_NO_CONF_DATE
=
"Reject no conference date"
MSG_NO_DATE
=
"Reject no submission date"
MSG_NO_MY_AUTHOR
=
"Reject no authors of my institute"
MSG_NO_REF
=
"Reject incomplete paper reference"
...
...
@@ -119,12 +120,12 @@ class CheckAndFix(object):
if
isinstance
(
record
,
RecordConf
):
# INSPIREHEP start date encoded as 2014-12-31
if
"x"
in
record
[
"111"
]:
val
=
record
[
"111"
][
"x"
]
if
"x"
in
record
[
u
"111"
]:
val
=
record
[
u
"111"
][
"x"
]
# CDS end date encoded as 20141231
elif
"z"
in
record
[
"111"
]:
val
=
record
[
"111"
][
"z"
]
elif
"z"
in
record
[
u
"111"
]:
val
=
record
[
u
"111"
][
"z"
]
val
=
"%s-%s-%s"
%
(
val
[
0
:
4
],
val
[
4
:
6
],
val
[
6
:
8
])
elif
isinstance
(
record
,
RecordThesis
):
...
...
@@ -155,34 +156,34 @@ class CheckAndFix(object):
"""
# standard case
if
isinstance
(
record
[
"773"
],
dict
):
if
isinstance
(
record
[
u
"773"
],
dict
):
if
"o"
in
record
[
"773"
]:
if
"o"
in
record
[
u
"773"
]:
for
reg
in
DECODE_REF
:
m
=
reg
.
match
(
record
[
"773"
][
"o"
])
m
=
reg
.
match
(
record
[
u
"773"
][
"o"
])
if
m
:
record
[
"773"
][
"p"
]
=
m
.
group
(
"p"
)
record
[
"773"
][
"v"
]
=
m
.
group
(
"v"
)
record
[
"773"
][
"y"
]
=
m
.
group
(
"y"
)
record
[
"773"
][
"c"
]
=
m
.
group
(
"c"
)
record
[
u
"773"
][
"p"
]
=
m
.
group
(
"p"
)
record
[
u
"773"
][
"v"
]
=
m
.
group
(
"v"
)
record
[
u
"773"
][
"y"
]
=
m
.
group
(
"y"
)
record
[
u
"773"
][
"c"
]
=
m
.
group
(
"c"
)
return
raise
CheckException
(
MSG_NO_REF
)
# list case -- paper with erratum
elif
isinstance
(
record
[
"773"
],
list
):
elif
isinstance
(
record
[
u
"773"
],
list
):
for
i
in
range
(
len
(
record
[
"773"
])):
for
i
in
range
(
len
(
record
[
u
"773"
])):
if
"o"
in
record
[
"773"
][
i
]:
if
"o"
in
record
[
u
"773"
][
i
]:
fixed
=
False
for
reg
in
DECODE_REF
:
m
=
reg
.
match
(
record
[
"773"
][
i
][
"o"
])
m
=
reg
.
match
(
record
[
u
"773"
][
i
][
"o"
])
if
m
:
record
[
"773"
][
i
][
"p"
]
=
m
.
group
(
"p"
)
record
[
"773"
][
i
][
"v"
]
=
m
.
group
(
"v"
)
record
[
"773"
][
i
][
"y"
]
=
m
.
group
(
"y"
)
record
[
"773"
][
i
][
"c"
]
=
m
.
group
(
"c"
)
record
[
u
"773"
][
i
][
"p"
]
=
m
.
group
(
"p"
)
record
[
u
"773"
][
i
][
"v"
]
=
m
.
group
(
"v"
)
record
[
u
"773"
][
i
][
"y"
]
=
m
.
group
(
"y"
)
record
[
u
"773"
][
i
][
"c"
]
=
m
.
group
(
"c"
)
fixed
=
True
break
...
...
@@ -205,50 +206,50 @@ class CheckAndFix(object):
"""
if
"100"
not
in
record
and
"700"
not
in
record
:
if
u
"100"
not
in
record
and
u
"700"
not
in
record
:
raise
CheckException
(
MSG_NO_AUTHOR
)
if
"100"
in
record
and
isinstance
(
record
[
"100"
],
list
):
if
u
"100"
in
record
and
isinstance
(
record
[
u
"100"
],
list
):
# from time to time the first author is duplicated
li
=
[]
for
di
in
record
[
"100"
]:
for
di
in
record
[
u
"100"
]:
if
di
not
in
li
:
li
.
append
(
di
)
if
len
(
li
)
==
1
:
record
[
"100"
]
=
li
[
0
]
record
[
u
"100"
]
=
li
[
0
]
else
:
raise
CheckException
(
MSG_TO_MANY_FAUTHOR
)
# alias
authors
,
first_author
=
None
,
None
if
"700"
in
record
:
authors
=
record
[
"700"
]
if
u
"700"
in
record
:
authors
=
record
[
u
"700"
]
if
"100"
in
record
:
first_author
=
record
[
"100"
]
if
u
"100"
in
record
:
first_author
=
record
[
u
"100"
]
# first author not defined
if
not
first_author
and
authors
:
if
isinstance
(
record
[
"700"
],
list
):
record
[
"100"
]
=
record
[
"700"
][
0
]
if
isinstance
(
record
[
u
"700"
],
list
):
record
[
u
"100"
]
=
record
[
u
"700"
][
0
]
else
:
record
[
"100"
]
=
record
[
"700"
]
record
[
u
"100"
]
=
record
[
u
"700"
]
# first author not in the authors list
elif
first_author
and
authors
:
if
isinstance
(
record
[
"700"
],
list
):
if
record
[
"100"
][
"a"
]
!=
record
[
"700"
][
0
][
"a"
]:
record
[
"700"
].
insert
(
0
,
record
[
"100"
])
if
isinstance
(
record
[
u
"700"
],
list
):
if
record
[
u
"100"
][
"a"
]
!=
record
[
u
"700"
][
0
][
"a"
]:
record
[
u
"700"
].
insert
(
0
,
record
[
u
"100"
])
elif
record
[
"700"
][
"a"
]
!=
record
[
"100"
][
"a"
]:
record
[
"700"
]
=
[
record
[
"100"
],
record
[
"700"
]]
elif
record
[
u
"700"
][
"a"
]
!=
record
[
u
"100"
][
"a"
]:
record
[
u
"700"
]
=
[
record
[
u
"100"
],
record
[
u
"700"
]]
# only the first author is defined
elif
first_author
and
not
authors
:
record
[
"700"
]
=
record
[
"100"
]
record
[
u
"700"
]
=
record
[
u
"100"
]
def
clean_erratum
(
self
,
record
):
"""Clean record with erratum by removing them.
...
...
@@ -265,10 +266,10 @@ class CheckAndFix(object):
# use the simplest algorithm by selecting the first entry in the list
# fair to assume that the article is published first.
record
[
"773"
]
=
record
[
"773"
][
0
]
record
[
u
"773"
]
=
record
[
u
"773"
][
0
]
# treat year and submitted date
for
k
in
(
"260"
,
"269"
):
for
k
in
(
u
"260"
,
u
"269"
):
if
k
in
record
and
isinstance
(
record
[
k
],
list
):
record
[
k
]
=
record
[
k
][
0
]
...
...
@@ -305,6 +306,9 @@ class CheckAndFix(object):
raise
CheckException
(
MSG_NO_COUNTRY
)
# check and fix conference date
if
not
(
u
"111"
in
record
and
"d"
in
record
[
u
"111"
]):
raise
CheckException
(
MSG_NO_CONF_DATE
)
value
=
record
[
u
"111"
][
"d"
]
m
=
REG_CONF_DATES
.
match
(
value
)
if
not
m
:
...
...
@@ -364,11 +368,11 @@ class CheckAndFix(object):
return
# standard case
if
isinstance
(
record
[
"773"
],
dict
):
if
"p"
in
record
[
"773"
]
and
"v"
in
record
[
"773"
]:
if
isinstance
(
record
[
u
"773"
],
dict
):
if
"p"
in
record
[
u
"773"
]
and
"v"
in
record
[
u
"773"
]:
editor
=
record
[
"773"
][
"p"
]
volume
=
record
[
"773"
][
"v"
]
editor
=
record
[
u
"773"
][
"p"
]
volume
=
record
[
u
"773"
][
"v"
]
# add space after the dot Phys.Rev -> Phys. Rev
editor
=
re
.
sub
(
r
'\.([A-Z])'
,
r
'. \1'
,
editor
)
...
...
@@ -382,14 +386,14 @@ class CheckAndFix(object):
# remove stupid mistake
editor
=
CLEAN_REVIEW
(
editor
)
record
[
"773"
][
"p"
]
=
editor
record
[
"773"
][
"v"
]
=
volume
record
[
u
"773"
][
"p"
]
=
editor
record
[
u
"773"
][
"v"
]
=
volume
# list case -- publication with erratum
elif
isinstance
(
record
[
"773"
],
list
):
elif
isinstance
(
record
[
u
"773"
],
list
):
editors
=
record
.
_get
(
"773"
,
'p'
,
force_list
=
True
)
volumes
=
record
.
_get
(
"773"
,
'v'
,
force_list
=
True
)
editors
=
record
.
_get
(
u
"773"
,
'p'
,
force_list
=
True
)
volumes
=
record
.
_get
(
u
"773"
,
'v'
,
force_list
=
True
)
if
len
(
editors
)
!=
len
(
volumes
):
raise
CheckException
(
MSG_WELL_FORMED_EDITOR
)
...
...
@@ -406,8 +410,8 @@ class CheckAndFix(object):
editor
=
CLEAN_REVIEW
(
editor
)
record
[
"773"
][
i
][
"p"
]
=
editor
record
[
"773"
][
i
][
"v"
]
=
volume
record
[
u
"773"
][
i
][
"p"
]
=
editor
record
[
u
"773"
][
i
][
"v"
]
=
volume
def
format_universities
(
self
,
record
):
"""Format the name of the university for PhD:
...
...
@@ -564,8 +568,8 @@ class CheckAndFix(object):
return
# list of references (paper with erratum)
refs
=
record
[
"773"
]
if
not
isinstance
(
record
[
"773"
],
list
):
refs
=
record
[
u
"773"
]
if
not
isinstance
(
record
[
u
"773"
],
list
):
refs
=
[
refs
]
# INSPIREHEP
...
...
@@ -647,7 +651,7 @@ class CheckAndFix(object):
if
len
(
dates
)
!=
1
:
raise
CheckException
(
MSG_TO_MANY_DATE
)
if
"269"
not
in
record
or
isinstance
(
record
[
"269"
],
list
):
if
u
"269"
not
in
record
or
isinstance
(
record
[
u
"269"
],
list
):
record
[
u
"269"
]
=
dict
()
record
[
u
"269"
][
"c"
]
=
dates
[
0
]
...
...
@@ -662,8 +666,8 @@ class CheckAndFix(object):
"""
# found on INSPIREHEP (see record 1317573)
if
"500"
in
record
and
"a"
in
record
[
"500"
]:
if
record
[
"500"
][
"a"
]
==
u
"*Temporary record*"
:
if
u
"500"
in
record
and
"a"
in
record
[
u
"500"
]:
if
record
[
u
"500"
][
"a"
]
==
u
"*Temporary record*"
:
raise
CheckException
(
MSG_TEMPORARY_RECORD
)
def
year
(
self
,
record
):
...
...
@@ -687,7 +691,7 @@ class CheckAndFix(object):
val
=
''
# protection against "publication date YYYY-MM-DD"
li
=
record
.
_get
(
"260"
,
'c'
,
force_list
=
True
)
li
=
record
.
_get
(
u
"260"
,
'c'
,
force_list
=
True
)
li
=
[
el
for
el
in
li
if
REG_YEAR
.
match
(
el
)]
if
len
(
li
)
==
1
:
...
...
@@ -714,10 +718,10 @@ class CheckAndFix(object):
if
val
:
m
=
REG_YEAR
.
search
(
val
)
if
m
:
if
"260"
in
record
and
isinstance
(
record
[
"260"
],
dict
):
record
[
"260"
][
"c"
]
=
m
.
group
(
1
)
if
u
"260"
in
record
and
isinstance
(
record
[
u
"260"
],
dict
):
record
[
u
"260"
][
"c"
]
=
m
.
group
(
1
)
else
:
record
[
"260"
]
=
{
"c"
:
m
.
group
(
1
)}
record
[
u
"260"
]
=
{
"c"
:
m
.
group
(
1
)}
return
raise
CheckException
(
MSG_NO_YEAR
)
...
...
modules/invenio_tools/recordinst.py
View file @
10b9b585
...
...
@@ -46,26 +46,29 @@ class RecordInst(Record):
def
future_identifier
(
self
):
"""
Returns:
unicode: the future inspirehep identifier.
unicode: the future inspirehep identifier or an empty string
if not defined.
"""
return
self
[
u
"110"
][
"t"
]
return
self
.
_get
(
u
"110"
,
"t"
)
def
identifier
(
self
):
"""
Returns:
unicode: the current inspirehep identifier (2015).
unicode: the current inspirehep identifier (2015) or an empty
string if not defined.
"""
return
self
[
u
"110"
][
"u"
]
return
self
.
_get
(
u
"110"
,
"u"
)
def
name
(
self
):
"""
Returns:
unicode: the name of the institute.
unicode: the name of the institute or an empty string if
not defined.
"""
return
self
[
u
"110"
][
"b"
]
return
self
.
_get
(
u
"110"
,
"b"
)
def
rex
(
self
):
"""
...
...
@@ -74,4 +77,4 @@ class RecordInst(Record):
in cds.cern.ch or inspirehep.net store
"""
return
r
"%s|%s"
%
(
self
[
u
"110"
][
"u"
],
self
[
u
"110"
][
"t"
]
)
return
r
"%s|%s"
%
(
self
.
identifier
(),
self
.
future_identifier
()
)
modules/invenio_tools/recordpubli.py
View file @
10b9b585
...
...
@@ -152,7 +152,8 @@ class RecordPubli(Record):
s
=
', '
.
join
(
s
)
if
regex
.
search
(
s
):
return
self
[
u
"700"
][
"a"
]
if
"a"
in
self
[
u
"700"
]:
return
self
[
u
"700"
][
"a"
]
return
None
...
...
@@ -168,7 +169,8 @@ class RecordPubli(Record):
s
=
', '
.
join
(
di
[
'u'
])
if
regex
.
search
(
s
):
authors
.
append
(
di
[
'a'
])
if
"a"
in
di
:
authors
.
append
(
di
[
'a'
])
if
cmpFct
:
authors
.
sort
(
key
=
cmpFct
)
...
...
modules/invenio_tools/recordthesis.py
View file @
10b9b585
...
...
@@ -62,12 +62,14 @@ class RecordThesis(RecordPubli):
li
=
[]
if
u
"700"
in
self
and
isinstance
(
self
[
u
"700"
],
dict
):
if
"e"
in
self
[
u
"700"
]
and
self
[
u
"700"
][
"e"
]
==
THESIS_DIR
:
li
.
append
(
self
[
u
"700"
][
"a"
])
if
"a"
in
self
[
u
"700"
]:
li
.
append
(
self
[
u
"700"
][
"a"
])
elif
u
"700"
in
self
and
isinstance
(
self
[
u
"700"
],
list
):
for
di
in
self
[
u
"700"
]:
if
"e"
in
di
and
di
[
"e"
]
==
THESIS_DIR
:
li
.
append
(
di
[
"a"
])
if
"a"
in
di
:
li
.
append
(
di
[
"a"
])
return
', '
.
join
(
li
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment