Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
77341193
Commit
77341193
authored
Sep 16, 2015
by
LE GAC Renaud
Browse files
Replace the function fix_amu by the method CheckAndFix.format_universities.
parent
009c5f94
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
225 additions
and
30 deletions
+225
-30
languages/fr-fr.py
languages/fr-fr.py
+2
-1
modules/harvest_tools/__init__.py
modules/harvest_tools/__init__.py
+0
-1
modules/harvest_tools/base.py
modules/harvest_tools/base.py
+0
-23
modules/harvest_tools/thesis.py
modules/harvest_tools/thesis.py
+3
-2
modules/invenio_tools/checkandfix.py
modules/invenio_tools/checkandfix.py
+55
-2
modules/invenio_tools/record.py
modules/invenio_tools/record.py
+1
-1
tests/harvester/CheckAndFix/test_phd_cds1394605_fix.py
tests/harvester/CheckAndFix/test_phd_cds1394605_fix.py
+70
-0
tests/harvester/CheckAndFix/test_phd_cds1632177_fix.py
tests/harvester/CheckAndFix/test_phd_cds1632177_fix.py
+0
-0
tests/harvester/CheckAndFix/test_phd_cds1642541_fix.py
tests/harvester/CheckAndFix/test_phd_cds1642541_fix.py
+70
-0
tests/harvester/CheckAndFix/test_phd_cds2015250_fix.py
tests/harvester/CheckAndFix/test_phd_cds2015250_fix.py
+24
-0
No files found.
languages/fr-fr.py
View file @
77341193
...
...
@@ -297,8 +297,8 @@
'insert new %s'
:
'insert new %s'
,
'install'
:
'installé'
,
'Institute'
:
'Institut'
,
'Institute identifier in inspirehep.net.'
:
'Identifiant du laboratoire dans inspirehep.net.'
,
'Institute number associated to CPPM authors'
:
"Numéro de l'Institut associé aux auteurs du CPPM"
,
'Institute identifier in inspirehep.net.'
:
"Identifiant du laboratoire dans inspirehep.net."
,
'Invalid'
:
'Non conforme'
,
"Invalid database table '%s'"
:
"Invalid database table '%s'"
,
'Invalid email'
:
'Invalid email'
,
...
...
@@ -486,6 +486,7 @@
'Reject no %s authors'
:
"Rejeté pas d'autheur(s) du %s"
,
'Reject no author(s)'
:
"Rejeté pas d'autheur(s)"
,
'Reject no authors'
:
"Rejeté pas d'auteurs"
,
'Reject no authors of my institute'
:
"Rejeté pas d'auteurs de mon laboratoire"
,
'Reject no conference information'
:
"Rejeté pas d'information sur la conférence"
,
'Reject no CPPM authors'
:
"Rejeté pas d'auteurs du CPPM"
,
'Reject no OAI identifier'
:
"Rejeté pas d'identifiant OAI"
,
...
...
modules/harvest_tools/__init__.py
View file @
77341193
...
...
@@ -5,7 +5,6 @@ and to push them in the database.
"""
from
base
import
(
DRY_RUN
,
family_name_fr
,
fix_amu
,
format_author_fr
,
ToolException
)
...
...
modules/harvest_tools/base.py
View file @
77341193
...
...
@@ -25,29 +25,6 @@ def family_name_fr(full_name):
return
full_name
[
full_name
.
find
(
' '
)
+
1
:]
def fix_amu(record):
    """Fix the name of the C{Aix Marseille University}.

    Every university name matching C{current.app.reg_institute} is replaced
    by C{Université de la Méditerrannée Aix-Marseille II} when the defense
    year is before 2012 and by C{Aix Marseille Université} otherwise.
    The list returned by C{record.these_universities()} is modified in place.

    @type record: L{Record}
    @param record: the thesis record; its methods C{these_universities}
        and C{these_defense} are used.

    @rtype: unicode
    @return: the university names separated by comma. The names are left
        untouched when no four-digit year is found in the defense date.

    """
    universities = record.these_universities()

    # The replacement depends only on the defense year. It is resolved
    # lazily, the first time a university matches, so that a record without
    # any matching university never needs a defense date.
    official_name = None

    for idx, name in enumerate(universities):
        if not re.search(current.app.reg_institute, name):
            continue

        if official_name is None:
            match = re.search(r"(\d\d\d\d)", record.these_defense() or u"")

            # without a year the right name cannot be selected:
            # keep the original names instead of crashing on match.group
            if match is None:
                break

            if int(match.group(1)) < 2012:
                official_name = \
                    u"Université de la Méditerrannée Aix-Marseille II"
            else:
                official_name = u"Aix Marseille Université"

        universities[idx] = official_name

    return ', '.join(universities)
def
format_author_fr
(
name
):
"""Format the author name according to French typographic rules,
I{i.e.} C{J.-P. Doe}.
...
...
modules/harvest_tools/thesis.py
View file @
77341193
...
...
@@ -6,7 +6,7 @@ import re
import
traceback
from
base
import
family_name_fr
,
fix_amu
,
MSG_CRASH
,
MSG_LOAD
from
base
import
family_name_fr
,
MSG_CRASH
,
MSG_LOAD
from
invenio_tools
import
CheckException
from
publicationstool
import
PublicationsTool
from
plugin_dbui
import
get_id
,
UNDEF_ID
...
...
@@ -39,7 +39,7 @@ class Thesis(PublicationsTool):
id_category
=
get_id
(
db
.
categories
,
code
=
'PHD'
)
oai_url
=
record
.
oai_url
()
title
=
record
.
title
()
universities
=
fix_amu
(
record
)
universities
=
', '
.
join
(
record
.
these_universities
()
)
# extract the year from the defense date
# this approach seems the most reliable
...
...
@@ -99,6 +99,7 @@ class Thesis(PublicationsTool):
self
.
check
.
oai
(
record
)
self
.
check
.
submitted
(
record
)
self
.
check
.
year
(
record
)
self
.
check
.
format_universities
(
record
)
except
CheckException
as
e
:
self
.
logs
[
-
1
].
reject
(
e
,
record
.
year
())
...
...
modules/invenio_tools/checkandfix.py
View file @
77341193
...
...
@@ -47,7 +47,7 @@ MSG_NO_AUTHOR = "Reject no author(s)"
MSG_NO_CONF
=
"Reject no conference information"
MSG_NO_COUNTRY
=
"Reject invalid country"
MSG_NO_DATE
=
"Reject no submission date"
MSG_NO_MY_AUTHOR
=
"Reject no
%s
authors"
MSG_NO_MY_AUTHOR
=
"Reject no authors
of my institute
"
MSG_NO_OAI
=
"Reject no OAI identifier"
MSG_NO_REF
=
"Reject incomplete paper reference"
MSG_NO_YEAR
=
"Reject no publication year"
...
...
@@ -74,6 +74,8 @@ REG_CONF_DATES_2 = re.compile("(\d+) *([A-Z][a-z]{2}) *-? *(\d+) *([A-Z][a-z]{2}
REG_CONF_DATES
=
re
.
compile
(
regex
.
REG_CONF_DATES
)
REG_SUBMITTED
=
re
.
compile
(
regex
.
REG_SUBMITTED
)
UNIVERSITY
=
"University"
def
load_record
(
host
,
record_id
):
"""Helper function to load a single record from an invenio store.
...
...
@@ -482,6 +484,57 @@ class CheckAndFix(object):
record
[
"773"
][
i
][
"p"
]
=
editor
record
[
"773"
][
i
][
"v"
]
=
volume
def format_universities(self, record):
    """Format the name of the university for PhD:

        - Fix the name of Aix-Marseille University
        - Replace U. by University

    The subfield C{record[u'502']['b']} is modified in place. It is
    processed when it is either a C{unicode} string or a C{list};
    any other type is left as it is.

    When C{current.app.inspirehep_institute_id} is 902989, each value
    containing C{Marseille} is replaced by the name selected from the
    defense year (before 2012 or not). For any other institute, C{U.} is
    replaced by the translation of C{University}.

    Nothing is done when the record is not a thesis, when the subfield
    is missing or, for institute 902989, when no year is found in the
    defense date.

    @type record: L{Record}
    @param record: the record to be fixed.

    """
    # protection
    if not record.is_thesis():
        return

    # nothing to format without the subfield 502 $b
    if u'502' not in record or "b" not in record[u'502']:
        return

    # CPPM: fix the name of Aix-Marseille university
    if current.app.inspirehep_institute_id == 902989:

        match = REG_YEAR.search(record.these_defense() or u"")

        # without a year the right name cannot be selected:
        # keep the record as it is instead of crashing on match.group
        if match is None:
            return

        if int(match.group(1)) < 2012:
            university = u"Université de la Méditerrannée Aix-Marseille II"
        else:
            university = u"Aix Marseille Université"

        def fix(value):
            # the whole value is replaced, not only the matching part
            if "Marseille" in value:
                return university
            return value

    # Other: replace U. by University
    else:
        university = current.T(UNIVERSITY).decode("utf8")

        def fix(value):
            # str.replace returns the value unchanged when U. is absent
            return value.replace('U.', university)

    # the subfield is either a single value or a list of values
    values = record[u'502']['b']

    if isinstance(values, unicode):
        record[u'502']['b'] = fix(values)

    elif isinstance(values, list):
        for i, value in enumerate(values):
            values[i] = fix(value)
def
my_authors
(
self
,
record
,
reference
=
[],
cmpFct
=
None
):
"""Check that authors of my institutes signed the record.
Fill the meta data record.my_authors.
...
...
@@ -533,7 +586,7 @@ class CheckAndFix(object):
record
.
my_authors
=
s
return
raise
CheckException
(
MSG_NO_MY_AUTHOR
%
reg_institute
.
encode
(
"utf8"
)
)
raise
CheckException
(
MSG_NO_MY_AUTHOR
)
def
oai
(
self
,
record
):
"""Check that the OAI field is defined and well formed.
...
...
modules/invenio_tools/record.py
View file @
77341193
...
...
@@ -535,7 +535,7 @@ class Record(dict):
return
False
def
is_thesis
(
self
):
"""C{True} when the record correspon
f
to a thesis.
"""C{True} when the record correspon
ds
to a thesis.
@rtype: bool
@return:
...
...
tests/harvester/CheckAndFix/test_phd_cds1394605_fix.py
0 → 100644
View file @
77341193
# -*- coding: utf-8 -*-
"""PHD
http://cds.cern.ch/record/1394605
Search for New Physics in the $B^0_s \\to J/\\psi\\phi$ decay channel at LHCb
B. Khanji
16 Sep 2011
Corrections are applied to the record.
Note:
* Only the first author is defined
* Record submitted is not defined
* The year is not defined
"""
import
pytest
from
harvest_tools
import
format_author_fr
from
invenio_tools
import
CheckAndFix
,
load_record
@pytest.fixture(scope="module")
def record():
    """Load the record 1394605 from cds.cern.ch and apply the corrections.

    The fixture has the module scope: the record is loaded and fixed once
    and shared by all the tests of this module.

    """
    thesis = load_record('cds.cern.ch', 1394605)
    checker = CheckAndFix()

    # the corrections are applied one after the other, in this order
    checker.authors(thesis)
    checker.format_authors(thesis, format_author_fr)
    checker.my_authors(thesis)
    checker.submitted(thesis)
    checker.year(thesis)
    checker.format_universities(thesis)

    return thesis
def test_authors(record):
    """Check the authors string of the fixed record."""
    found = record.authors()
    assert found == "B. Khanji"
def test_first_author(record):
    """Check the first author of the fixed record."""
    found = record.first_author()
    assert found == "B. Khanji"
def test_these_defense(record):
    """Check the defense date of the fixed record."""
    found = record.these_defense()
    assert found == "16 Sep 2011"
def test_these_directors(record):
    """Check the thesis directors of the fixed record."""
    found = record.these_directors()
    assert found == "R. Le Gac, O. Leroy"
def test_is_thesis(record):
    """Check that the fixed record is identified as a thesis."""
    flag = record.is_thesis()
    assert flag
def test_submitted(record):
    """Check the submission date(s) of the fixed record."""
    found = record.submitted()
    assert found == ['2011-09-16']
def test_these_universities(record):
    """Check the university name(s) of the fixed record."""
    found = record.these_universities()
    assert found == [u"Université de la Méditerrannée Aix-Marseille II"]
def test_year(record):
    """Check the year of the fixed record."""
    found = record.year()
    assert found == "2011"
tests/harvester/CheckAndFix/test_phd_cds1632177
-
fix.py
→
tests/harvester/CheckAndFix/test_phd_cds1632177
_
fix.py
View file @
77341193
File moved
tests/harvester/CheckAndFix/test_phd_cds1642541_fix.py
0 → 100644
View file @
77341193
# -*- coding: utf-8 -*-
"""PHD
http://cds.cern.ch/record/1642541
Searches for B meson decays to purely leptonic final states
M. Perrin-Terrin
12 Jul 2013
Corrections are applied to the record.
Note:
* Only the first author is defined
* Record submitted is not defined
* The year is not defined
"""
import
pytest
from
harvest_tools
import
format_author_fr
from
invenio_tools
import
CheckAndFix
,
load_record
@pytest.fixture(scope="module")
def record():
    """Load the record 1642541 from cds.cern.ch and apply the corrections.

    The fixture has the module scope: the record is loaded and fixed once
    and shared by all the tests of this module.

    """
    thesis = load_record('cds.cern.ch', 1642541)
    checker = CheckAndFix()

    # the corrections are applied one after the other, in this order
    checker.authors(thesis)
    checker.format_authors(thesis, format_author_fr)
    checker.my_authors(thesis)
    checker.submitted(thesis)
    checker.year(thesis)
    checker.format_universities(thesis)

    return thesis
def test_authors(record):
    """Check the authors string of the fixed record."""
    found = record.authors()
    assert found == "M. Perrin-Terrin"
def test_first_author(record):
    """Check the first author of the fixed record."""
    found = record.first_author()
    assert found == "M. Perrin-Terrin"
def test_these_defense(record):
    """Check the defense date of the fixed record."""
    found = record.these_defense()
    assert found == "23 Sep 2013"
def test_these_directors(record):
    """Check the thesis directors of the fixed record."""
    found = record.these_directors()
    assert found == "G. Mancinelli, R. Le Gac"
def test_is_thesis(record):
    """Check that the fixed record is identified as a thesis."""
    flag = record.is_thesis()
    assert flag
def test_submitted(record):
    """Check the submission date(s) of the fixed record."""
    found = record.submitted()
    assert found == ['2013-09-23']
def test_these_universities(record):
    """Check the university name(s) of the fixed record."""
    found = record.these_universities()
    assert found == [u"Aix Marseille Université"]
def test_year(record):
    """Check the year of the fixed record."""
    found = record.year()
    assert found == "2013"
tests/harvester/CheckAndFix/test_phd_cds2015250_fix.py
0 → 100644
View file @
77341193
# -*- coding: utf-8 -*-
"""PHD
http://cds.cern.ch/record/2015250
"""
import
pytest
from
gluon
import
current
from
harvest_tools
import
format_author_fr
from
invenio_tools
import
CheckAndFix
,
load_record
def test_these_universities():
    """Check that C{U.} is expanded in the university name of the record
    2015250 when the institute identifier is not 902989.

    The institute identifier is a global setting of C{current.app}.
    It is forced to 9999 while the record is being fixed and restored
    afterwards, so that the tests running after this one still see the
    original value.

    """
    record = load_record('cds.cern.ch', 2015250)

    app = current.app
    previous_id = getattr(app, "inspirehep_institute_id", None)
    app.inspirehep_institute_id = 9999

    try:
        svc = CheckAndFix()
        svc.format_universities(record)

    finally:
        # do not leak the fake identifier into the other test modules
        app.inspirehep_institute_id = previous_id

    assert record.these_universities() == [u"Milan Bicocca Université"]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment