Commit 77341193 authored by LE GAC Renaud
Browse files

Replace the function fix_amu by the method CheckAndFix.format_universities.

parent 009c5f94
...@@ -297,8 +297,8 @@ ...@@ -297,8 +297,8 @@
'insert new %s': 'insert new %s', 'insert new %s': 'insert new %s',
'install': 'installé', 'install': 'installé',
'Institute': 'Institut', 'Institute': 'Institut',
'Institute identifier in inspirehep.net.': 'Identifiant du laboratoire dans inspirehep.net.',
'Institute number associated to CPPM authors': "Numéro de l'Institut associé aux auteurs du CPPM", 'Institute number associated to CPPM authors': "Numéro de l'Institut associé aux auteurs du CPPM",
'Institute identifier in inspirehep.net.': "Identifiant du laboratoire dans inspirehep.net.",
'Invalid': 'Non conforme', 'Invalid': 'Non conforme',
"Invalid database table '%s'": "Invalid database table '%s'", "Invalid database table '%s'": "Invalid database table '%s'",
'Invalid email': 'Invalid email', 'Invalid email': 'Invalid email',
...@@ -486,6 +486,7 @@ ...@@ -486,6 +486,7 @@
'Reject no %s authors': "Rejeté pas d'autheur(s) du %s", 'Reject no %s authors': "Rejeté pas d'autheur(s) du %s",
'Reject no author(s)': "Rejeté pas d'autheur(s)", 'Reject no author(s)': "Rejeté pas d'autheur(s)",
'Reject no authors': "Rejeté pas d'auteurs", 'Reject no authors': "Rejeté pas d'auteurs",
'Reject no authors of my institute': "Rejeté pas d'auteurs de mon laboratoire",
'Reject no conference information': "Rejeté pas d'information sur la conférence", 'Reject no conference information': "Rejeté pas d'information sur la conférence",
'Reject no CPPM authors': "Rejeté pas d'auteurs du CPPM", 'Reject no CPPM authors': "Rejeté pas d'auteurs du CPPM",
'Reject no OAI identifier': "Rejeté pas d'identifiant OAI", 'Reject no OAI identifier': "Rejeté pas d'identifiant OAI",
......
...@@ -5,7 +5,6 @@ and to push them in the database. ...@@ -5,7 +5,6 @@ and to push them in the database.
""" """
from base import (DRY_RUN, from base import (DRY_RUN,
family_name_fr, family_name_fr,
fix_amu,
format_author_fr, format_author_fr,
ToolException) ToolException)
......
...@@ -25,29 +25,6 @@ def family_name_fr(full_name): ...@@ -25,29 +25,6 @@ def family_name_fr(full_name):
return full_name[full_name.find(' ') + 1:] return full_name[full_name.find(' ') + 1:]
def fix_amu(record):
    """Fix the name of the C{Aix Marseille University}

    Every university name matching C{current.app.reg_institute} is
    replaced, in the list returned by C{record.these_universities()},
    by a name selected from the year of the defense (before 2012 or not).

    When no four-digit year can be found in the defense date, the names
    are left as they are instead of raising an C{AttributeError}.

    @type record: L{Record}
    @param record: the thesis record

    @rtype: unicode
    @return: the university names separated by comma.

    """
    universities = record.these_universities()

    # the replacement name only depends on the record: resolve it once,
    # and only when a university actually matches the institute pattern
    amu_name = None

    for idx, name in enumerate(universities):
        if not re.search(current.app.reg_institute, name):
            continue

        if amu_name is None:
            found = re.search(r"(\d\d\d\d)", record.these_defense() or u"")
            if found is None:
                # the year is unknown, the proper name cannot be selected
                break

            if int(found.group(1)) < 2012:
                amu_name = u"Université de la Méditerrannée Aix-Marseille II"
            else:
                amu_name = u"Aix Marseille Université"

        universities[idx] = amu_name

    return ', '.join(universities)
def format_author_fr(name): def format_author_fr(name):
"""Format the author name according to French typographic rules, """Format the author name according to French typographic rules,
I{i.e.} C{J.-P. Doe}. I{i.e.} C{J.-P. Doe}.
......
...@@ -6,7 +6,7 @@ import re ...@@ -6,7 +6,7 @@ import re
import traceback import traceback
from base import family_name_fr, fix_amu, MSG_CRASH, MSG_LOAD from base import family_name_fr, MSG_CRASH, MSG_LOAD
from invenio_tools import CheckException from invenio_tools import CheckException
from publicationstool import PublicationsTool from publicationstool import PublicationsTool
from plugin_dbui import get_id, UNDEF_ID from plugin_dbui import get_id, UNDEF_ID
...@@ -39,7 +39,7 @@ class Thesis(PublicationsTool): ...@@ -39,7 +39,7 @@ class Thesis(PublicationsTool):
id_category = get_id(db.categories, code='PHD') id_category = get_id(db.categories, code='PHD')
oai_url = record.oai_url() oai_url = record.oai_url()
title = record.title() title = record.title()
universities = fix_amu(record) universities = ', '.join(record.these_universities())
# extract the year from the defense date # extract the year from the defense date
# this approach seems the most reliable # this approach seems the most reliable
...@@ -99,6 +99,7 @@ class Thesis(PublicationsTool): ...@@ -99,6 +99,7 @@ class Thesis(PublicationsTool):
self.check.oai(record) self.check.oai(record)
self.check.submitted(record) self.check.submitted(record)
self.check.year(record) self.check.year(record)
self.check.format_universities(record)
except CheckException as e: except CheckException as e:
self.logs[-1].reject(e, record.year()) self.logs[-1].reject(e, record.year())
......
...@@ -47,7 +47,7 @@ MSG_NO_AUTHOR = "Reject no author(s)" ...@@ -47,7 +47,7 @@ MSG_NO_AUTHOR = "Reject no author(s)"
MSG_NO_CONF = "Reject no conference information" MSG_NO_CONF = "Reject no conference information"
MSG_NO_COUNTRY = "Reject invalid country" MSG_NO_COUNTRY = "Reject invalid country"
MSG_NO_DATE = "Reject no submission date" MSG_NO_DATE = "Reject no submission date"
MSG_NO_MY_AUTHOR = "Reject no %s authors" MSG_NO_MY_AUTHOR = "Reject no authors of my institute"
MSG_NO_OAI = "Reject no OAI identifier" MSG_NO_OAI = "Reject no OAI identifier"
MSG_NO_REF = "Reject incomplete paper reference" MSG_NO_REF = "Reject incomplete paper reference"
MSG_NO_YEAR = "Reject no publication year" MSG_NO_YEAR = "Reject no publication year"
...@@ -74,6 +74,8 @@ REG_CONF_DATES_2 = re.compile("(\d+) *([A-Z][a-z]{2}) *-? *(\d+) *([A-Z][a-z]{2} ...@@ -74,6 +74,8 @@ REG_CONF_DATES_2 = re.compile("(\d+) *([A-Z][a-z]{2}) *-? *(\d+) *([A-Z][a-z]{2}
REG_CONF_DATES = re.compile(regex.REG_CONF_DATES) REG_CONF_DATES = re.compile(regex.REG_CONF_DATES)
REG_SUBMITTED = re.compile(regex.REG_SUBMITTED) REG_SUBMITTED = re.compile(regex.REG_SUBMITTED)
UNIVERSITY = "University"
def load_record(host, record_id): def load_record(host, record_id):
"""Helper function to load a single record from an invenio store. """Helper function to load a single record from an invenio store.
...@@ -482,6 +484,57 @@ class CheckAndFix(object): ...@@ -482,6 +484,57 @@ class CheckAndFix(object):
record["773"][i]["p"] = editor record["773"][i]["p"] = editor
record["773"][i]["v"] = volume record["773"][i]["v"] = volume
def format_universities(self, record):
    """Format the name of the university for PhD:

        - CPPM (C{inspirehep_institute_id} equal to 902989): replace any
          name containing C{Marseille} by the name of Aix-Marseille
          University selected from the year of the defense.
        - Other institutes: replace C{U.} by the translation
          of C{University}.

    The subfield C{b} of the field C{502} is modified in place when it is
    a unicode string or a list. Nothing is done when the record is not a
    thesis, when the subfield is missing, or (CPPM case) when no year is
    found in the defense date.

    @type record: L{Record}
    @param record: the record to be fixed

    """
    # protection
    if not record.is_thesis():
        return

    # CPPM: fix the name of Aix-Marseille university
    if current.app.inspirehep_institute_id == 902989:

        found = REG_YEAR.search(record.these_defense() or u"")
        if found is None:
            # without the year the proper name cannot be selected
            return

        if int(found.group(1)) < 2012:
            university = u"Université de la Méditerrannée Aix-Marseille II"
        else:
            university = u"Aix Marseille Université"

        def fix(name):
            return university if "Marseille" in name else name

    # Other: replace U. by University
    else:
        university = current.T(UNIVERSITY).decode("utf8")

        def fix(name):
            return name.replace('U.', university) if "U." in name else name

    if u'502' not in record or "b" not in record[u'502']:
        return

    # the subfield is either a single name or a list of names
    names = record[u'502']['b']

    if isinstance(names, unicode):
        record[u'502']['b'] = fix(names)

    elif isinstance(names, list):
        for i, name in enumerate(names):
            names[i] = fix(name)
def my_authors(self, record, reference=[], cmpFct=None): def my_authors(self, record, reference=[], cmpFct=None):
"""Check that authors of my institutes signed the record. """Check that authors of my institutes signed the record.
Fill the meta data record.my_authors. Fill the meta data record.my_authors.
...@@ -533,7 +586,7 @@ class CheckAndFix(object): ...@@ -533,7 +586,7 @@ class CheckAndFix(object):
record.my_authors = s record.my_authors = s
return return
raise CheckException(MSG_NO_MY_AUTHOR % reg_institute.encode("utf8")) raise CheckException(MSG_NO_MY_AUTHOR)
def oai(self, record): def oai(self, record):
"""Check that the OAI field is defined and well formed. """Check that the OAI field is defined and well formed.
......
...@@ -535,7 +535,7 @@ class Record(dict): ...@@ -535,7 +535,7 @@ class Record(dict):
return False return False
def is_thesis(self): def is_thesis(self):
"""C{True} when the record corresponf to a thesis. """C{True} when the record corresponds to a thesis.
@rtype: bool @rtype: bool
@return: @return:
......
# -*- coding: utf-8 -*-
"""PHD
http://cds.cern.ch/record/1394605
Search for New Physics in the $B^0_s \to J/\psi\phi$ decay channel at LHCb
B. Khanji
16 Sep 2011
Corrections are applied to the record.
Note:
* Only the first author is defined
* Record submitted is not defined
* The year is not defined
"""
import pytest
from harvest_tools import format_author_fr
from invenio_tools import CheckAndFix, load_record
@pytest.fixture(scope="module")
def record():
    """Load the record 1394605 from cds.cern.ch, once per module, and
    apply the C{CheckAndFix} corrections to it.

    """
    thesis = load_record('cds.cern.ch', 1394605)

    fixer = CheckAndFix()
    fixer.authors(thesis)
    fixer.format_authors(thesis, format_author_fr)

    # the remaining corrections only need the record
    for step in ("my_authors", "submitted", "year", "format_universities"):
        getattr(fixer, step)(thesis)

    return thesis
def test_authors(record):
    """Check the formatted list of authors."""
    expected = "B. Khanji"
    assert record.authors() == expected
def test_first_author(record):
    """Check the formatted name of the first author."""
    expected = "B. Khanji"
    assert record.first_author() == expected
def test_these_defense(record):
    """Check the date of the defense."""
    expected = "16 Sep 2011"
    assert record.these_defense() == expected
def test_these_directors(record):
    """Check the names of the thesis directors."""
    expected = "R. Le Gac, O. Leroy"
    assert record.these_directors() == expected
def test_is_thesis(record):
    """Check that the record is identified as a thesis."""
    thesis_flag = record.is_thesis()
    assert thesis_flag
def test_submitted(record):
    """Check the submission date set by the corrections."""
    expected = ['2011-09-16']
    assert record.submitted() == expected
def test_these_universities(record):
    """Check the university name after C{format_universities}."""
    expected = [u"Université de la Méditerrannée Aix-Marseille II"]
    assert record.these_universities() == expected
def test_year(record):
    """Check the publication year set by the corrections."""
    expected = "2011"
    assert record.year() == expected
# -*- coding: utf-8 -*-
"""PHD
http://cds.cern.ch/record/1642541
Searches for B meson decays to purely leptonic final states
M. Perrin-Terrin
12 Jul 2013
Corrections are applied to the record.
Note:
* Only the first author is defined
* Record submitted is not defined
* The year is not defined
"""
import pytest
from harvest_tools import format_author_fr
from invenio_tools import CheckAndFix, load_record
@pytest.fixture(scope="module")
def record():
    """Load the record 1642541 from cds.cern.ch, once per module, and
    apply the C{CheckAndFix} corrections to it.

    """
    thesis = load_record('cds.cern.ch', 1642541)

    fixer = CheckAndFix()
    fixer.authors(thesis)
    fixer.format_authors(thesis, format_author_fr)

    # the remaining corrections only need the record
    for step in ("my_authors", "submitted", "year", "format_universities"):
        getattr(fixer, step)(thesis)

    return thesis
def test_authors(record):
    """Check the formatted list of authors."""
    expected = "M. Perrin-Terrin"
    assert record.authors() == expected
def test_first_author(record):
    """Check the formatted name of the first author."""
    expected = "M. Perrin-Terrin"
    assert record.first_author() == expected
def test_these_defense(record):
    """Check the date of the defense."""
    expected = "23 Sep 2013"
    assert record.these_defense() == expected
def test_these_directors(record):
    """Check the names of the thesis directors."""
    expected = "G. Mancinelli, R. Le Gac"
    assert record.these_directors() == expected
def test_is_thesis(record):
    """Check that the record is identified as a thesis."""
    thesis_flag = record.is_thesis()
    assert thesis_flag
def test_submitted(record):
    """Check the submission date set by the corrections."""
    expected = ['2013-09-23']
    assert record.submitted() == expected
def test_these_universities(record):
    """Check the university name after C{format_universities}."""
    expected = [u"Aix Marseille Université"]
    assert record.these_universities() == expected
def test_year(record):
    """Check the publication year set by the corrections."""
    expected = "2013"
    assert record.year() == expected
# -*- coding: utf-8 -*-
"""PHD
http://cds.cern.ch/record/2015250
"""
import pytest
from gluon import current
from harvest_tools import format_author_fr
from invenio_tools import CheckAndFix, load_record
def test_these_universities():
    """Check that C{U.} is expanded for an institute other than 902989.

    The institute identifier is a global setting read by
    C{CheckAndFix.format_universities}. It is forced to 9999 for the
    duration of the call and restored afterwards, so that the value does
    not leak into tests run later in the same session.

    """
    record = load_record('cds.cern.ch', 2015250)

    # NOTE(review): None is restored when the attribute was not defined
    # beforehand -- confirm that it is acceptable for current.app
    previous_id = getattr(current.app, "inspirehep_institute_id", None)
    current.app.inspirehep_institute_id = 9999

    try:
        svc = CheckAndFix()
        svc.format_universities(record)
    finally:
        current.app.inspirehep_institute_id = previous_id

    assert record.these_universities() == [u"Milan Bicocca Université"]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment