Commit 77341193 authored by LE GAC Renaud
Browse files

Replace the function fix_amu by the method CheckAndFix.format_universities.

parent 009c5f94
......@@ -297,8 +297,8 @@
'insert new %s': 'insert new %s',
'install': 'installé',
'Institute': 'Institut',
'Institute identifier in inspirehep.net.': 'Identifiant du laboratoire dans inspirehep.net.',
'Institute number associated to CPPM authors': "Numéro de l'Institut associé aux auteurs du CPPM",
'Institute identifier in inspirehep.net.': "Identifiant du laboratoire dans inspirehep.net.",
'Invalid': 'Non conforme',
"Invalid database table '%s'": "Invalid database table '%s'",
'Invalid email': 'Invalid email',
......@@ -486,6 +486,7 @@
'Reject no %s authors': "Rejeté pas d'autheur(s) du %s",
'Reject no author(s)': "Rejeté pas d'autheur(s)",
'Reject no authors': "Rejeté pas d'auteurs",
'Reject no authors of my institute': "Rejeté pas d'auteurs de mon laboratoire",
'Reject no conference information': "Rejeté pas d'information sur la conférence",
'Reject no CPPM authors': "Rejeté pas d'auteurs du CPPM",
'Reject no OAI identifier': "Rejeté pas d'identifiant OAI",
......
......@@ -5,7 +5,6 @@ and to push them in the database.
"""
from base import (DRY_RUN,
family_name_fr,
fix_amu,
format_author_fr,
ToolException)
......
......@@ -25,29 +25,6 @@ def family_name_fr(full_name):
return full_name[full_name.find(' ') + 1:]
def fix_amu(record):
    """Return the universities of a thesis with the Aix-Marseille name fixed.

    Every university matching C{current.app.reg_institute} is replaced by
    one of two fixed names, selected by the first four-digit number found
    in the defense date (before 2012 or not). The list returned by
    C{record.these_universities()} is modified in place.

    @type record: L{Record}
    @param record: the thesis record.

    @rtype: unicode
    @return: the university names separated by comma.

    """
    names = record.these_universities()

    for pos, name in enumerate(names):
        # leave untouched the universities which are not the institute one
        if not re.search(current.app.reg_institute, name):
            continue

        found = re.search(r"(\d\d\d\d)", record.these_defense())
        defense_year = int(found.group(1))

        if defense_year < 2012:
            names[pos] = u"Université de la Méditerrannée Aix-Marseille II"
        else:
            names[pos] = u"Aix Marseille Université"

    return ', '.join(names)
def format_author_fr(name):
"""Format the author name according to French typographic rules,
I{i.e.} C{J.-P. Doe}.
......
......@@ -6,7 +6,7 @@ import re
import traceback
from base import family_name_fr, fix_amu, MSG_CRASH, MSG_LOAD
from base import family_name_fr, MSG_CRASH, MSG_LOAD
from invenio_tools import CheckException
from publicationstool import PublicationsTool
from plugin_dbui import get_id, UNDEF_ID
......@@ -39,7 +39,7 @@ class Thesis(PublicationsTool):
id_category = get_id(db.categories, code='PHD')
oai_url = record.oai_url()
title = record.title()
universities = fix_amu(record)
universities = ', '.join(record.these_universities())
# extract the year from the defense date
# this approach seems the most reliable
......@@ -99,6 +99,7 @@ class Thesis(PublicationsTool):
self.check.oai(record)
self.check.submitted(record)
self.check.year(record)
self.check.format_universities(record)
except CheckException as e:
self.logs[-1].reject(e, record.year())
......
......@@ -47,7 +47,7 @@ MSG_NO_AUTHOR = "Reject no author(s)"
MSG_NO_CONF = "Reject no conference information"
MSG_NO_COUNTRY = "Reject invalid country"
MSG_NO_DATE = "Reject no submission date"
MSG_NO_MY_AUTHOR = "Reject no %s authors"
MSG_NO_MY_AUTHOR = "Reject no authors of my institute"
MSG_NO_OAI = "Reject no OAI identifier"
MSG_NO_REF = "Reject incomplete paper reference"
MSG_NO_YEAR = "Reject no publication year"
......@@ -74,6 +74,8 @@ REG_CONF_DATES_2 = re.compile("(\d+) *([A-Z][a-z]{2}) *-? *(\d+) *([A-Z][a-z]{2}
REG_CONF_DATES = re.compile(regex.REG_CONF_DATES)
REG_SUBMITTED = re.compile(regex.REG_SUBMITTED)
UNIVERSITY = "University"
def load_record(host, record_id):
"""Helper function to load a single record from an invenio store.
......@@ -482,6 +484,57 @@ class CheckAndFix(object):
record["773"][i]["p"] = editor
record["773"][i]["v"] = volume
def format_universities(self, record):
    """Format the name of the university for PhD:

        - Fix the name of Aix-Marseille University
        - Replace U. by University

    The record is modified in place: only the subfield C{b} of the field
    C{502} is rewritten, either as a single unicode string or as a list
    of strings. Nothing is done when:

        - the record is not a thesis,
        - the field C{502} or its subfield C{b} is missing,
        - the Aix-Marseille fix applies but no year can be extracted
          from the defense date (the right name cannot be selected).

    @type record: L{Record}
    @param record: the record to be fixed.

    """
    # protection
    if not record.is_thesis():
        return

    # nothing to format; checked first so that a thesis without
    # university never triggers the year extraction below
    if u'502' not in record or "b" not in record[u'502']:
        return

    # CPPM: fix the name of Aix-Marseille university
    if current.app.inspirehep_institute_id == 902989:

        # the name to use depends on the year of the defense
        match = REG_YEAR.search(record.these_defense())
        if match is None:
            return

        if int(match.group(1)) < 2012:
            name = u"Université de la Méditerrannée Aix-Marseille II"
        else:
            name = u"Aix Marseille Université"

        def fix(value):
            return name if "Marseille" in value else value

    # Other: replace U. by University
    else:
        university = current.T(UNIVERSITY).decode("utf8")

        def fix(value):
            # replace() returns the value unchanged when "U." is absent
            return value.replace('U.', university)

    # the subfield is either a single string or a list of strings
    values = record[u'502']['b']

    if isinstance(values, unicode):
        record[u'502']['b'] = fix(values)

    elif isinstance(values, list):
        for i, value in enumerate(values):
            values[i] = fix(value)
def my_authors(self, record, reference=[], cmpFct=None):
"""Check that authors of my institutes signed the record.
Fill the meta data record.my_authors.
......@@ -533,7 +586,7 @@ class CheckAndFix(object):
record.my_authors = s
return
raise CheckException(MSG_NO_MY_AUTHOR % reg_institute.encode("utf8"))
raise CheckException(MSG_NO_MY_AUTHOR)
def oai(self, record):
"""Check that the OAI field is defined and well formed.
......
......@@ -535,7 +535,7 @@ class Record(dict):
return False
def is_thesis(self):
"""C{True} when the record corresponf to a thesis.
"""C{True} when the record corresponds to a thesis.
@rtype: bool
@return:
......
# -*- coding: utf-8 -*-
"""PHD
http://cds.cern.ch/record/1394605
Search for New Physics in the $B^0_s \to J/\psi\phi$ decay channel at LHCb
B. Khanji
16 Sep 2011
Corrections are applied to the record.
Note:
* Only the first author is defined
* Record submitted is not defined
* The year is not defined
"""
import pytest
from harvest_tools import format_author_fr
from invenio_tools import CheckAndFix, load_record
@pytest.fixture(scope="module")
def record():
    """Load the record 1394605 from cds.cern.ch and apply the corrections.

    The fixture has module scope: the record is loaded and corrected once,
    then shared by all the tests of this module.

    @return: the record after the C{CheckAndFix} calls below.

    """
    rec = load_record('cds.cern.ch', 1394605)

    # each call works on rec itself, the return values are not used
    svc = CheckAndFix()
    svc.authors(rec)
    svc.format_authors(rec, format_author_fr)
    svc.my_authors(rec)
    svc.submitted(rec)
    svc.year(rec)
    svc.format_universities(rec)
    return rec
def test_authors(record):
    """The author string of the corrected record is the single name."""
    assert record.authors() == "B. Khanji"
def test_first_author(record):
    """The first author is the same as the author string."""
    assert record.first_author() == "B. Khanji"
def test_these_defense(record):
    """The defense date is reported as day, abbreviated month and year."""
    assert record.these_defense() == "16 Sep 2011"
def test_these_directors(record):
    """The thesis directors are reported as one comma separated string."""
    assert record.these_directors() == "R. Le Gac, O. Leroy"
def test_is_thesis(record):
    """The record is recognised as a thesis."""
    assert record.is_thesis()
def test_submitted(record):
    """The submission date, after the corrections applied by the fixture,
    is a one-element list holding an ISO date.
    """
    assert record.submitted() == ['2011-09-16']
def test_these_universities(record):
    """The defense year (2011) is before 2012: the name expected is the
    one selected by C{format_universities} for years before 2012.
    """
    assert record.these_universities() == [u"Université de la Méditerrannée Aix-Marseille II"]
def test_year(record):
    """The year, after the corrections applied by the fixture, is a string."""
    assert record.year() == "2011"
# -*- coding: utf-8 -*-
"""PHD
http://cds.cern.ch/record/1642541
Searches for B meson decays to purely leptonic final states
M. Perrin-Terrin
12 Jul 2013
Corrections are applied to the record.
Note:
* Only the first author is defined
* Record submitted is not defined
* The year is not defined
"""
import pytest
from harvest_tools import format_author_fr
from invenio_tools import CheckAndFix, load_record
@pytest.fixture(scope="module")
def record():
    """Load the record 1642541 from cds.cern.ch and apply the corrections.

    The fixture has module scope: the record is loaded and corrected once,
    then shared by all the tests of this module.

    @return: the record after the C{CheckAndFix} calls below.

    """
    rec = load_record('cds.cern.ch', 1642541)

    # each call works on rec itself, the return values are not used
    svc = CheckAndFix()
    svc.authors(rec)
    svc.format_authors(rec, format_author_fr)
    svc.my_authors(rec)
    svc.submitted(rec)
    svc.year(rec)
    svc.format_universities(rec)
    return rec
def test_authors(record):
    """The author string of the corrected record is the single name."""
    assert record.authors() == "M. Perrin-Terrin"
def test_first_author(record):
    """The first author is the same as the author string."""
    assert record.first_author() == "M. Perrin-Terrin"
def test_these_defense(record):
    """The defense date is reported as day, abbreviated month and year."""
    assert record.these_defense() == "23 Sep 2013"
def test_these_directors(record):
    """The thesis directors are reported as one comma separated string."""
    assert record.these_directors() == "G. Mancinelli, R. Le Gac"
def test_is_thesis(record):
    """The record is recognised as a thesis."""
    assert record.is_thesis()
def test_submitted(record):
    """The submission date, after the corrections applied by the fixture,
    is a one-element list holding an ISO date.
    """
    assert record.submitted() == ['2013-09-23']
def test_these_universities(record):
    """The defense year (2013) is not before 2012: the name expected is
    the one selected by C{format_universities} from 2012 onwards.
    """
    assert record.these_universities() == [u"Aix Marseille Université"]
def test_year(record):
    """The year, after the corrections applied by the fixture, is a string."""
    assert record.year() == "2013"
# -*- coding: utf-8 -*-
"""PHD
http://cds.cern.ch/record/2015250
"""
import pytest
from gluon import current
from harvest_tools import format_author_fr
from invenio_tools import CheckAndFix, load_record
def test_these_universities(monkeypatch):
    """Check the generic formatting, I{i.e.} C{U.} replaced by the
    translation of C{University}, for an institute which is not the one
    handled by the Aix-Marseille fix.

    The institute identifier is changed through the C{monkeypatch} fixture
    so that the original value is restored at the end of the test, instead
    of leaking into the tests run afterwards.

    """
    record = load_record('cds.cern.ch', 2015250)

    # raising=False: do not fail when the attribute is not yet defined
    monkeypatch.setattr(current.app,
                        "inspirehep_institute_id",
                        9999,
                        raising=False)

    svc = CheckAndFix()
    svc.format_universities(record)

    assert record.these_universities() == [u"Milan Bicocca Université"]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment