Commit aa080ce1 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Redesign the function recordpubli.to_initial.

parent 78090b4d
......@@ -30,33 +30,37 @@ MSG_INVALID_FMT = "Invalid format for author"
# the keys containing paper reference
PAPER_REFERENCE_KEYS = {"pagination", "title", "volume", "year"}
# Regular expression splitting a first name into three captured groups:
#   1. the first word,
#   2. an optional hyphen separator,
#   3. an optional second word.
# A dot following a word is accepted but not captured.
# NOTE(review): the chained assignment also binds the module-level name
# `initial` -- confirm that this second name is intended.
REG_INITIAL = initial = r"^(\w+)\.?(\-)* *(\w+)*\.?$"
def to_initial(name):
    """Build the initials of a first name.

    Every word of the name is reduced to its first character followed by
    a dot. Words linked by a hyphen keep the hyphen between their initials
    (without space); all the other words are separated by a single space:

        +-------------------+----------+
        | name              | initial  |
        +-------------------+----------+
        | Albert            | A.       |
        | Antonio Augusto   | A. A.    |
        | Kristof Antoon M  | K. A. M. |
        | Jean-Pierre       | J.-P.    |
        | Marie-Hélène      | M.-H.    |
        | Marie - Pierre    | M.-P.    |
        | Jean-Pierre Marie | J.-P. M. |
        +-------------------+----------+

    Args:
        name (unicode): the first name(s). It can be empty.

    Returns:
        unicode: the initials, or an empty string when the name is empty.

    """
    if not name:
        return u""

    groups = []
    hyphen_pending = False

    for token in re.findall(r"\w+|-", name, re.UNICODE):
        if token == u"-":
            # a hyphen only links two words: it is ignored when no word
            # precedes it, and dropped later if no word follows it.
            hyphen_pending = bool(groups)
            continue

        initial = u"%s." % token[0]

        if hyphen_pending:
            # glue the initial to the previous one, e.g. "J." -> "J.-P."
            groups[-1] += u"-" + initial
        else:
            groups.append(initial)

        hyphen_pending = False

    return u" ".join(groups)
def to_str(x):
......@@ -710,11 +714,9 @@ class RecordPubli(Record):
#
if fmt in ("F. Last", "Last F."):
dfm = (df.first_name.str.extract(REG_INITIAL, expand=True)
.fillna(""))
df["initial"] = dfm.apply(
lambda x: to_initial(x[0], x[1], x[2]), axis="columns")
df["initial"] = (df.first_name
.fillna("")
.apply(to_initial))
# ....................................................................
#
......
......@@ -26,6 +26,7 @@ import pandas as pd
import pytest
from invenio_tools import load_record
from invenio_tools.recordpubli import to_initial
@pytest.fixture(scope="module")
......@@ -67,6 +68,15 @@ def test_constructor(reccds):
assert paper.pagination == "P12005"
def test_to_initial():
    """Check the initials computed by to_initial for typical first names.

    The cases cover a single word, several words separated by spaces and
    words linked by a hyphen, with or without spaces around the hyphen.

    """
    cases = [
        (u"Albert", u"A."),
        (u"Antonio Augusto", u"A. A."),
        (u"Kristof Antoon M", u"K. A. M."),
        (u"Jean-Pierre", u"J.-P."),
        (u"Marie-Hélène", u"M.-H."),
        (u"Marie - Pierre", u"M.-P."),
    ]

    for first_name, expected in cases:
        assert to_initial(first_name) == expected
# ............................................................................
#
# Section devoted to authors
......
......@@ -38,6 +38,21 @@ def test_acti_cds1411352(svc):
assert reccds.submitted() == "2012-01-05"
def test_acti_cds2270937(svc):
    """Check the "F. Last" author format on the record cds 2270937.

    JSON:
        * The author name was not well formatted when using F. Last.
        * Exercises the more general design of recordpubli.to_initial,
          with a first name made of three words.

    """
    record = load_record('cds.cern.ch', 2270937)

    # first author as found in the record, before any formatting
    before = record.first_author()
    assert before == "De Bruyn, Kristof Antoon M"

    svc.format_authors(record, fmt="F. Last")

    # first author once the "F. Last" format has been applied
    after = record.first_author()
    assert after == "K. A. M. De Bruyn"
def test_acti_ins1276938(svc):
"""
XML:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment