base.py 3.72 KB
Newer Older
1 2 3 4
# -*- coding: utf-8 -*-
""" harvest_tools.base

"""
5
from invenio_tools import REG_AUTHOR
6

7
# Label for the "dry run" mode.
# NOTE(review): presumably the mode in which nothing is written to the
# database -- confirm against the callers of this constant.
DRY_RUN = "dry run"
8 9

# Message template with a single %s placeholder receiving the crash details.
MSG_CRASH = "Crash: %s"
10 11 12
# Short status messages.
# NOTE(review): presumably reported for each processed record -- confirm
# against the callers of these constants.
MSG_FIX_ORIGIN = "Fixed the origin field"
MSG_IN_DB = "Already in the database"
MSG_LOAD = "Load in the database"
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49


def family_name_fr(full_name):
    """Return the family name of an author written as C{J. Doe}.

    Everything located after the first space is returned. The input is
    returned as it is when it does not contain any space.

    @type full_name: unicode
    @param full_name: author name, I{e.g.} C{J. Doe}

    @rtype: unicode
    @return: the text following the first space, I{e.g.} C{Doe}

    """
    # split once at most: the last piece is either the text following the
    # first space or the whole string when there is no space at all
    pieces = full_name.split(' ', 1)
    return pieces[-1]


def format_author_fr(name):
    """Format the author name according to French typographic rules,
    I{i.e.} C{J.-P. Doe}.

    The name is matched against C{REG_AUTHOR}. When it does not match,
    or when no initial can be extracted from a hyphenated first name,
    the name is not reformatted. In every case the result goes through
    C{title()} to avoid names in upper case.

    @type name: unicode
    @param name: author name, expected as C{Family, First}
    (see the list of supported encodings in the body).

    @rtype: unicode
    @return: the formatted name. An empty or C{None} name is returned
    as it is.

    """
    # protection
    if not name:
        return name

    # names are encoded Family, L
    #                   Family, P L
    #                   Family, M -H
    #                   Family Name, J
    #                   Family-Name, J
    #                   Family, F Name
    #                   Family, First
    # To avoid dealing with unicode characters
    # look for non empty string \S
    match = REG_AUTHOR.match(name)

    # reformat the name as L. Family
    # or keep it as it is
    if match:
        family = match.group(1)
        first = match.group(2)

        if match.group(3) and match.group(4):
            # groups 3 and 4 contribute their first character, glued
            # right after the first initial
            result = '%s.%s%s. %s' % (first[0], match.group(3)[0],
                                      match.group(4)[0], family)

        elif "-" in first:
            # hyphenated first name: one initial per part (Jean-Pierre
            # --> J.-P.). Works for any number of parts and skips the
            # empty ones left by a leading, trailing or doubled hyphen.
            initials = [el[0] for el in first.split("-") if el]
            if initials:
                result = "%s. %s" % (".-".join(initials), family)
            else:
                result = name

        else:
            result = '%s. %s' % (first[0], family)
    else:
        result = name

    # avoid author name in upper case (R. LE FOO --> R. Le Foo)
    # NOTE: title() also lowers inner capitals (McDonald --> Mcdonald)
    return result.title()


def learn_my_authors(db,
                     authors=None,
                     id_project=None,
                     id_team=None,
                     year=None):
    """Train the rescue list of the authors of my institute,
    stored in the database, using the names provided in C{authors}.

    A new record is created when none exists for the given project,
    team and year. Otherwise the names which are not yet known are
    appended to the stored list, which is sorted by family name and
    written back.

    @note: all keyword arguments have to be defined.

    @type db: gluon.dal.DAL
    @param db: database connection, the table C{my_authors} is used.

    @type authors: unicode
    @param authors: authors names separated by C{', '},
    I{e.g.} C{J. Doe, A. Foo}

    @type id_project: int
    @param id_project: project identifier

    @type id_team: int
    @param id_team: team identifier

    @type year: int
    @param year:

    """
    # get the list of authors stored in the database
    row = db.my_authors(id_projects=id_project,
                        id_teams=id_team,
                        year=year)

    # no entry in the database
    if not row:
        db.my_authors[0] = dict(authors=authors,
                                id_projects=id_project,
                                id_teams=id_team,
                                year=year)
        return

    # nothing to learn
    if not authors:
        return

    # empty fragments are dropped in order not to be sorted and joined
    # back as spurious separators
    known = row.authors or ''
    database_authors = [el for el in known.split(', ') if el]

    # compare with the input list
    # and extract authors which are not in the db
    new = set(el for el in authors.split(', ') if el)
    diff = new.difference(database_authors)
    if not diff:
        return

    # NOTE1: be careful with the string encoding
    # NOTE2: handle the case J. Foo and J. M. Foo are the same person
    elems = []
    for elem in diff:
        # Python 2 only (str is bytes): encode unicode names as UTF-8
        # byte strings. On Python 3 the names are used as they are.
        if str is bytes and not isinstance(elem, str):
            elem = elem.encode('utf8')

        # the family name follows the last initial. The whole name is
        # used when there is no initial at all.
        pos = elem.rfind('. ')
        family_name = elem[pos + 2:] if pos >= 0 else elem

        # NOTE(review): substring test on the stored string, therefore
        # Foo is also found when only Foobar is stored.
        if family_name not in known:
            elems.append(elem)

    # update the database
    database_authors.extend(elems)
    database_authors.sort(key=family_name_fr)
    db.my_authors[row.id] = dict(authors=', '.join(database_authors))


LE GAC Renaud's avatar
LE GAC Renaud committed
141 142
class ToolException(Exception):
    """Exception of the harvester tools.

    It behaves as the standard C{Exception}, nothing is added to it.

    """