filters.py 2.25 KB
Newer Older
LE GAC Renaud's avatar
LE GAC Renaud committed
1
# -*- coding: utf-8 -*-
2 3 4 5
"""A collection of functions to correct entries in the database.

"""

LE GAC Renaud's avatar
LE GAC Renaud committed
6

7 8
def CLEAN_COLLABORATION(value):
    """Correct common mistakes on the collaboration field.

        * No leading and trailing spaces.
        * No empty entries (e.g. produced by ``,,`` or a trailing comma).
        * No duplicate entries.
        * Remove entries starting with *on behalf*, whatever the letter case.
        * The words *collaboration*, *consortium* and *group* are
          capitalised.

    Args:
        value (unicode): string where collaborations are separated by comma

    Returns:
        unicode: the cleaned entries, in their original order, joined
            by a comma followed by a space.

    """
    li = []

    for el in value.split(','):
        # Fix to remove space at the beginning and at the end
        el = el.strip()

        # Fix to avoid empty entries, which would otherwise show up
        # as ", ," in the joined result
        if not el:
            continue

        # Fix "XXX collaboration" as "XXX Collaboration"
        el = el.replace('collaboration', 'Collaboration')
        el = el.replace('consortium', 'Consortium')
        el = el.replace('group', 'Group')

        # Fix to avoid duplicate entries
        # (compared after normalisation, so "X collaboration" and
        # "X Collaboration" count as the same entry)
        if el in li:
            continue

        # Fix to remove 'on behalf of the LHCb Collaboration'
        # (case-insensitive, so that "On behalf of ..." is dropped too)
        if el.lower().startswith('on behalf'):
            continue

        li.append(el)

    return ', '.join(li)


46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
def CLEAN_COLLABORATION_SYNONYM(value):
    """Normalise the typography of the collaboration synonyms.

    Each synonym may hold several names separated by a comma. After
    cleaning, every synonym obeys the following rules:

        * No leading and trailing spaces
        * Exactly one space after each comma
        * Exactly one space between words
        * No empty name (e.g. coming from ``,,``)

    Args:
        value (list): the synonyms, one string per element. Any value
            which is not a list is returned untouched.

    Returns:
        list: the cleaned synonyms, in the same order as the input.

    """
    if not isinstance(value, list):
        return value

    cleaned = []

    for entry in value:
        names = []

        for chunk in entry.split(","):
            # split() without argument ignores leading / trailing blanks
            # and treats any run of blanks as a single separator
            words = chunk.split()

            # nothing left: the chunk was empty or made of blanks only
            if not words:
                continue

            names.append(" ".join(words))

        cleaned.append(", ".join(names))

    return cleaned


81
def CLEAN_THESIS_DEFENSE(value):
    """Correct common mistakes on the thesis defence field.

        * Remove the words *Presented* / *presented* and *on*, each
          followed by a space, e.g. ``Presented on 12 Jun 2012``
          becomes ``12 Jun 2012``.

    Args:
        value (unicode): string with the defence date

    Returns:
        unicode: the string without the above words.

    """
    import re

    value = value.replace('Presented ', '')
    value = value.replace('presented ', '')

    # Remove "on " only when it is a whole word. A plain substring
    # replacement would also eat the end of words such as "Avignon "
    # or "Lyon ", corrupting the remaining text.
    value = re.sub(r'\bon ', '', value, flags=re.UNICODE)

    return value