"""A collection of functions to correct entries in the database.

@author: R. Le Gac

"""


def CLEAN_COLLABORATION(value):
    """Correct common mistakes on the collaboration field.

        - No leading and trailing spaces
        - No empty entries (leading, trailing or doubled comma)
        - No duplicate entries
        - Remove entries starting with C{on behalf}, whatever the case
        - The words C{collaboration}, C{consortium} and C{group} are
          written with a capital letter

    @type value: str
    @param value: string where collaborations are separated by comma

    @rtype: str
    @return: the cleaned collaborations joined by C{', '}, in the order
    of their first appearance

    """
    cleaned = []

    for entry in value.split(','):
        # Remove spaces at the beginning and at the end
        entry = entry.strip()

        # An empty entry comes from a leading, trailing or doubled comma;
        # keeping it would leave a dangling ', ' in the result
        if not entry:
            continue

        # Remove 'on behalf of the LHCb Collaboration', also when it is
        # written 'On behalf of ...'
        if entry.lower().startswith('on behalf'):
            continue

        # Write "XXX collaboration" as "XXX Collaboration"
        entry = entry.replace('collaboration', 'Collaboration')
        entry = entry.replace('consortium', 'Consortium')
        entry = entry.replace('group', 'Group')

        # Avoid duplicate entries (compared after the corrections above)
        if entry in cleaned:
            continue

        cleaned.append(entry)

    return ', '.join(cleaned)


def CLEAN_REVIEW(value):
    """Correct common mistakes on the paper_editor field.

        - Remove dot and comma
        - No leading and trailing spaces
        - Only one space between words

    @type value: str
    @param value: review abbreviation

    @rtype: str

    """
    # Remove dot and comma
    for char in ('.', ','):
        value = value.replace(char, '')

    # split() without argument cuts on any run of whitespace and ignores
    # the leading and trailing ones, so joining the pieces with a single
    # space both trims the string and collapses repeated spaces
    return ' '.join(value.split())


def CLEAN_THESIS_DEFENSE(value):
    """Correct common mistakes on the thesis_defense field.

        - Remove the words C{Presented}, C{presented}, C{On} and C{on}
          when they are followed by a space
        - Only whole words are removed: the C{on } ending C{Lyon } is
          left untouched

    @type value: str
    @param value: string with the defense date

    @rtype: str

    """
    # Imported here to keep the function self-contained
    import re

    # The leading \b anchors the match at the start of a word. A plain
    # str.replace('on ', '') would also eat the end of any word finishing
    # with 'on' (e.g. 'Lyon 2012' would become 'Ly2012').
    return re.sub(r'\b(?:[Pp]resented|[Oo]n) ', '', value)