"""A collection of functions to correct entries in the database.

"""


def CLEAN_COLLABORATION(value):
    """Correct common mistakes in the collaboration field.

    The following rules are applied to each comma-separated entry:

        * No leading and trailing spaces.
        * No empty entries (e.g. produced by ``,,`` or a trailing comma).
        * No duplicate entries (the first occurrence is kept).
        * Remove entries starting with *on behalf*, whatever the case.
        * The words *collaboration*, *consortium* and *group* are
          capitalised.

    Args:
        value (unicode): string where collaborations are separated by comma.
            An empty string or ``None`` is returned unchanged.

    Returns:
        unicode: the cleaned collaborations separated by ``", "``.

    """
    # Nothing to clean for an empty or missing field.
    if not value:
        return value

    cleaned = []

    for el in value.split(","):
        # Remove spaces at the beginning and at the end
        el = el.strip()

        # Skip empty entries, otherwise the output would contain ", , "
        if not el:
            continue

        # Fix "XXX collaboration" as "XXX Collaboration"
        el = el.replace("collaboration", "Collaboration")
        el = el.replace("consortium", "Consortium")
        el = el.replace("group", "Group")

        # Remove "on behalf of the LHCb Collaboration" (also "On behalf ...")
        if el.lower().startswith("on behalf"):
            continue

        # Avoid duplicate entries
        if el in cleaned:
            continue

        cleaned.append(el)

    return ", ".join(cleaned)
43 44


45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
def CLEAN_COLLABORATION_SYNONYM(value):
    """Normalise the typography of the collaboration synonym field.

    Each synonym may hold several names separated by a comma. Every synonym
    is rewritten so that:

        * names have no leading and trailing spaces
        * empty names (e.g. from ``,,``) are dropped
        * words inside a name are separated by exactly one space
        * names are separated by a comma followed by one space

    Args:
        value (list): list of synonym strings. Any value which is not
            a list is returned unchanged.

    Returns:
        list: the normalised synonyms, in the same order as the input.

    """
    if not isinstance(value, list):
        return value

    normalised = []

    for synonym in value:
        names = []

        for chunk in synonym.split(","):
            # split() without argument discards surrounding whitespace and
            # collapses inner runs of whitespace in a single pass
            words = chunk.split()

            # a chunk made only of whitespace yields no word: drop it
            if words:
                names.append(" ".join(words))

        normalised.append(", ".join(names))

    return normalised


80
def CLEAN_THESIS_DEFENSE(value):
    """Correct common mistakes on the thesis defence field.

        * Remove the words *Presented*, *presented* and *on* when they are
          followed by a space.

    Only whole words are removed, so text such as ``Lyon 2012`` is left
    untouched.

    Args:
        value (unicode): string with the defence date. An empty string or
            ``None`` is returned unchanged.

    Returns:
        unicode: the defence string without the prefix words.

    """
    # Local import: the module-level import section is not visible here.
    import re

    # Nothing to clean for an empty or missing field.
    if not value:
        return value

    # \b anchors the match at the start of a word; a plain
    # str.replace("on ", "") would also mutilate words ending with "on".
    return re.sub(r"\b(?:Presented|presented|on) ", "", value)