filters.py 1.96 KB
Newer Older
1 2 3 4 5 6 7 8 9
"""A collection of functions to correct entries in the database.

@author: R. Le Gac

"""

def CLEAN_COLLABORATION(value):
    """Correct common mistakes on the collaboration field.

    The following corrections are applied to each comma-separated entry:

        - No leading and trailing spaces
        - C{collaboration}, C{consortium} and C{group} are written
          with a capital letter
        - No empty entries (doubled or trailing comma)
        - No duplicate entries, the first occurrence is kept
        - Remove entries starting with C{on behalf}, whatever the case

    @type value: str
    @param value: string where collaborations are separated by comma

    @rtype: str
    @return: the cleaned entries joined by C{', '}

    """
    # (spelling found in the database, corrected spelling)
    fixes = (('collaboration', 'Collaboration'),
             ('consortium', 'Consortium'),
             ('group', 'Group'))

    li = []

    for el in value.split(','):
        # Fix to remove space at the beginning and at the end
        el = el.strip()

        # Fix "XXX collaboration" as "XXX Collaboration"
        for wrong, right in fixes:
            el = el.replace(wrong, right)

        # Fix to avoid empty entries, otherwise "A," ends up as "A, "
        if not el:
            continue

        # Fix to avoid duplicate entries
        if el in li:
            continue

        # Fix to remove 'on behalf of the LHCb Collaboration',
        # also when it is written 'On behalf of ...'
        if el.lower().startswith('on behalf'):
            continue

        li.append(el)

    return ', '.join(li)


def CLEAN_REVIEW(value):
    """Tidy up the abbreviation of a review.

        - Every dot and comma is removed
        - No leading and trailing spaces
        - Words are separated by exactly one space

    @type value: str
    @param value: review abbreviation

    @rtype: str
    @return: the cleaned abbreviation

    """
    # Drop the punctuation marks one after the other
    for mark in ('.', ','):
        value = value.replace(mark, '')

    # split() without argument discards leading, trailing and repeated
    # whitespace, so joining the words back leaves single spaces only
    words = value.split()
    return ' '.join(words)


def CLEAN_THESIS_DEFENSE(value):
    """Correct common mistakes on the thesis_defense field.

        - Remove the words C{Presented}, C{presented} and C{on}
          together with the space following them

    Only whole words are removed, so a word which merely ends with
    one of them (C{Avignon}, C{Represented}) is left untouched.

    @type value: str
    @param value: string with the defense date

    @rtype: str
    @return: the string without the unwanted words

    """
    # Imported here since the module has no import section
    import re

    # \b anchors the match at the start of a word; a plain
    # str.replace('on ', '') would turn "Avignon 2010" into "Avign2010"
    return re.sub(r'\b(?:[Pp]resented|on) ', '', value)