filters.py 1.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
"""A collection of functions to correct entries in the database.

@author: R. Le Gac

"""

def CLEAN_COLLABORATION(value):
    """Normalise a comma-separated list of collaborations.

    Each entry is stripped of surrounding white space and the word
    'collaboration' is rewritten as 'Collaboration'. Empty entries,
    duplicates and entries starting with 'on behalf' are dropped.
    The order of first occurrence is preserved.

    @type value: str
    @param value: string where collaborations are separated by comma

    @rtype: str
    @return: the cleaned entries joined by ', '

    """
    li = []

    for el in value.split(','):
        # Remove space at the beginning and at the end, then
        # fix "XXX collaboration" as "XXX Collaboration"
        el = el.strip().replace('collaboration', 'Collaboration')

        # Skip empty entries left by stray or trailing commas,
        # otherwise the result would contain dangling ', ' separators
        if not el:
            continue

        # Avoid duplicate entries
        if el in li:
            continue

        # Remove 'on behalf of the LHCb Collaboration'
        if el.startswith('on behalf'):
            continue

        li.append(el)

    return ', '.join(li)


def CLEAN_REVIEW(value):
    """Normalise a review abbreviation.

    Dots and commas are removed, then any run of white space is
    collapsed into a single space; leading and trailing white space
    is dropped.

    @type value: str
    @param value: review abbreviation

    @rtype: str
    @return: the abbreviation without dot, comma or superfluous space

    """
    # Drop the punctuation first, so that the white space it leaves
    # behind is collapsed by the split/join below
    for punctuation in (".", ","):
        value = value.replace(punctuation, "")

    # split() without argument cuts on any run of white space
    words = value.split()
    return ' '.join(words)


def CLEAN_THESIS_DEFENSE(value):
    """Remove the 'Presented on' wording from the thesis_defense field.

    Every occurrence of 'Presented ' and of the standalone word 'on '
    is removed. The remainder of the string is returned untouched.

    @type value: str
    @param value: string with the defense date

    @rtype: str
    @return: the string without 'Presented ' and without the word 'on '

    """
    import re

    value = value.replace('Presented ', '')

    # Match 'on' as a whole word only: a plain substring replace would
    # also truncate text such as 'Mon ' or 'London '
    value = re.sub(r'\bon ', '', value)
    return value