"""A collection of functions to correct entries in the database.

"""
import re

# Lower-case keywords that must be capitalised in a collaboration name.
# The leading word boundary keeps words merely containing a keyword
# (e.g. "subgroup") untouched, while plurals ("groups") are still fixed.
_COLLABORATION_KEYWORDS = re.compile(r"\b(?:collaboration|consortium|group)")

# Filler words removed from the thesis defence field. They are matched as
# whole words only, so that "Lyon 2012" is not turned into "Ly2012".
_DEFENSE_FILLERS = re.compile(r"\b(?:[Pp]resented|on) ")


def CLEAN_COLLABORATION(value):
    """Correct common mistakes in the collaboration field.

        * No leading and trailing spaces.
        * No empty entries (e.g. produced by a trailing or doubled comma).
        * No duplicate entries (the first occurrence is kept).
        * Remove entries starting with *on behalf* (in any letter case).
        * The words *collaboration*, *consortium* and *group* start with
          a capital letter.

    Args:
        value (unicode):
            string where collaborations are separated by comma

    Returns:
        unicode:
            the cleaned collaborations joined by a comma and a space

    """
    cleaned = []

    for entry in value.split(","):

        # remove spaces at the beginning and at the end
        entry = entry.strip()

        # skip empty entries such as the one after a trailing comma
        if not entry:
            continue

        # fix "XXX collaboration" as "XXX Collaboration"
        entry = _COLLABORATION_KEYWORDS.sub(
            lambda match: match.group(0).capitalize(), entry)

        # avoid duplicate entries
        if entry in cleaned:
            continue

        # remove "on behalf of the LHCb Collaboration"
        if entry.lower().startswith("on behalf"):
            continue

        cleaned.append(entry)

    return ", ".join(cleaned)


def CLEAN_COLLABORATION_SYNONYM(value):
    """Correct common mistakes in the synonym field for collaboration.

    A synonym can contain several names separated by a comma.
    It should follow standard typographic rules:

        * No leading and trailing spaces
        * One space after comma
        * One space between words

    Args:
        value (list):
            list of synonym strings. Any other type is returned unchanged.

    Returns:
        list:
            the cleaned synonyms, in the same order

    """
    if not isinstance(value, list):
        return value

    cleaned = []
    for synonym in value:

        # splitting on whitespace and re-joining removes leading and
        # trailing spaces and leaves a single space between words
        names = [" ".join(name.split()) for name in synonym.split(",")]

        # drop the empty names produced by ",,"
        cleaned.append(", ".join(name for name in names if name))

    return cleaned


def CLEAN_THESIS_DEFENSE(value):
    """Correct common mistakes in the thesis defence field.

        * Remove the filler words *Presented*, *presented* and *on*.
          Each is removed together with the space following it, and only
          when it is a whole word.

    Args:
        value (unicode):
            string with the defence date

    Returns:
        unicode:
            the string without the filler words

    """
    return _DEFENSE_FILLERS.sub("", value)