package pl.edu.icm.saos.common.util;

import java.util.regex.Pattern;

import org.apache.commons.lang3.text.WordUtils;

import com.google.common.base.Preconditions;

/**
 * Static helpers for bringing person names ("first-name last-name" strings) to a common form.
 *
 * @author Łukasz Dumiszewski
 */
public final class PersonNameNormalizer {

    /** Matches every run of characters that are not a letter, a space, a hyphen or a dot. */
    private static final Pattern NON_ALPHABETIC = Pattern.compile("[^\\p{L} \\-\\.]+");

    /** Matches a hyphen together with any white-space directly surrounding it. */
    private static final Pattern HYPHEN_WITH_SURROUNDING_SPACE = Pattern.compile("\\s*\\-\\s*");


    //------------------------ CONSTRUCTORS --------------------------

    private PersonNameNormalizer() {
        throw new IllegalStateException("may not be instantiated");
    }


    //------------------------ LOGIC --------------------------

    /**
     * Normalizes the given firstLastName: {@link #unify(String) unifies} it, removes every character
     * that is not a letter, a space, a hyphen or a dot, squashes and trims the white-space
     * again and finally capitalizes the first letter of every word and of every hyphen-separated part.
     *
     * <pre>
     * normalize("%, Jan \t kowalski") -> "Jan Kowalski"
     * </pre>
     *
     * @param firstLastName the name to normalize, may not be null
     * @return the normalized name
     * @throws NullPointerException if firstLastName is null
     */
    public static String normalize(String firstLastName) {

        Preconditions.checkNotNull(firstLastName);

        String name = unify(firstLastName);

        name = removeNonAlphabetic(name);

        // after removing nonAlphabetic characters there can be new long spaces
        name = StringTools.squashAndTrim(name);

        return capitalizeFirstLetters(name);
    }


    /**
     * Returns the given first last name unified.
     * Performs the following operations, in this order:
     * <ul>
     * <li>{@link StringTools#toRootLowerCase(String)}</li>
     * <li>{@link StringTools#squashAndTrim(String)}</li>
     * <li>replaces every en dash with a hyphen and removes the white-space around hyphens</li>
     * </ul>
     *
     * @param firstLastName the name to unify
     * @return the unified name
     */
    public static String unify(String firstLastName) {

        String name = StringTools.toRootLowerCase(firstLastName);

        name = StringTools.squashAndTrim(name);

        return replaceLongDashWithShort(name);
    }


    //------------------------ PRIVATE --------------------------

    /** Drops everything except letters, spaces, hyphens and dots. */
    private static String removeNonAlphabetic(String value) {
        return NON_ALPHABETIC.matcher(value).replaceAll("");
    }

    /**
     * Capitalizes the first letter of each white-space separated word and of each
     * hyphen separated part; a stand-alone " Von " inside the name is written in lower case again.
     */
    private static String capitalizeFirstLetters(String value) {

        // first pass: words separated by white-space
        String capitalized = WordUtils.capitalize(value);

        // second pass: parts of hyphenated names, e.g. "Nowak-kowalska" -> "Nowak-Kowalska"
        capitalized = WordUtils.capitalize(capitalized, '-');

        return capitalized.replace(" Von ", " von ");
    }

    /** Replaces each en dash with a hyphen, then strips the white-space surrounding hyphens. */
    private static String replaceLongDashWithShort(String value) {

        // plain literal replacement, no regular expression needed for a single character
        String withHyphens = value.replace("–", "-");

        return HYPHEN_WITH_SURROUNDING_SPACE.matcher(withHyphens).replaceAll("-");
    }

}