package no.priv.garshol.duke.cleaners; import no.priv.garshol.duke.Cleaner; /** * A cleaner which removes non-text characters. Specifically it strips * control characters (0-0x1F, 0x7F-0x9F) and special symbols in the * range 0xA1-0xBF. */ public class StripNontextCharacters implements Cleaner { public String clean(String value) { char[] tmp = new char[value.length()]; int pos = 0; for (int ix = 0; ix < value.length(); ix++) { char ch = value.charAt(ix); if (ch < 0x20 || (ch >= 0x7F && ch < 0xA0) || (ch > 0xA0 && ch < 0xC0)) continue; // skip Euro symbol, soft hyphen, etc etc tmp[pos++] = ch; } return new String(tmp, 0, pos); } }