package no.priv.garshol.duke.cleaners;
import no.priv.garshol.duke.cleaners.RegexpCleaner;
import org.junit.Test;
import static junit.framework.Assert.assertEquals;
/**
 * Tests for {@link RegexpCleaner}: extraction of a matched group with the
 * default settings, and removal of matched groups via the discard flags.
 */
public class RegexpCleanerTest extends LowerCaseNormalizeCleanerTest {

  /** A value the regexp does not match is expected to clean to {@code null}. */
  @Test
  public void testNoMatch() {
    test("^(\\d\\d\\d\\d)-", "gurble", null);
  }

  /** The four digits before the dash are expected as the cleaned value. */
  @Test
  public void testStartYear() {
    test("^(\\d\\d\\d\\d)-", "1850-1888", "1850");
  }

  /** The four digits after the dash are expected as the cleaned value. */
  @Test
  public void testEndYear() {
    test("-(\\d\\d\\d\\d)$", "1850-1888", "1888");
  }

  /**
   * With the discard flag set and group 2 selected, the expected output has
   * the first digit run ("42") removed while the later "53" is kept.
   */
  @Test
  public void discardSecondGroup() {
    final String input = "ID42ontLikeDigitsBut53inTheEndIsOk";
    final String expected = "IDontLikeDigitsBut53inTheEndIsOk";

    RegexpCleaner regexpCleaner = new RegexpCleaner();
    regexpCleaner.setDiscardGroup(true);
    regexpCleaner.setGroup(2);
    regexpCleaner.setRegexp("([a-zA-Z])(\\d+)");

    assertEquals(expected, regexpCleaner.clean(input));
  }

  /**
   * With discard-all enabled, every digit run is expected to be removed
   * from the value, even though the plain discard flag is off.
   */
  @Test
  public void discardAll() {
    final String input = "I123Dont454Like450Di3gits 4234 0234$";
    final String expected = "IDontLikeDigits $";

    RegexpCleaner regexpCleaner = new RegexpCleaner();
    regexpCleaner.setDiscardGroup(false); //independent of discard flag
    regexpCleaner.setDiscardAllGroup(true);
    regexpCleaner.setRegexp("(\\d+)");

    assertEquals(expected, regexpCleaner.clean(input));
  }

  /**
   * With discard-all enabled and group 2 selected, every occurrence of the
   * second group (digits plus optional whitespace) is expected to be removed,
   * leaving the capital letters of the first group in place.
   */
  @Test
  public void discardAllSecondGroup() {
    final String input = "This is D1 U312 K1231 E4332";
    final String expected = "This is DUKE";

    RegexpCleaner regexpCleaner = new RegexpCleaner();
    regexpCleaner.setDiscardAllGroup(true);
    regexpCleaner.setGroup(2);
    regexpCleaner.setRegexp("([A-Z])(\\d+\\s?)");

    assertEquals(expected, regexpCleaner.clean(input));
  }

  /**
   * Builds a fresh cleaner configured only with the given regexp and asserts
   * that cleaning {@code value} yields {@code result}.
   *
   * @param regexp the regular expression handed to the cleaner
   * @param value the raw input passed to {@code clean}
   * @param result the expected cleaned value; may be {@code null}
   */
  private void test(String regexp, String value, String result) {
    RegexpCleaner regexpCleaner = new RegexpCleaner();
    regexpCleaner.setRegexp(regexp);

    String cleaned = regexpCleaner.clean(value);
    assertEquals(result, cleaned);
  }
}