package net.yacy.repository; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; import org.junit.Assert; import org.junit.Test; import static org.junit.Assert.*; public class BlacklistTest { /** * Simulates contains method, of class Blacklist as proof for pattern.toString * needed and works */ @Test public void testContains() { String path = ".*"; // simplest test pattern Pattern pattern = Pattern.compile(path, Pattern.CASE_INSENSITIVE); // pattern list as in Blacklist class // ConcurrentMap<BlacklistType, Map<String, Set<Pattern>>> hostpaths_matchable; // simulate last part, path pattern set Set<Pattern> hostList = new HashSet<Pattern>(); hostList.add(pattern); // proof assumption pattern(path) != path boolean ret = hostList.contains(path); assertFalse("match blacklist pattern " + path, ret); // proof pattern.toString match works for (Pattern hp : hostList) { String hpxs = hp.pattern(); if (hpxs.equals(path)) { ret = true; break; } } assertTrue("match blacklist pattern " + path, ret); } /** * Tests static Blacklist.isListed() function with some sample patterns. */ @Test public void testIsListed() { final Map<String, Set<Pattern>> blacklistMapMatched = new HashMap<>(); Set<Pattern> patterns = new HashSet<>(); patterns.add(Pattern.compile(".*")); blacklistMapMatched.put("stats.example.com", patterns); patterns = new HashSet<>(); patterns.add(Pattern.compile(".*")); blacklistMapMatched.put("site.blacklisted.net", patterns); patterns = new HashSet<>(); patterns.add(Pattern.compile("data/js/\\d*\\.js")); blacklistMapMatched.put("js.blacklisted.org", patterns); patterns = new HashSet<>(); patterns.add(Pattern.compile(".*")); blacklistMapMatched.put("ftp.*", patterns); patterns = new HashSet<>(); patterns.add(Pattern.compile("bestenlisten/.*")); patterns.add(Pattern.compile("produkte/.*")); blacklistMapMatched.put("esample.de", patterns); final Map<String, Set<Pattern>> blacklistMapNotMatched = new HashMap<>(); patterns = new HashSet<>(); patterns.add(Pattern.compile(".*")); blacklistMapNotMatched.put("mobil\\..*", patterns); patterns = new HashSet<>(); patterns.add(Pattern.compile("counter\\?.*")); blacklistMapNotMatched.put(".*samples.fr", patterns); patterns = new HashSet<>(); patterns.add(Pattern.compile(".*\\.js")); patterns.add(Pattern.compile(".*\\.jpg")); patterns.add(Pattern.compile(".*BannerAd.*")); // Form "(.*/|)term.*" should be preferred over "(.*/)*term.*" which is consuming far too much CPU on JDK 7 and URLs with many path segments patterns.add(Pattern.compile("(.*/|)search.*")); patterns.add(Pattern.compile("(.*/|)bizad.*")); patterns.add(Pattern.compile("(.*/|)member/.*")); blacklistMapNotMatched.put(".*.*", patterns); Assert.assertTrue(Blacklist.isListed("site.blacklisted.net", "", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("site.blacklisted.net", "/index.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("mobil.news.fr", "/index.htm", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("mobil.news.fr", "/news/latest.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("fr.notblacklisted.org", "/script.js", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("fr.notblacklisted.org", "/js/script.js", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertFalse(Blacklist.isListed("fr.notblacklisted.org", "/index.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertFalse(Blacklist.isListed("js.blacklisted.org", "/index.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("fr.notblacklisted.org", "/search.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("fr.notblacklisted.org", "/aa/search.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("fr.notblacklisted.org", "/aa/bb/search.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("fr.notblacklisted.org", "/aa/bb/search/index.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("fr.notblacklisted.org", "/search/index.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("fr.notblacklisted.org", "/searchengine/index.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("fr.notblacklisted.org", "/searchengine", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertTrue(Blacklist.isListed("fr.notblacklisted.org", "/aaa/searchengine", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertFalse(Blacklist.isListed("fr.notblacklisted.org", "/thesearch.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertFalse(Blacklist.isListed("fr.notblacklisted.org", "/aa/thesearch.html", blacklistMapMatched, blacklistMapNotMatched)); Assert.assertFalse(Blacklist.isListed("fr.notblacklisted.org", "/path/with/many/segments/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/file.html", blacklistMapMatched, blacklistMapNotMatched)); } }