package net.yacy.crawler;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.data.WorkTables;
import static net.yacy.kelondro.util.FileUtils.deletedelete;
import org.junit.Test;
import static org.junit.Assert.*;

/**
 * Tests for {@link HostBalancer} that exercise pushing a single file: URL,
 * closing the balancer and reopening it on the same queue directory.
 */
public class HostBalancerTest {

    /** Directory handed to the HostBalancer as its queue root. */
    final File queuesRoot = new File("test/DATA/INDEX/QUEUES");

    /** Directory handed to the WorkTables instance used by RobotsTxt. */
    final File datadir = new File("test/DATA");

    /**
     * Test of reopening an existing HostBalancer cache to test/demonstrate an
     * issue with HostQueue for the file: protocol.
     *
     * <p>Sequence: start from a deleted queue directory, push one file: URL
     * twice (second push must be rejected), close the balancer, reopen it on
     * the same directory and verify that the URL is still known and that
     * further pushes of the same URL are still rejected.
     *
     * <p>Each balancer instance is closed in a {@code finally} block so that a
     * failing assertion does not leave the on-disk queue open.
     *
     * @throws IOException declared by the balancer/queue operations used here
     * @throws SpaceExceededException declared by the balancer/queue operations used here
     * @throws InterruptedException if one of the short waits is interrupted
     */
    @Test
    public void testReopen() throws IOException, SpaceExceededException, InterruptedException {
        boolean exceed134217727 = true;
        int onDemandLimit = 1000;
        String hostDir = "C:\\filedirectory";

        // prepare one url for the push test
        String urlstr = "file:///" + hostDir;
        DigestURL url = new DigestURL(urlstr);
        Request req = new Request(url, null);

        deletedelete(queuesRoot); // start with a clean test directory

        WorkTables wt;
        RobotsTxt rob;
        String res;

        HostBalancer hb = new HostBalancer(queuesRoot, onDemandLimit, exceed134217727);
        try {
            Thread.sleep(100); // wait for file operation
            hb.clear();
            Thread.sleep(100);

            assertEquals("After clear", 0, hb.size());

            wt = new WorkTables(datadir);
            rob = new RobotsTxt(wt, null, 10);

            res = hb.push(req, null, rob); // push url
            assertNull(res); // should have no error text
            assertTrue(hb.has(url.hash())); // check existence
            assertEquals("first push of one url", 1, hb.size()); // expected size=1

            res = hb.push(req, null, rob); // push same url (should be rejected = double occurrence)
            assertNotNull(res); // should state double occurrence
            assertTrue(hb.has(url.hash()));
            assertEquals("second push of same url", 1, hb.size());
        } finally {
            hb.close(); // always release the queue, even when an assertion failed
        }

        Thread.sleep(200); // wait a bit for file operation

        HostBalancer reopened = new HostBalancer(queuesRoot, onDemandLimit, exceed134217727); // reopen balancer
        try {
            Thread.sleep(200); // wait a bit for file operation

            // expect size=1 from previous push
            assertEquals("size after reopen (with one existing url)", 1, reopened.size());
            // check url exists (original note: this fails as after reopen the internal queue.hosthash is wrong)
            assertTrue("check existance of pushed url", reopened.has(url.hash()));

            // push same url as before (original note: should be rejected, but isn't due to
            // a hosthash mismatch after reopen)
            res = reopened.push(req, null, rob);
            assertNotNull("should state double occurence", res);
            assertEquals("first push of same url after reopen", 1, reopened.size()); // should stay size=1
            assertTrue("check existance of pushed url", reopened.has(url.hash()));

            res = reopened.push(req, null, rob);
            assertNotNull("should state double occurence", res);
            assertTrue("check existance of pushed url", reopened.has(url.hash()));
            // double check, should stay size=1
            assertEquals("second push of same url after reopen", 1, reopened.size());

            // list all urls in the host balancer
            Iterator<Request> it = reopened.iterator();
            while (it.hasNext()) {
                Request rres = it.next();
                System.out.println(rres.toString());
            }
        } finally {
            reopened.close(); // always release the queue, even when an assertion failed
        }
    }
}