/** * ReferenceContainerTest * part of YaCy * Copyright 2016 by reger24; https://github.com/reger24 * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program in the file lgpl21.txt * If not, see <http://www.gnu.org/licenses/>. */ package net.yacy.kelondro.rwi; import java.util.Queue; import java.util.concurrent.LinkedBlockingQueue; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.crawler.retrieval.Response; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReferenceFactory; import net.yacy.kelondro.data.word.WordReferenceVars; import net.yacy.kelondro.util.Bitfield; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import org.junit.Test; /** * Unit tests for ReferenceContainer class. * * @author reger24 */ public class ReferenceContainerTest { /** * Test of add method, of class ReferenceContainer. this also demonstrates a * issue with word.distance() used in ranking */ @Test public void testAdd() throws Exception { ReferenceFactory<WordReference> wordReferenceFactory = new WordReferenceFactory(); byte[] termHash = Word.word2hash("test"); ReferenceContainer<WordReference> rc = new ReferenceContainer(wordReferenceFactory, termHash); // prepare a WordReference to be added to the container DigestURL url = new DigestURL("http://test.org/test.html"); int urlComps = MultiProtocolURL.urlComps(url.toNormalform(true)).length; int urlLength = url.toNormalform(true).length(); Queue<Integer> positions = new LinkedBlockingQueue<Integer>(); positions.add(10); WordReferenceVars wentry = new WordReferenceVars( url.hash(), urlLength, // byte-length of complete URL urlComps, // number of path components 0, // length of description/length (longer are better?) 1, // how often appears this word in the text 1, // total number of words 1, // total number of phrases 1, // first position of word in text positions, // positions of words that are joined into the reference 1, // position of word in its phrase 1, // number of the phrase where word appears 0, // last-modified time of the document where word appears "en", // (guessed) language of document Response.DT_TEXT, // type of document 0, // outlinks to same domain 0, // outlinks to other domain new Bitfield(4), // attributes to the url and to the word according the url 0.0d ); rc.add(wentry); // add the ref assertTrue("size after add", rc.size() > 0); WordReference wc = rc.getReference(url.hash()); // retrieve the ref assertNotNull("getReference failed", wc); // TODO: ReferenceContainer used for rwi results. As distance doesn't persist after adding ref to container making the distance ranking obsolete -> remove or fix System.out.println("-----------------------------------------------------------"); System.out.println("WordReference (word distance) before add to container: " + wentry.distance()); System.out.println("WordReference (word distance) after get from container: " + wc.distance()); System.out.println("-----------------------------------------------------------"); assertEquals("distance()", wentry.distance(), wc.distance()); } }