package org.voyanttools.trombone.model; import static org.junit.Assert.*; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.junit.Test; import org.voyanttools.trombone.input.index.LuceneIndexer; import org.voyanttools.trombone.input.source.InputSource; import org.voyanttools.trombone.input.source.StringInputSource; import org.voyanttools.trombone.lucene.CorpusMapper; import org.voyanttools.trombone.storage.Storage; import org.voyanttools.trombone.storage.StoredDocumentSourceStorage; import org.voyanttools.trombone.util.FlexibleParameters; import org.voyanttools.trombone.util.TestHelper; public class CorpusTermMinimalsDBTest { @Test public void test() throws IOException { Storage storage = TestHelper.getDefaultTestStorage(); InputSource one = new StringInputSource("dark and stormy night in document one"); InputSource two = new StringInputSource("It was a dark and stormy night."); InputSource three = new StringInputSource("It was the best of times it was the worst of times."); StoredDocumentSourceStorage storedDocumentSourceStorage = storage.getStoredDocumentSourceStorage(); List<StoredDocumentSource> storedDocumentSources = new ArrayList<StoredDocumentSource>(); storedDocumentSources.add(storedDocumentSourceStorage.getStoredDocumentSource(one)); LuceneIndexer luceneIndexer = new LuceneIndexer(storage, new FlexibleParameters()); luceneIndexer.index(storedDocumentSources); storedDocumentSources.clear(); storedDocumentSources.add(storedDocumentSourceStorage.getStoredDocumentSource(two)); storedDocumentSources.add(storedDocumentSourceStorage.getStoredDocumentSource(three)); String id = luceneIndexer.index(storedDocumentSources); CorpusMetadata metadata = new CorpusMetadata(id); List<String> ids = new ArrayList<String>(); for (StoredDocumentSource storedDocumentSource : storedDocumentSources) {ids.add(storedDocumentSource.getId());} metadata.setDocumentIds(ids); Corpus corpus = new Corpus(storage, metadata); CorpusMapper corpusMapper = new CorpusMapper(storage, corpus); CorpusTermMinimalsDB corpusTermMinimalsDB = null; try { corpusTermMinimalsDB = CorpusTermMinimalsDB.getInstance(corpusMapper, TokenType.lexical); assertNull(corpusTermMinimalsDB.get("document")); // from first document added, not in this corpus assertEquals(1, corpusTermMinimalsDB.get("night").getRawFreq()); assertEquals(3, corpusTermMinimalsDB.get("was").getRawFreq()); } finally { if (corpusTermMinimalsDB!=null) {corpusTermMinimalsDB.close();} } } }