import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;
import org.junit.Test;
import org.wikibrain.conf.Configuration;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.lucene.LuceneOptions;
import org.wikibrain.lucene.WikiBrainAnalyzer;
/**
 * Smoke tests for {@link WikiBrainAnalyzer}.
 *
 * <p>Each test builds an analyzer, uses it to index one small document into a
 * fresh in-memory Lucene index, and passes as long as no exception is thrown.
 * No assertions are made about the tokens that the analyzer produces.
 */
public class TestLanguageTokenizer {

    /** Sample field that every test adds to the document it indexes. */
    private final Field textField = new TextField("test", "wrap around the world", Field.Store.YES);

    /**
     * Indexes the sample document with an analyzer created through the
     * single-argument constructor for the "en" language code.
     *
     * @throws IOException if writing to the in-memory index fails
     * @throws WikiBrainException declared by the original test; propagated from analyzer setup
     */
    @Test
    public void shortTest() throws IOException, WikiBrainException {
        LuceneOptions opts = LuceneOptions.getDefaultOptions();
        WikiBrainAnalyzer wa = new WikiBrainAnalyzer(Language.getByLangCode("en"));
        indexSampleDocument(wa, opts);
    }

    /**
     * Indexes the sample document once per language, using the language codes
     * configured under {@code languages.big-economies.langCodes} plus "he" and "sk".
     *
     * @throws IOException if writing to an in-memory index fails
     */
    @Test
    public void test() throws IOException {
        LuceneOptions opts = LuceneOptions.getDefaultOptions();
        // Copy the configured list before extending it, so the test does not
        // depend on the configuration layer handing back a mutable list.
        List<String> langCodes = new ArrayList<String>(
                new Configuration().get().getStringList("languages.big-economies.langCodes"));
        langCodes.add("he");
        langCodes.add("sk");
        LanguageSet langSet = new LanguageSet(langCodes);
        for (Language language : langSet) {
            WikiBrainAnalyzer wa = new WikiBrainAnalyzer(language, opts);
            indexSampleDocument(wa, opts);
        }
    }

    /**
     * Creates a new in-memory index that uses {@code analyzer} and adds a
     * single document containing {@link #textField} to it.
     *
     * @param analyzer analyzer under test
     * @param opts     options supplying the Lucene match version
     * @throws IOException if the writer cannot be opened, written to, or closed
     */
    private void indexSampleDocument(WikiBrainAnalyzer analyzer, LuceneOptions opts) throws IOException {
        IndexWriterConfig iwc = new IndexWriterConfig(opts.matchVersion, analyzer);
        iwc.setRAMBufferSizeMB(1024.0);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(new RAMDirectory(), iwc);
        try {
            Document d = new Document();
            d.add(textField);
            writer.addDocument(d);
        } finally {
            // Always release the writer, even when indexing fails.
            writer.close();
        }
    }
}