package org.wikibrain.phrases;
import com.typesafe.config.Config;
import org.wikibrain.conf.Configuration;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.core.lang.LocalId;
import org.wikibrain.core.model.LocalPage;
import org.wikibrain.lucene.*;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Using Lucene in a phrase analyzer.
* @author Yulun Li
*/
public class OldLucenePhraseAnalyzer implements PhraseAnalyzer {
private static final Logger LOG = LoggerFactory.getLogger(PhraseAnalyzer.class);
private final LuceneSearcher searcher;
protected LocalPageDao localPageDao;
public OldLucenePhraseAnalyzer(LocalPageDao localPageDao, LuceneSearcher searcher) {
this.localPageDao = localPageDao;
this.searcher = searcher;
}
@Override
public LinkedHashMap<LocalId, Float> resolve(Language language, String phrase, int maxPages) throws DaoException {
LinkedHashMap<LocalId, Float> result = new LinkedHashMap<LocalId, Float>();
WikiBrainScoreDoc[] wikibrainScoreDocs = searcher.getQueryBuilderByLanguage(language)
.setPhraseQuery(phrase)
.setNumHits(10)
.search();
if (wikibrainScoreDocs.length == 0 && phrase.indexOf(" ") < 0) {
String phraseMultiVersion = "";
for (int i = 1; i < phrase.length(); i++) {
phraseMultiVersion += (i > 2 ? phrase.substring(0, i) + " " : "");
phraseMultiVersion += (phrase.length() - i > 2 ? phrase.substring(i, phrase.length()) + " " : "");
}
wikibrainScoreDocs = searcher.getQueryBuilderByLanguage(language)
.setPhraseQuery(phraseMultiVersion)
.setNumHits(10)
.search();
}
float totalScore = 0;
for (WikiBrainScoreDoc wikibrainScoreDoc : wikibrainScoreDocs) {
totalScore += wikibrainScoreDoc.score;
}
for (WikiBrainScoreDoc wikibrainScoreDoc : wikibrainScoreDocs) {
int localPageId = searcher.getLocalIdFromDocId(wikibrainScoreDoc.luceneId, language);
LocalId localId = new LocalId(language, localPageId);
result.put(localId, wikibrainScoreDoc.score / totalScore);
}
return result;
}
public static class Provider extends org.wikibrain.conf.Provider<PhraseAnalyzer> {
public Provider(Configurator configurator, Configuration config) throws ConfigurationException {
super(configurator, config);
}
@Override
public Class<PhraseAnalyzer> getType() {
return PhraseAnalyzer.class;
}
@Override
public String getPath() {
return "phrases.analyzer";
}
@Override
public PhraseAnalyzer get(String name, Config config, Map<String, String> runtimeParams) throws ConfigurationException {
if (!config.getString("type").equals("olucene")) {
return null;
}
LocalPageDao localPageDao = getConfigurator().get(LocalPageDao.class, config.getString("localPageDao"));
LuceneSearcher searcher = new LuceneSearcher(
new LanguageSet("simple"),
getConfigurator().get(LuceneOptions.class));
return new LucenePhraseAnalyzer(localPageDao, searcher);
}
}
@Override
public LinkedHashMap<String, Float> describe(Language language, LocalPage page, int maxPhrases) throws DaoException {
return null;
}
@Override
public int loadCorpus(LanguageSet langs) throws DaoException, IOException {
return -1;
}
}