package org.wikibrain.phrases;
import com.typesafe.config.Config;
import org.apache.commons.collections.Predicate;
import org.apache.commons.collections.Transformer;
import org.apache.commons.collections.iterators.FilterIterator;
import org.apache.commons.collections.iterators.TransformIterator;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.wikibrain.conf.Configuration;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.StringNormalizer;
import org.wikibrain.utils.ObjectDb;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
/**
 * Persists information about phrases to page relationships using an object database.
 *
 * <p>Two stores are kept beneath the dao's directory: {@code describe}, keyed by the
 * decimal page id, and {@code resolve}, keyed by the normalized phrase.
 */
public class PhraseAnalyzerLangDao {
    private final StringNormalizer normalizer;
    private final Language lang;
    private final File dir;
    private final ObjectDb<PrunedCounts<String>> describeDb;
    private final ObjectDb<PrunedCounts<Integer>> resolveDb;

    /**
     * Creates a new dao using the given directory.
     *
     * @param normalizer Normalizer applied to every phrase before it is used as a key.
     * @param lang Language handed to the normalizer.
     * @param path Directory that holds the {@code describe} and {@code resolve} stores.
     * @param isNew If true, delete any information contained in the directory.
     * @throws DaoException If the directory cannot be created or a store cannot be opened.
     */
    public PhraseAnalyzerLangDao(StringNormalizer normalizer, Language lang, File path, boolean isNew) throws DaoException {
        this.dir = path;
        this.lang = lang;
        this.normalizer = normalizer;
        if (isNew) {
            if (path.exists()) {
                FileUtils.deleteQuietly(path);
            }
            // mkdirs() also returns false when the directory already exists (for example
            // because the quiet delete above failed), so only a missing directory is fatal.
            if (!path.mkdirs() && !path.isDirectory()) {
                throw new DaoException(new IOException("Could not create directory " + path));
            }
        }
        ObjectDb<PrunedCounts<String>> describe = null;
        ObjectDb<PrunedCounts<Integer>> resolve;
        try {
            describe = new ObjectDb<PrunedCounts<String>>(new File(path, "describe"), isNew);
            resolve = new ObjectDb<PrunedCounts<Integer>>(new File(path, "resolve"), isNew);
        } catch (IOException e) {
            // Do not leave the first store open when the second one fails to open.
            if (describe != null) {
                try {
                    describe.close();
                } catch (RuntimeException ignored) {
                    // The failure to open is the error worth reporting to the caller.
                }
            }
            throw new DaoException(e);
        }
        this.describeDb = describe;
        this.resolveDb = resolve;
    }

    /**
     * Stores the phrase counts for a page, replacing whatever the store holds for that id.
     *
     * @param wpId Page id; its decimal string form is the key.
     * @param counts Counts to store.
     * @throws DaoException If the underlying store reports an I/O failure.
     */
    public void savePageCounts(int wpId, PrunedCounts<String> counts) throws DaoException {
        try {
            describeDb.put(pageKey(wpId), counts);
        } catch (IOException e) {
            throw new DaoException(e);
        }
    }

    /**
     * Stores the page counts for a phrase under its normalized form.
     *
     * @param phrase Phrase; normalized before being used as the key.
     * @param counts Counts to store.
     * @throws DaoException If the underlying store reports an I/O failure.
     */
    public void savePhraseCounts(String phrase, PrunedCounts<Integer> counts) throws DaoException {
        phrase = normalizer.normalize(lang, phrase);
        try {
            resolveDb.put(phrase, counts);
        } catch (IOException e) {
            throw new DaoException(e);
        }
    }

    /**
     * @return An iterator over the keys of the {@code resolve} store, i.e. the phrases in
     *         the normalized form they were saved under.
     */
    public Iterator<String> getAllPhrases() {
        return resolveDb.keyIterator();
    }

    /**
     * @return An iterator over every (normalized phrase, counts) pair in the
     *         {@code resolve} store.
     */
    public Iterator<Pair<String, PrunedCounts<Integer>>> getAllPhraseCounts() {
        return resolveDb.iterator();
    }

    /**
     * Looks up the page counts stored for a phrase.
     *
     * @param phrase Phrase; normalized before the lookup.
     * @param maxPages Maximum number of entries in the result.
     * @return The stored counts when they hold at most {@code maxPages} entries (or whatever
     *         the store returns for an unknown key, including {@code null}); otherwise a new
     *         object with the same total and only the first {@code maxPages} entries in the
     *         stored iteration order.
     * @throws DaoException If the store fails to read or deserialize the value.
     */
    public PrunedCounts<Integer> getPhraseCounts(String phrase, int maxPages) throws DaoException {
        phrase = normalizer.normalize(lang, phrase);
        try {
            return truncate(resolveDb.get(phrase), maxPages);
        } catch (IOException e) {
            throw new DaoException(e);
        } catch (ClassNotFoundException e) {
            throw new DaoException(e);
        }
    }

    /**
     * Looks up the phrase counts stored for a page.
     *
     * @param wpId Page id.
     * @param maxPhrases Maximum number of entries in the result.
     * @return The stored counts when they hold at most {@code maxPhrases} entries (or whatever
     *         the store returns for an unknown key, including {@code null}); otherwise a new
     *         object with the same total and only the first {@code maxPhrases} entries in the
     *         stored iteration order.
     * @throws DaoException If the store fails to read or deserialize the value.
     */
    public PrunedCounts<String> getPageCounts(int wpId, int maxPhrases) throws DaoException {
        try {
            return truncate(describeDb.get(pageKey(wpId)), maxPhrases);
        } catch (IOException e) {
            throw new DaoException(e);
        } catch (ClassNotFoundException e) {
            throw new DaoException(e);
        }
    }

    /** Flushes both underlying stores. */
    public void flush() {
        this.describeDb.flush();
        this.resolveDb.flush();
    }

    /** Closes both underlying stores; the second is closed even if closing the first fails. */
    public void close() {
        try {
            this.describeDb.close();
        } finally {
            this.resolveDb.close();
        }
    }

    /** Builds the {@code describe} store key for a page id. */
    private static String pageKey(int wpId) {
        return Integer.toString(wpId);
    }

    /**
     * Caps {@code counts} at {@code maxEntries} entries. Returns the argument itself when it
     * is null or already small enough; otherwise copies the leading entries, in iteration
     * order, into a new object that keeps the original total.
     */
    private static <K> PrunedCounts<K> truncate(PrunedCounts<K> counts, int maxEntries) {
        if (counts == null || counts.size() <= maxEntries) {
            return counts;
        }
        PrunedCounts<K> result = new PrunedCounts<K>(counts.getTotal());
        for (K key : counts.keySet()) {
            if (result.size() >= maxEntries) {
                break;
            }
            result.put(key, counts.get(key));
        }
        return result;
    }
}