package org.wikibrain.phrases;
import org.apache.commons.lang3.tuple.Pair;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.core.lang.StringNormalizer;
import org.wikibrain.phrases.PrunedCounts;
import java.util.Iterator;
/**
 * Data access object for the relationships between phrases and pages.
 *
 * <p>Counts can be stored and looked up in both directions: the phrases
 * associated with a page, and the pages associated with a phrase.
 *
 * @author Shilad Sen
 */
public interface PhraseAnalyzerDao {

    /**
     * Stores the phrase counts associated with a page.
     * Saving counts again for the same page overrides the earlier counts.
     *
     * @param lang   language of the page
     * @param wpId   id of the page
     * @param counts phrase counts to associate with the page
     * @throws DaoException on a dao-level failure
     */
    void savePageCounts(Language lang, int wpId, PrunedCounts<String> counts) throws DaoException;

    /**
     * Stores the page counts associated with a phrase.
     * The phrase is normalized first, so two phrases that normalize to the same
     * string are treated as one and the same phrase. Saving counts again for the
     * same phrase overrides the earlier counts.
     *
     * @param lang   language of the phrase
     * @param phrase the phrase (normalized before use)
     * @param counts page counts to associate with the phrase
     * @throws DaoException on a dao-level failure
     */
    void savePhraseCounts(Language lang, String phrase, PrunedCounts<Integer> counts) throws DaoException;

    /**
     * Iterates over every phrase in the given language.
     *
     * @param lang language whose phrases are requested
     * @return an iterator over all phrases in {@code lang}
     */
    Iterator<String> getAllPhrases(Language lang);

    /**
     * Iterates over the counts of every phrase in the given language.
     *
     * @param lang language whose phrase counts are requested
     * @return an iterator over (phrase, counts) pairs for {@code lang}
     */
    Iterator<Pair<String, PrunedCounts<Integer>>> getAllPhraseCounts(Language lang);

    /**
     * @return the normalizer used to derive the canonical form of a string
     */
    StringNormalizer getStringNormalizer();

    /**
     * Looks up the pages related to a phrase.
     * The phrase is normalized before the lookup is performed.
     *
     * @param lang     language of the phrase
     * @param phrase   the phrase to look up
     * @param maxPages presumably an upper bound on the number of pages returned
     *                 (TODO confirm against the implementations)
     * @return a mapping from page ids (in the local language) to their number of
     *         occurrences, ordered by decreasing count
     * @throws DaoException on a dao-level failure
     */
    PrunedCounts<Integer> getPhraseCounts(Language lang, String phrase, int maxPages) throws DaoException;

    /**
     * Looks up the phrases related to a page.
     *
     * @param lang       language of the page
     * @param wpId       local page id
     * @param maxPhrases presumably an upper bound on the number of phrases returned
     *                   (TODO confirm against the implementations)
     * @return a mapping from phrases (in the local language) to their number of
     *         occurrences, ordered by decreasing count
     * @throws DaoException on a dao-level failure
     */
    PrunedCounts<String> getPageCounts(Language lang, int wpId, int maxPhrases) throws DaoException;

    /**
     * Flushes the dao.
     * NOTE(review): presumably writes pending data out in the same way
     * {@link #close()} does, but without closing; the exact contract is not
     * documented here. Unlike {@link #close()}, no checked exception is declared.
     */
    void flush();

    /**
     * Closes the dao, flushing any data not yet written to disk.
     *
     * @throws DaoException on a dao-level failure
     */
    void close() throws DaoException;
}