package org.wikibrain.phrases; import org.wikibrain.core.dao.DaoException; import org.wikibrain.core.lang.Language; import org.wikibrain.core.lang.LanguageSet; import org.wikibrain.core.lang.LocalId; import org.wikibrain.core.model.LocalPage; import org.wikibrain.core.model.UniversalPage; import java.io.IOException; import java.util.LinkedHashMap; /** * Given a page, returns the most common phrases */ public interface PhraseAnalyzer { /** * Loads a specific corpus into the dao. * Returns the number of phrases loaded. * * @throws DaoException * @throws IOException */ int loadCorpus(LanguageSet langs) throws DaoException, IOException; /** * Returns the most descriptive phrases for a wikipedia page. * @param language The language for the phrase and the returned LocalPages. * @param page The page to be described. * @param maxPhrases The maximum number of phrases to be returned. * @return An map from phrase to score, ordered by decreasing probability. * The scores can be considered probabilities that sum to 1.0 across all possibilities. */ public LinkedHashMap<String, Float> describe(Language language, LocalPage page, int maxPhrases) throws DaoException; /** * Returns the most likely wikipedia pages for a phrase. * @param language The language for the phrase and the returned LocalPages. * @param phrase The phrase to be resolved. * @param maxPages The maximum number of pages to be returned. * @return An map from page to score, ordered by decreasing probability. * The scores can be considered probabilities that sum to 1.0 across all possibilities. */ public LinkedHashMap<LocalId, Float> resolve(Language language, String phrase, int maxPages) throws DaoException; }