package no.priv.garshol.duke;

import java.util.List;
import java.util.Collection;

/**
 * Contract for a Duke configuration: the data sources, databases,
 * thresholds, properties, and custom comparators used for processing.
 */
public interface Configuration {

  // ----- data sources

  /**
   * Gives the data sources to read from. Meant for deduplication mode
   * only; do not call this in record linkage mode.
   *
   * @return the data sources to use
   */
  Collection<DataSource> getDataSources();

  /**
   * Gives the data sources that make up one particular group. Grouping
   * of data sources exists in record linkage mode but not in
   * deduplication mode, so call this in record linkage mode only.
   *
   * @param groupno the group number; the group numbers are 1 and 2
   * @return the data sources belonging to that group
   */
  Collection<DataSource> getDataSources(int groupno);

  // ----- databases

  /**
   * Gives the database that processing will use.
   *
   * @param overwrite Whether to overwrite existing contents or not.
   * @return the database to be used for processing
   * @since 1.2
   */
  Database getDatabase(boolean overwrite);

  /**
   * Gives the database that a specific group will use.
   *
   * @param groupno identifies the group whose database is requested
   * @param overwrite Whether to overwrite existing contents or not.
   * @return the database to be used for that group
   * @since 1.3
   */
  Database getDatabase(int groupno, boolean overwrite);

  // ----- thresholds and mode

  /**
   * The probability threshold for deciding that two records represent
   * the same entity. When the computed probability is higher than this
   * value, the two records are considered to be the same entity.
   *
   * @return the match threshold
   */
  double getThreshold();

  /**
   * The probability threshold for deciding that two records may
   * represent the same entity. When the computed probability is higher
   * than this value, the two records are considered possible matches.
   * The value can be 0, in which case no records are considered
   * possible matches.
   *
   * @return the possible-match threshold
   */
  double getMaybeThreshold();

  /**
   * Tells whether we are in deduplication mode.
   *
   * @return true iff we are in deduplication mode
   */
  boolean isDeduplicationMode();

  // ----- properties

  /**
   * The set of properties that Duke records can have, together with
   * their associated cleaners, comparators, and probabilities.
   *
   * @return the configured properties
   */
  List<Property> getProperties();

  /**
   * The properties that serve to identify records, as opposed to
   * those used to compare them.
   *
   * @return the identity properties
   */
  Collection<Property> getIdentityProperties();

  /**
   * Looks up a property by its name.
   *
   * @param name the name of the wanted property
   * @return the property with the given name, or null if there is no
   *         such property
   */
  Property getPropertyByName(String name);

  /**
   * Gives the properties Duke queries for in the Lucene index. These
   * are a subset of getProperties(), computed based on the
   * probabilities and the threshold.
   *
   * @return the lookup properties
   */
  Collection<Property> getLookupProperties();

  // ----- validation, mutation, copying

  /**
   * Checks the configuration to verify that it makes sense.
   * Configurations that will fail during runtime are rejected.
   */
  void validate();

  /**
   * Sets the threshold.
   *
   * @param threshold the new threshold value
   * @since 1.1
   */
  void setThreshold(double threshold);

  /**
   * Produces an exact copy of the configuration.
   *
   * @return the copy
   * @since 1.1
   */
  Configuration copy();

  /**
   * Adds a database object. Used by ConfigLoader. If this is called
   * only once there is only a single database. It can be called twice.
   *
   * @param database the database to add
   * @since 1.3
   */
  void addDatabase(Database database);

  // ----- custom comparators

  /**
   * Adds a custom comparator.
   *
   * @param comparator the comparator to add
   * @since 1.3
   */
  void addCustomComparator(Comparator comparator);

  /**
   * Gives any customized comparators that were declared using object
   * tags in the config file.
   *
   * @return the custom comparators
   * @since 1.3
   */
  List<Comparator> getCustomComparators();
}