package no.priv.garshol.duke.databases;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.NavigableMap;
import no.priv.garshol.duke.Configuration;
import no.priv.garshol.duke.Database;
import no.priv.garshol.duke.Property;
import no.priv.garshol.duke.Record;
/**
* An abstract database using blocking to find candidate records. It
* has different concrete implementations depending on where the
* actual data is stored.
* @since 1.2
*/
public abstract class AbstractBlockingDatabase implements Database {
  /** Configuration handed in via setConfiguration; read by indexById. */
  protected Configuration config;
  /** Key functions used for blocking; each one gets its own block map. */
  protected Collection<KeyFunction> functions;
  /**
   * Maps identity values to records. NOTE: this class never assigns
   * the field, so a subclass must set it before indexById or
   * findRecordById is called.
   */
  protected Map<String, Record> idmap;
  /** Cache of the block maps produced by makeMap, one per key function. */
  protected Map<KeyFunction, NavigableMap> func_to_map;

  // config
  protected int window_size;

  /**
   * Creates a database with no key functions, an empty block map
   * cache, and a window size of 5.
   */
  public AbstractBlockingDatabase() {
    this.functions = new ArrayList<KeyFunction>();
    this.func_to_map = new HashMap<KeyFunction, NavigableMap>();
    this.window_size = 5;
  }

  /**
   * Sets the configuration, which indexById consults for the identity
   * properties.
   */
  public void setConfiguration(Configuration config) {
    this.config = config;
  }

  /**
   * Does nothing in this base class; the flag is ignored.
   */
  public void setOverwrite(boolean overwrite) {
  }

  /**
   * Sets the minimum number of records to gather from blocks on each
   * side of the start block. If the start block has more records than
   * twice the window size no neighbouring blocks are searched.
   * Setting window_size = 0 disables searching of neighbouring
   * blocks.
   */
  public void setWindowSize(int window_size) {
    this.window_size = window_size;
  }

  /**
   * Sets the key functions used for blocking. The collection is
   * stored as given, not copied.
   */
  public void setKeyFunctions(Collection<KeyFunction> functions) {
    this.functions = functions;
  }

  /**
   * Returns the key functions used for blocking (the internal
   * collection itself, not a copy).
   */
  public Collection<KeyFunction> getKeyFunctions() {
    return functions;
  }

  /**
   * Registers the record in idmap under every value of every
   * identity property listed in the configuration. A later record
   * with the same identity value replaces the earlier one.
   */
  protected void indexById(Record record) {
    for (Property idprop : config.getIdentityProperties())
      for (String id : record.getValues(idprop.getName()))
        idmap.put(id, record);
  }

  /**
   * Returns the record registered in idmap under the given identity
   * value, or null if the map has no entry for it.
   */
  public Record findRecordById(String id) {
    return idmap.get(id);
  }

  /**
   * Collects candidate records for the given record. For each key
   * function the start block is the one with the least key greater
   * than or equal to the record's key; if there is none, that key
   * function contributes nothing. Unless the start block alone
   * yielded more than twice the window size, neighbouring blocks are
   * then visited, first downwards and then upwards, until each
   * direction has reached the window size or runs out of blocks.
   *
   * @return the set of candidates gathered across all key functions
   */
  public Collection<Record> findCandidateMatches(Record record) {
    Collection<Record> candidates = new HashSet<Record>();

    for (KeyFunction keyfunc : functions) {
      // getBlocks is declared with a raw return type, hence the suppression
      @SuppressWarnings("unchecked")
      NavigableMap<String, Object> blocks = getBlocks(keyfunc);
      String key = keyfunc.makeKey(record);

      // look up the first block
      Map.Entry<String, Object> start = blocks.ceilingEntry(key);
      if (start == null)
        continue;

      // add all records from this block
      int added = addBlock(candidates, start);
      // compare as long so a very large window size cannot overflow
      if (added > 2L * window_size)
        continue; // we can't add more candidates from this key function

      // each direction is credited with half of the start block
      int half = added / 2;

      // then we navigate downwards from the key
      int added_this_way = half;
      Map.Entry<String, Object> entry = blocks.lowerEntry(start.getKey());
      while (entry != null && added_this_way < window_size) {
        added_this_way += addBlock(candidates, entry);
        entry = blocks.lowerEntry(entry.getKey());
      }

      // then we navigate upwards from the key
      added_this_way = half;
      entry = blocks.higherEntry(start.getKey());
      while (entry != null && added_this_way < window_size) {
        added_this_way += addBlock(candidates, entry);
        entry = blocks.higherEntry(entry.getKey());
      }
    }

    return candidates;
  }

  /**
   * Does nothing in this base class.
   */
  public void commit() {
  }

  /**
   * Does nothing in this base class.
   */
  public void close() {
  }

  /**
   * Returns the block map for the given key function, creating it
   * with makeMap and caching it in func_to_map on first use.
   */
  public NavigableMap getBlocks(KeyFunction keyfunc) {
    NavigableMap map = func_to_map.get(keyfunc);
    if (map == null) {
      map = makeMap(keyfunc);
      func_to_map.put(keyfunc, map);
    }
    return map;
  }

  // --- extension points
  // must also implement index(Record)

  /**
   * Adds the records of the given block to the candidates.
   *
   * @return the number of records added
   */
  protected abstract int addBlock(Collection<Record> candidates,
                                  Map.Entry block);

  /**
   * Creates the block map for the given key function. Called by
   * getBlocks the first time a key function is seen.
   */
  protected abstract NavigableMap makeMap(KeyFunction keyfunc);

  // --- BLOCK CONTAINER

  /**
   * A growable, unordered container of record ids. Only the first
   * size() cells of the backing array hold ids.
   *
   * No serialVersionUID is declared, so the fields and non-private
   * member signatures are deliberately left untouched: changing them
   * would change the default computed UID of serialized blocks.
   */
  public static class Block implements Serializable {
    private int free;     // index of the first unused cell == number of ids
    private String[] ids;

    /**
     * Creates an empty block with room for 10 ids.
     */
    public Block() {
      this.ids = new String[10];
    }

    /**
     * Creates a block around an existing array, which is used as is
     * (not copied).
     *
     * @param free the number of cells in ids that are in use
     * @param ids the backing array
     */
    public Block(int free, String[] ids) {
      this.free = free;
      this.ids = ids;
    }

    /**
     * Returns the backing array itself, not a copy. Cells from index
     * size() onwards are not part of the block's content.
     */
    public String[] getIds() {
      return ids;
    }

    /**
     * Appends the id, growing the backing array when it is full.
     */
    public void add(String id) {
      if (free >= ids.length) {
        // doubling alone is not enough when the array is empty (0 * 2
        // == 0) or when 'free' was passed in larger than the array
        String[] newids = new String[Math.max(ids.length * 2, free + 1)];
        System.arraycopy(ids, 0, newids, 0, ids.length);
        ids = newids;
      }
      ids[free++] = id;
    }

    /**
     * Removes the first occurrence of the id by moving the last id
     * into its cell, so the order of ids is not preserved. An id
     * that is not present is silently ignored.
     */
    public void remove(String id) {
      for (int ix = 0; ix < free; ix++) {
        String current = ids[ix];
        boolean same = (id == null) ? current == null : id.equals(current);
        if (same) {
          free--;
          ids[ix] = ids[free];
          ids[free] = null; // don't keep a stale reference in the unused cell
          return;
        }
      }
      // FIXME: if we get here something's wrong. add a check?
    }

    /**
     * Returns the number of ids in the block.
     */
    public int size() {
      return free;
    }
  }
}