package no.priv.garshol.duke.databases;
import java.util.HashSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import no.priv.garshol.duke.Record;
/**
* Wraps a Lucene Document to provide a representation of it as a Record.
*/
public class DocumentRecord implements Record {
/**
* Beware: this document number will change when changes are made to
* the Lucene index. So while it's safe to use right now, it is not
* safe if record objects persist across batch process calls. It
* might also not be safe in a multi-threaded setting. So
* longer-term we may need a better solution for removing duplicate
* candidates.
*/
private int docno;
private Document doc;
public DocumentRecord(int docno, Document doc) {
this.docno = docno;
this.doc = doc;
}
public Collection<String> getProperties() {
Collection<String> props = new HashSet();
for (IndexableField f : doc.getFields())
props.add(f.name());
return props;
}
public String getValue(String prop) {
return doc.get(prop);
}
public Collection<String> getValues(String prop) {
IndexableField[] fields = doc.getFields(prop);
if (fields.length == 1)
return Collections.singleton(fields[0].stringValue());
Collection<String> values = new ArrayList(fields.length);
for (int ix = 0; ix < fields.length; ix++)
values.add(fields[ix].stringValue());
return values;
}
public void merge(Record other) {
throw new UnsupportedOperationException();
}
public String toString() {
return "[DocumentRecord " + docno + " " + doc + "]";
}
public int hashCode() {
return docno;
}
public boolean equals(Object other) {
if (!(other instanceof DocumentRecord))
return false;
return ((DocumentRecord) other).docno == docno;
}
}