/**
*
*/
package org.voyanttools.trombone.lucene.search;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.BitSet;
import org.voyanttools.trombone.lucene.CorpusMapper;
/**
 * Collector that records, for every matching document that belongs to the
 * corpus, the index-wide Lucene document id together with the frequency
 * reported by the scorer for that document.
 *
 * <p>Corpus membership is decided by the bit set obtained from the
 * {@link CorpusMapper} passed to the constructor.
 *
 * @author sgs
 */
public class LuceneDocIdsCollector extends SimpleCollector {

    /** Index-wide document id mapped to the frequency the scorer reported for it. */
    private final Map<Integer, Integer> luceneDocIds = new HashMap<>();

    /** Document id offset ({@code docBase}) of the segment currently being collected. */
    private int base = 0;

    /** Scorer for the current segment, supplied through {@link #setScorer(Scorer)}. */
    private Scorer scorer = null;

    /** Running sum of the frequencies of all collected documents. */
    private int rawFreq = 0;

    /** Bit set obtained from the corpus mapper; only documents whose bit is set are collected. */
    private final BitSet bitSet;

    /**
     * Creates a collector restricted to the documents flagged in the corpus
     * mapper's bit set.
     *
     * @param corpusMapper source of the bit set used to filter documents
     * @throws IOException if the corpus mapper fails to provide its bit set
     */
    public LuceneDocIdsCollector(CorpusMapper corpusMapper) throws IOException {
        bitSet = corpusMapper.getBitSet();
    }

    /**
     * Records the given hit if it belongs to the corpus and has not been
     * recorded before.
     *
     * @param doc document id relative to the current segment
     * @throws IOException if the scorer fails while evaluating the document
     */
    @Override
    public void collect(int doc) throws IOException {
        int absoluteDoc = base + doc;
        // FIXME: determine if we're slowly iterating over all documents in the index and if we can use another doc id iterator
        // The bit set is fetched once for the whole collector and is never
        // rebased per segment, so it has to be addressed with the index-wide
        // id, exactly like the luceneDocIds map below.
        // NOTE(review): assumes CorpusMapper#getBitSet() is indexed by
        // index-wide doc ids - confirm against CorpusMapper.
        if (bitSet.get(absoluteDoc) && !isSeen(absoluteDoc)) {
            // The score itself is discarded.
            // NOTE(review): presumably called so the scorer is fully evaluated
            // before freq() is read - confirm before removing.
            scorer.score();
            int freq = scorer.freq();
            rawFreq += freq;
            luceneDocIds.put(absoluteDoc, freq);
        }
    }

    /**
     * @return the sum of the scorer frequencies of all collected documents
     */
    public int getRawFreq() {
        return rawFreq;
    }

    /**
     * @return the number of distinct documents collected so far
     */
    public int getInDocumentsCount() {
        return luceneDocIds.size();
    }

    /**
     * Tells whether a document has already been collected.
     *
     * @param doc index-wide document id
     * @return {@code true} if the id is already recorded
     */
    protected boolean isSeen(int doc) {
        return luceneDocIds.containsKey(doc);
    }

    /**
     * Remembers the doc id offset of the segment about to be collected so that
     * segment-relative ids can be converted to index-wide ids.
     *
     * @param context the segment that subsequent {@link #collect(int)} calls refer to
     */
    @Override
    public void doSetNextReader(LeafReaderContext context) {
        base = context.docBase;
    }

    /**
     * Stores the scorer that {@link #collect(int)} reads frequencies from.
     *
     * @param scorer scorer for the current segment
     */
    @Override
    public void setScorer(Scorer scorer) {
        this.scorer = scorer;
    }

    /**
     * @return the index-wide ids of all collected documents; this is the live
     *         key-set view of the collector's internal map, not a copy
     */
    public Set<Integer> getLuceneDocIds() {
        return luceneDocIds.keySet();
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean needsScores() {
        return true; // can this be set to false while ensuring that setScorer is called?
    }
}