package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
/**
* <p><b>NOTE:</b> This API is new and still experimental
* (subject to change suddenly in the next release)</p>
*
* @version $Id$
*/
public class SegmentReader extends IndexReader implements Cloneable {
protected boolean readOnly;
private SegmentInfo si;
private int readBufferSize;
CloseableThreadLocal fieldsReaderLocal = new FieldsReaderLocal();
CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal();
BitVector deletedDocs = null;
Ref deletedDocsRef = null;
private boolean deletedDocsDirty = false;
private boolean normsDirty = false;
private int pendingDeleteCount;
private boolean rollbackHasChanges = false;
private boolean rollbackDeletedDocsDirty = false;
private boolean rollbackNormsDirty = false;
private int rollbackPendingDeleteCount;
// optionally used for the .nrm file shared by multiple norms
private IndexInput singleNormStream;
private Ref singleNormRef;
CoreReaders core;
// Holds core readers that are shared (unchanged) when
// SegmentReader is cloned or reopened
static final class CoreReaders {
// Counts how many other readers share the core objects
// (freqStream, proxStream, tis, etc.) of this reader;
// when coreRef drops to 0, these core objects may be
// closed. A given instance of SegmentReader may be
// closed, even though it shares core objects with other
// SegmentReaders:
private final Ref ref = new Ref();
final String segment;
final FieldInfos fieldInfos;
final IndexInput freqStream;
final IndexInput proxStream;
final TermInfosReader tisNoIndex;
final Directory dir;
final Directory cfsDir;
final int readBufferSize;
final int termsIndexDivisor;
private final SegmentReader origInstance;
TermInfosReader tis;
FieldsReader fieldsReaderOrig;
TermVectorsReader termVectorsReaderOrig;
CompoundFileReader cfsReader;
CompoundFileReader storeCFSReader;
CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) throws IOException {
segment = si.name;
this.readBufferSize = readBufferSize;
this.dir = dir;
boolean success = false;
try {
Directory dir0 = dir;
if (si.getUseCompoundFile()) {
cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
dir0 = cfsReader;
}
cfsDir = dir0;
fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
this.termsIndexDivisor = termsIndexDivisor;
TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
if (termsIndexDivisor == -1) {
tisNoIndex = reader;
} else {
tis = reader;
tisNoIndex = null;
}
// make sure that all index files have been read or are kept open
// so that if an index update removes them we'll still have them
freqStream = cfsDir.openInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);
if (fieldInfos.hasProx()) {
proxStream = cfsDir.openInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
} else {
proxStream = null;
}
success = true;
} finally {
if (!success) {
decRef();
}
}
// Must assign this at the end -- if we hit an
// exception above, we don't want to attempt to
// purge the FieldCache (that would hit an NPE because
// core is not assigned yet).
this.origInstance = origInstance;
}
synchronized TermVectorsReader getTermVectorsReaderOrig() {
return termVectorsReaderOrig;
}
synchronized FieldsReader getFieldsReaderOrig() {
return fieldsReaderOrig;
}
synchronized void incRef() {
ref.incRef();
}
synchronized Directory getCFSReader() {
return cfsReader;
}
synchronized TermInfosReader getTermsReader() {
if (tis != null) {
return tis;
} else {
return tisNoIndex;
}
}
synchronized boolean termsIndexIsLoaded() {
return tis != null;
}
// NOTE: only called from IndexWriter when a near
// real-time reader is opened, or applyDeletes is run,
// sharing a segment that's still being merged. This
// method is not fully thread safe, and relies on the
// synchronization in IndexWriter
synchronized void loadTermsIndex(SegmentInfo si, int termsIndexDivisor) throws IOException {
if (tis == null) {
Directory dir0;
if (si.getUseCompoundFile()) {
// In some cases, we were originally opened when CFS
// was not used, but then we are asked to open the
// terms reader with index after the segment has
// switched to CFS
if (cfsReader == null) {
cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
}
dir0 = cfsReader;
} else {
dir0 = dir;
}
tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor);
}
}
synchronized void decRef() throws IOException {
if (ref.decRef() == 0) {
// close everything, nothing is shared anymore with other readers
if (tis != null) {
tis.close();
// null so if an app hangs on to us we still free most ram
tis = null;
}
if (tisNoIndex != null) {
tisNoIndex.close();
}
if (freqStream != null) {
freqStream.close();
}
if (proxStream != null) {
proxStream.close();
}
if (termVectorsReaderOrig != null) {
termVectorsReaderOrig.close();
}
if (fieldsReaderOrig != null) {
fieldsReaderOrig.close();
}
if (cfsReader != null) {
cfsReader.close();
}
if (storeCFSReader != null) {
storeCFSReader.close();
}
// Force FieldCache to evict our entries at this point
if (origInstance != null) {
FieldCache.DEFAULT.purge(origInstance);
}
}
}
synchronized void openDocStores(SegmentInfo si) throws IOException {
assert si.name.equals(segment);
if (fieldsReaderOrig == null) {
final Directory storeDir;
if (si.getDocStoreOffset() != -1) {
if (si.getDocStoreIsCompoundFile()) {
assert storeCFSReader == null;
storeCFSReader = new CompoundFileReader(dir,
si.getDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION,
readBufferSize);
storeDir = storeCFSReader;
assert storeDir != null;
} else {
storeDir = dir;
assert storeDir != null;
}
} else if (si.getUseCompoundFile()) {
// In some cases, we were originally opened when CFS
// was not used, but then we are asked to open doc
// stores after the segment has switched to CFS
if (cfsReader == null) {
cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
}
storeDir = cfsReader;
assert storeDir != null;
} else {
storeDir = dir;
assert storeDir != null;
}
final String storesSegment;
if (si.getDocStoreOffset() != -1) {
storesSegment = si.getDocStoreSegment();
} else {
storesSegment = segment;
}
fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize,
si.getDocStoreOffset(), si.docCount);
// Verify two sources of "maxDoc" agree:
if (si.getDocStoreOffset() == -1 && fieldsReaderOrig.size() != si.docCount) {
throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.size() + " but segmentInfo shows " + si.docCount);
}
if (fieldInfos.hasVectors()) { // open term vector files only as needed
termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount);
}
}
}
}
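// CoreReaders lifecycle (summary of the code above and below): SegmentReader.get
// creates a CoreReaders with refCount 1; reopenSegment/clone call core.incRef()
// so clones share it, and each reader's doClose() calls core.decRef(), which
// closes the shared streams once the last reference is released.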
/**
* Sets the initial value
*/
private class FieldsReaderLocal extends CloseableThreadLocal {
protected Object initialValue() {
return core.getFieldsReaderOrig().clone();
}
}
static class Ref {
private int refCount = 1;
public String toString() {
return "refcount: "+refCount;
}
public synchronized int refCount() {
return refCount;
}
public synchronized int incRef() {
assert refCount > 0;
refCount++;
return refCount;
}
public synchronized int decRef() {
assert refCount > 0;
refCount--;
return refCount;
}
}
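// Usage sketch for Ref (illustrative, not additional API): the creator of a
// shared resource starts at refCount == 1; every additional sharer calls
// incRef(), and whichever holder's decRef() returns 0 is responsible for
// closing the underlying resource (see Norm.closeInput and CoreReaders.decRef).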
/**
* Byte[] referencing is used because a new norm object needs
* to be created for each clone, and the byte array is all
* that is needed for sharing between cloned readers. The
* current norm referencing is for sharing between readers
* whereas the byte[] referencing is for copy on write which
* is independent of reader references (i.e. incRef, decRef).
*/
final class Norm implements Cloneable {
private int refCount = 1;
// If this instance is a clone, origNorm references
// the Norm that has a real open IndexInput:
private Norm origNorm;
private IndexInput in;
private long normSeek;
// null until bytes is set
private Ref bytesRef;
private byte[] bytes;
private boolean dirty;
private int number;
private boolean rollbackDirty;
public Norm(IndexInput in, int number, long normSeek) {
this.in = in;
this.number = number;
this.normSeek = normSeek;
}
public synchronized void incRef() {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
refCount++;
}
private void closeInput() throws IOException {
if (in != null) {
if (in != singleNormStream) {
// It's private to us -- just close it
in.close();
} else {
// We are sharing this with others -- decRef and
// maybe close the shared norm stream
if (singleNormRef.decRef() == 0) {
singleNormStream.close();
singleNormStream = null;
}
}
in = null;
}
}
public synchronized void decRef() throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
if (--refCount == 0) {
if (origNorm != null) {
origNorm.decRef();
origNorm = null;
} else {
closeInput();
}
if (bytes != null) {
assert bytesRef != null;
bytesRef.decRef();
bytes = null;
bytesRef = null;
} else {
assert bytesRef == null;
}
}
}
// Load bytes but do not cache them if they were not
// already cached
public synchronized void bytes(byte[] bytesOut, int offset, int len) throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
if (bytes != null) {
// Already cached -- copy from cache:
assert len <= maxDoc();
System.arraycopy(bytes, 0, bytesOut, offset, len);
} else {
// Not cached
if (origNorm != null) {
// Ask origNorm to load
origNorm.bytes(bytesOut, offset, len);
} else {
// We are orig -- read ourselves from disk:
synchronized(in) {
in.seek(normSeek);
in.readBytes(bytesOut, offset, len, false);
}
}
}
}
// Load & cache full bytes array. Returns bytes.
public synchronized byte[] bytes() throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
if (bytes == null) { // value not yet read
assert bytesRef == null;
if (origNorm != null) {
// Ask origNorm to load so that for a series of
// reopened readers we share a single read-only
// byte[]
bytes = origNorm.bytes();
bytesRef = origNorm.bytesRef;
bytesRef.incRef();
// Once we've loaded the bytes we no longer need
// origNorm:
origNorm.decRef();
origNorm = null;
} else {
// We are the origNorm, so load the bytes for real
// ourself:
final int count = maxDoc();
bytes = new byte[count];
// Since we are orig, in must not be null
assert in != null;
// Read from disk.
synchronized(in) {
in.seek(normSeek);
in.readBytes(bytes, 0, count, false);
}
bytesRef = new Ref();
closeInput();
}
}
return bytes;
}
// Only for testing
Ref bytesRef() {
return bytesRef;
}
// Called if we intend to change a norm value. We make a
// private copy of bytes if it's shared with others:
public synchronized byte[] copyOnWrite() throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
bytes();
assert bytes != null;
assert bytesRef != null;
if (bytesRef.refCount() > 1) {
// I cannot be the origNorm for another norm
// instance if I'm being changed. Ie, only the
// "head Norm" can be changed:
assert refCount == 1;
final Ref oldRef = bytesRef;
bytes = cloneNormBytes(bytes);
bytesRef = new Ref();
oldRef.decRef();
}
dirty = true;
return bytes;
}
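// For example, doSetNorm (below) writes through the array returned here:
// norm.copyOnWrite()[doc] = value -- so only this reader's private copy is
// modified while clones keep sharing the previously cached bytes.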
// Returns a copy of this Norm instance that shares
// IndexInput & bytes with the original one
public synchronized Object clone() {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
Norm clone;
try {
clone = (Norm) super.clone();
} catch (CloneNotSupportedException cnse) {
// Cannot happen
throw new RuntimeException("unexpected CloneNotSupportedException", cnse);
}
clone.refCount = 1;
if (bytes != null) {
assert bytesRef != null;
assert origNorm == null;
// Clone holds a reference to my bytes:
clone.bytesRef.incRef();
} else {
assert bytesRef == null;
if (origNorm == null) {
// I become the origNorm for the clone:
clone.origNorm = this;
}
clone.origNorm.incRef();
}
// Only the origNorm will actually readBytes from in:
clone.in = null;
return clone;
}
// Flush all pending changes to the next generation
// separate norms file.
public void reWrite(SegmentInfo si) throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0): "refCount=" + refCount + " origNorm=" + origNorm;
// NOTE: norms are re-written in regular directory, not cfs
si.advanceNormGen(this.number);
IndexOutput out = directory().createOutput(si.getNormFileName(this.number));
try {
out.writeBytes(bytes, maxDoc());
} finally {
out.close();
}
this.dirty = false;
}
}
Map norms = new HashMap();
/** The class which implements SegmentReader. */
// @deprecated (LUCENE-1677)
private static Class IMPL;
static {
try {
String name =
System.getProperty("org.apache.lucene.SegmentReader.class",
SegmentReader.class.getName());
IMPL = Class.forName(name);
} catch (ClassNotFoundException e) {
throw new RuntimeException("cannot load SegmentReader class: " + e, e);
} catch (SecurityException se) {
try {
IMPL = Class.forName(SegmentReader.class.getName());
} catch (ClassNotFoundException e) {
throw new RuntimeException("cannot load default SegmentReader class: " + e, e);
}
}
}
// @deprecated (LUCENE-1677)
private static Class READONLY_IMPL;
static {
try {
String name =
System.getProperty("org.apache.lucene.ReadOnlySegmentReader.class",
ReadOnlySegmentReader.class.getName());
READONLY_IMPL = Class.forName(name);
} catch (ClassNotFoundException e) {
throw new RuntimeException("cannot load ReadOnlySegmentReader class: " + e, e);
} catch (SecurityException se) {
try {
READONLY_IMPL = Class.forName(ReadOnlySegmentReader.class.getName());
} catch (ClassNotFoundException e) {
throw new RuntimeException("cannot load default ReadOnlySegmentReader class: " + e, e);
}
}
}
/**
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
* @deprecated
*/
public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
}
/**
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public static SegmentReader get(boolean readOnly, SegmentInfo si, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
}
/**
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
* @deprecated
*/
static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return get(false, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor);
}
/**
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public static SegmentReader get(boolean readOnly,
Directory dir,
SegmentInfo si,
int readBufferSize,
boolean doOpenStores,
int termInfosIndexDivisor)
throws CorruptIndexException, IOException {
SegmentReader instance;
try {
if (readOnly)
instance = (SegmentReader)READONLY_IMPL.newInstance();
else
instance = (SegmentReader)IMPL.newInstance();
} catch (Exception e) {
throw new RuntimeException("cannot load SegmentReader class: " + e, e);
}
instance.readOnly = readOnly;
instance.si = si;
instance.readBufferSize = readBufferSize;
boolean success = false;
try {
instance.core = new CoreReaders(instance, dir, si, readBufferSize, termInfosIndexDivisor);
if (doOpenStores) {
instance.core.openDocStores(si);
}
instance.loadDeletedDocs();
instance.openNorms(instance.core.cfsDir, readBufferSize);
success = true;
} finally {
// With lock-less commits, it's entirely possible (and
// fine) to hit a FileNotFound exception above. In
// this case, we want to explicitly close any subset
// of things that were opened so that we don't have to
// wait for a GC to do so.
if (!success) {
instance.doClose();
}
}
return instance;
}
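// Illustrative internal usage (a sketch, not part of the public API contract):
// open a read-only reader for a SegmentInfo with the default terms-index
// divisor, then release it with close() when done:
//
//   SegmentReader reader = SegmentReader.get(true, si, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
//   try {
//     int live = reader.numDocs();
//   } finally {
//     reader.close();
//   }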
void openDocStores() throws IOException {
core.openDocStores(si);
}
private boolean checkDeletedCounts() throws IOException {
final int recomputedCount = deletedDocs.getRecomputedCount();
assert deletedDocs.count() == recomputedCount : "deleted count=" + deletedDocs.count() + " vs recomputed count=" + recomputedCount;
assert si.getDelCount() == recomputedCount :
"delete count mismatch: info=" + si.getDelCount() + " vs BitVector=" + recomputedCount;
// Verify # deletes does not exceed maxDoc for this
// segment:
assert si.getDelCount() <= maxDoc() :
"delete count (" + si.getDelCount() + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name;
return true;
}
private void loadDeletedDocs() throws IOException {
// NOTE: the bitvector is stored using the regular directory, not cfs
if (hasDeletions(si)) {
deletedDocs = new BitVector(directory(), si.getDelFileName());
deletedDocsRef = new Ref();
assert checkDeletedCounts();
} else
assert si.getDelCount() == 0;
}
/**
* Clones the norm bytes. May be overridden by subclasses. New and experimental.
* @param bytes Byte array to clone
* @return New byte array containing a copy of the given norm bytes
*/
protected byte[] cloneNormBytes(byte[] bytes) {
byte[] cloneBytes = new byte[bytes.length];
System.arraycopy(bytes, 0, cloneBytes, 0, bytes.length);
return cloneBytes;
}
/**
* Clones the deleteDocs BitVector. May be overridden by subclasses. New and experimental.
* @param bv BitVector to clone
* @return New BitVector
*/
protected BitVector cloneDeletedDocs(BitVector bv) {
return (BitVector)bv.clone();
}
public final synchronized Object clone() {
try {
return clone(readOnly); // Preserve current readOnly
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
public final synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException {
return reopenSegment(si, true, openReadOnly);
}
synchronized SegmentReader reopenSegment(SegmentInfo si, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
boolean deletionsUpToDate = (this.si.hasDeletions() == si.hasDeletions())
&& (!si.hasDeletions() || this.si.getDelFileName().equals(si.getDelFileName()));
boolean normsUpToDate = true;
boolean[] fieldNormsChanged = new boolean[core.fieldInfos.size()];
final int fieldCount = core.fieldInfos.size();
for (int i = 0; i < fieldCount; i++) {
if (!this.si.getNormFileName(i).equals(si.getNormFileName(i))) {
normsUpToDate = false;
fieldNormsChanged[i] = true;
}
}
// if we're cloning we need to run through the reopenSegment logic
// also if both old and new readers aren't readonly, we clone to avoid sharing modifications
if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly) {
return this;
}
// When cloning, the incoming SegmentInfo should not
// have any changes in it:
assert !doClone || (normsUpToDate && deletionsUpToDate);
// clone reader
SegmentReader clone;
try {
if (openReadOnly)
clone = (SegmentReader) READONLY_IMPL.newInstance();
else
clone = (SegmentReader) IMPL.newInstance();
} catch (Exception e) {
throw new RuntimeException("cannot load SegmentReader class: " + e, e);
}
boolean success = false;
try {
core.incRef();
clone.core = core;
clone.readOnly = openReadOnly;
clone.si = si;
clone.readBufferSize = readBufferSize;
if (!openReadOnly && hasChanges) {
// My pending changes transfer to the new reader
clone.pendingDeleteCount = pendingDeleteCount;
clone.deletedDocsDirty = deletedDocsDirty;
clone.normsDirty = normsDirty;
clone.hasChanges = hasChanges;
hasChanges = false;
}
if (doClone) {
if (deletedDocs != null) {
deletedDocsRef.incRef();
clone.deletedDocs = deletedDocs;
clone.deletedDocsRef = deletedDocsRef;
}
} else {
if (!deletionsUpToDate) {
// load deleted docs
assert clone.deletedDocs == null;
clone.loadDeletedDocs();
} else if (deletedDocs != null) {
deletedDocsRef.incRef();
clone.deletedDocs = deletedDocs;
clone.deletedDocsRef = deletedDocsRef;
}
}
clone.setDisableFakeNorms(getDisableFakeNorms());
clone.norms = new HashMap();
// Clone norms
for (int i = 0; i < fieldNormsChanged.length; i++) {
// Clone unchanged norms to the cloned reader
if (doClone || !fieldNormsChanged[i]) {
final String curField = core.fieldInfos.fieldInfo(i).name;
Norm norm = (Norm) this.norms.get(curField);
if (norm != null)
clone.norms.put(curField, norm.clone());
}
}
// If we are not cloning, then this will open anew
// any norms that have changed:
clone.openNorms(si.getUseCompoundFile() ? core.getCFSReader() : directory(), readBufferSize);
success = true;
} finally {
if (!success) {
// An exception occurred during reopen, so we have to decRef the norms
// that we incRef'ed already and close singleNormStream and FieldsReader
clone.decRef();
}
}
return clone;
}
/** @deprecated */
protected void doCommit() throws IOException {
doCommit(null);
}
protected void doCommit(Map commitUserData) throws IOException {
if (hasChanges) {
if (deletedDocsDirty) { // re-write deleted
si.advanceDelGen();
// We can write directly to the actual name (vs to a
// .tmp & renaming it) because the file is not live
// until segments file is written:
deletedDocs.write(directory(), si.getDelFileName());
si.setDelCount(si.getDelCount()+pendingDeleteCount);
pendingDeleteCount = 0;
assert deletedDocs.count() == si.getDelCount(): "delete count mismatch during commit: info=" + si.getDelCount() + " vs BitVector=" + deletedDocs.count();
} else {
assert pendingDeleteCount == 0;
}
if (normsDirty) { // re-write norms
si.setNumFields(core.fieldInfos.size());
Iterator it = norms.values().iterator();
while (it.hasNext()) {
Norm norm = (Norm) it.next();
if (norm.dirty) {
norm.reWrite(si);
}
}
}
deletedDocsDirty = false;
normsDirty = false;
hasChanges = false;
}
}
FieldsReader getFieldsReader() {
return (FieldsReader) fieldsReaderLocal.get();
}
protected void doClose() throws IOException {
termVectorsLocal.close();
fieldsReaderLocal.close();
if (deletedDocs != null) {
deletedDocsRef.decRef();
// null so if an app hangs on to us we still free most ram
deletedDocs = null;
}
Iterator it = norms.values().iterator();
while (it.hasNext()) {
((Norm) it.next()).decRef();
}
if (core != null) {
core.decRef();
}
}
static boolean hasDeletions(SegmentInfo si) throws IOException {
// Don't call ensureOpen() here (it could affect performance)
return si.hasDeletions();
}
public boolean hasDeletions() {
// Don't call ensureOpen() here (it could affect performance)
return deletedDocs != null;
}
static boolean usesCompoundFile(SegmentInfo si) throws IOException {
return si.getUseCompoundFile();
}
static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
return si.hasSeparateNorms();
}
protected void doDelete(int docNum) {
if (deletedDocs == null) {
deletedDocs = new BitVector(maxDoc());
deletedDocsRef = new Ref();
}
// If there is more than one SegmentReader with a reference to this
// deletedDocs BitVector, decRef the current deletedDocsRef,
// clone the BitVector, and create a new deletedDocsRef:
if (deletedDocsRef.refCount() > 1) {
Ref oldRef = deletedDocsRef;
deletedDocs = cloneDeletedDocs(deletedDocs);
deletedDocsRef = new Ref();
oldRef.decRef();
}
deletedDocsDirty = true;
if (!deletedDocs.getAndSet(docNum))
pendingDeleteCount++;
}
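// Note: the deletedDocs BitVector follows the same copy-on-write pattern as
// Norm.copyOnWrite() above -- a shared BitVector is cloned before the first
// modification so other readers referencing it are unaffected.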
protected void doUndeleteAll() {
deletedDocsDirty = false;
if (deletedDocs != null) {
assert deletedDocsRef != null;
deletedDocsRef.decRef();
deletedDocs = null;
deletedDocsRef = null;
pendingDeleteCount = 0;
si.clearDelGen();
si.setDelCount(0);
} else {
assert deletedDocsRef == null;
assert pendingDeleteCount == 0;
}
}
List files() throws IOException {
return new ArrayList(si.files());
}
public TermEnum terms() {
ensureOpen();
return core.getTermsReader().terms();
}
public TermEnum terms(Term t) throws IOException {
ensureOpen();
return core.getTermsReader().terms(t);
}
FieldInfos fieldInfos() {
return core.fieldInfos;
}
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
ensureOpen();
return getFieldsReader().doc(n, fieldSelector);
}
public synchronized boolean isDeleted(int n) {
return (deletedDocs != null && deletedDocs.get(n));
}
public TermDocs termDocs(Term term) throws IOException {
if (term == null) {
return new AllTermDocs(this);
} else {
return super.termDocs(term);
}
}
public TermDocs termDocs() throws IOException {
ensureOpen();
return new SegmentTermDocs(this);
}
public TermPositions termPositions() throws IOException {
ensureOpen();
return new SegmentTermPositions(this);
}
public int docFreq(Term t) throws IOException {
ensureOpen();
TermInfo ti = core.getTermsReader().get(t);
if (ti != null)
return ti.docFreq;
else
return 0;
}
public int numDocs() {
// Don't call ensureOpen() here (it could affect performance)
int n = maxDoc();
if (deletedDocs != null)
n -= deletedDocs.count();
return n;
}
public int maxDoc() {
// Don't call ensureOpen() here (it could affect performance)
return si.docCount;
}
/**
* @see IndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)
*/
public Collection getFieldNames(IndexReader.FieldOption fieldOption) {
ensureOpen();
Set fieldSet = new HashSet();
for (int i = 0; i < core.fieldInfos.size(); i++) {
FieldInfo fi = core.fieldInfos.fieldInfo(i);
if (fieldOption == IndexReader.FieldOption.ALL) {
fieldSet.add(fi.name);
}
else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
fieldSet.add(fi.name);
}
else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
fieldSet.add(fi.name);
}
else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
fieldSet.add(fi.name);
}
else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
fieldSet.add(fi.name);
}
else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
fieldSet.add(fi.name);
}
else if (fi.storeTermVector == true &&
fi.storePositionWithTermVector == false &&
fi.storeOffsetWithTermVector == false &&
fieldOption == IndexReader.FieldOption.TERMVECTOR) {
fieldSet.add(fi.name);
}
else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
fieldSet.add(fi.name);
}
else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
fieldSet.add(fi.name);
}
else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
fieldSet.add(fi.name);
}
else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) &&
fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
fieldSet.add(fi.name);
}
}
return fieldSet;
}
public synchronized boolean hasNorms(String field) {
ensureOpen();
return norms.containsKey(field);
}
static byte[] createFakeNorms(int size) {
byte[] ones = new byte[size];
Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
return ones;
}
private byte[] ones;
private byte[] fakeNorms() {
assert !getDisableFakeNorms();
if (ones==null) ones=createFakeNorms(maxDoc());
return ones;
}
// can return null if norms aren't stored
protected synchronized byte[] getNorms(String field) throws IOException {
Norm norm = (Norm) norms.get(field);
if (norm == null) return null; // not indexed, or norms not stored
return norm.bytes();
}
// returns fake norms if norms aren't available
public synchronized byte[] norms(String field) throws IOException {
ensureOpen();
byte[] bytes = getNorms(field);
if (bytes==null && !getDisableFakeNorms()) bytes=fakeNorms();
return bytes;
}
protected void doSetNorm(int doc, String field, byte value)
throws IOException {
Norm norm = (Norm) norms.get(field);
if (norm == null) // not an indexed field
return;
normsDirty = true;
norm.copyOnWrite()[doc] = value; // set the value
}
/** Read norms into a pre-allocated array. */
public synchronized void norms(String field, byte[] bytes, int offset)
throws IOException {
ensureOpen();
Norm norm = (Norm) norms.get(field);
if (norm == null) {
Arrays.fill(bytes, offset, bytes.length, DefaultSimilarity.encodeNorm(1.0f));
return;
}
norm.bytes(bytes, offset, maxDoc());
}
private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
int maxDoc = maxDoc();
for (int i = 0; i < core.fieldInfos.size(); i++) {
FieldInfo fi = core.fieldInfos.fieldInfo(i);
if (norms.containsKey(fi.name)) {
// in case this SegmentReader is being re-opened, we might be able to
// reuse some norm instances and skip loading them here
continue;
}
if (fi.isIndexed && !fi.omitNorms) {
Directory d = directory();
String fileName = si.getNormFileName(fi.number);
if (!si.hasSeparateNorms(fi.number)) {
d = cfsDir;
}
// singleNormFile means multiple norms share this file
boolean singleNormFile = fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION);
IndexInput normInput = null;
long normSeek;
if (singleNormFile) {
normSeek = nextNormSeek;
if (singleNormStream == null) {
singleNormStream = d.openInput(fileName, readBufferSize);
singleNormRef = new Ref();
} else {
singleNormRef.incRef();
}
// All norms in the .nrm file can share a single IndexInput since
// they are only used in a synchronized context.
// If this were to change in the future, a clone could be done here.
normInput = singleNormStream;
} else {
normSeek = 0;
normInput = d.openInput(fileName);
}
norms.put(fi.name, new Norm(normInput, fi.number, normSeek));
nextNormSeek += maxDoc; // increment also if some norms are separate
}
}
}
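// Layout assumed above for the shared .nrm file: NORMS_HEADER, followed by one
// maxDoc-sized block of norm bytes per indexed field that does not omit norms,
// in fieldInfos order; nextNormSeek advances by maxDoc per such field even when
// that field's norms actually live in a separate (per-generation) file.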
boolean termsIndexLoaded() {
return core.termsIndexIsLoaded();
}
// NOTE: only called from IndexWriter when a near
// real-time reader is opened, or applyDeletes is run,
// sharing a segment that's still being merged. This
// method is not thread safe, and relies on the
// synchronization in IndexWriter
void loadTermsIndex(int termsIndexDivisor) throws IOException {
core.loadTermsIndex(si, termsIndexDivisor);
}
// for testing only
boolean normsClosed() {
if (singleNormStream != null) {
return false;
}
Iterator it = norms.values().iterator();
while (it.hasNext()) {
Norm norm = (Norm) it.next();
if (norm.refCount > 0) {
return false;
}
}
return true;
}
// for testing only
boolean normsClosed(String field) {
Norm norm = (Norm) norms.get(field);
return norm.refCount == 0;
}
/**
* Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
* @return TermVectorsReader
*/
TermVectorsReader getTermVectorsReader() {
TermVectorsReader tvReader = (TermVectorsReader) termVectorsLocal.get();
if (tvReader == null) {
TermVectorsReader orig = core.getTermVectorsReaderOrig();
if (orig == null) {
return null;
} else {
try {
tvReader = (TermVectorsReader) orig.clone();
} catch (CloneNotSupportedException cnse) {
return null;
}
}
termVectorsLocal.set(tvReader);
}
return tvReader;
}
TermVectorsReader getTermVectorsReaderOrig() {
return core.getTermVectorsReaderOrig();
}
/** Return a term frequency vector for the specified document and field. The
* vector returned contains term numbers and frequencies for all terms in
* the specified field of this document, if the field had storeTermVector
* flag set. If the flag was not set, the method returns null.
* @throws IOException
*/
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
// Check if this field is invalid or has no stored term vector
ensureOpen();
FieldInfo fi = core.fieldInfos.fieldInfo(field);
if (fi == null || !fi.storeTermVector)
return null;
TermVectorsReader termVectorsReader = getTermVectorsReader();
if (termVectorsReader == null)
return null;
return termVectorsReader.get(docNumber, field);
}
public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
ensureOpen();
FieldInfo fi = core.fieldInfos.fieldInfo(field);
if (fi == null || !fi.storeTermVector)
return;
TermVectorsReader termVectorsReader = getTermVectorsReader();
if (termVectorsReader == null) {
return;
}
termVectorsReader.get(docNumber, field, mapper);
}
public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
ensureOpen();
TermVectorsReader termVectorsReader = getTermVectorsReader();
if (termVectorsReader == null)
return;
termVectorsReader.get(docNumber, mapper);
}
/** Return an array of term frequency vectors for the specified document.
* The array contains a vector for each vectorized field in the document.
* Each vector contains term numbers and frequencies for all terms
* in a given vectorized field.
* If no such fields existed, the method returns null.
* @throws IOException
*/
public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
ensureOpen();
TermVectorsReader termVectorsReader = getTermVectorsReader();
if (termVectorsReader == null)
return null;
return termVectorsReader.get(docNumber);
}
/**
* Return the name of the segment this reader is reading.
*/
public String getSegmentName() {
return core.segment;
}
/**
* Return the SegmentInfo of the segment this reader is reading.
*/
SegmentInfo getSegmentInfo() {
return si;
}
void setSegmentInfo(SegmentInfo info) {
si = info;
}
void startCommit() {
rollbackHasChanges = hasChanges;
rollbackDeletedDocsDirty = deletedDocsDirty;
rollbackNormsDirty = normsDirty;
rollbackPendingDeleteCount = pendingDeleteCount;
Iterator it = norms.values().iterator();
while (it.hasNext()) {
Norm norm = (Norm) it.next();
norm.rollbackDirty = norm.dirty;
}
}
void rollbackCommit() {
hasChanges = rollbackHasChanges;
deletedDocsDirty = rollbackDeletedDocsDirty;
normsDirty = rollbackNormsDirty;
pendingDeleteCount = rollbackPendingDeleteCount;
Iterator it = norms.values().iterator();
while (it.hasNext()) {
Norm norm = (Norm) it.next();
norm.dirty = norm.rollbackDirty;
}
}
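// startCommit/rollbackCommit bracket doCommit: startCommit snapshots the dirty
// flags and pending delete count, and rollbackCommit restores that snapshot so
// a failed commit leaves the reader's pending-change state untouched.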
/** Returns the directory this index resides in. */
public Directory directory() {
// Don't ensureOpen here -- in certain cases, when a
// cloned/reopened reader needs to commit, it may call
// this method on the closed original reader
return core.dir;
}
// This is necessary so that cloned SegmentReaders (which
// share the underlying postings data) will map to the
// same entry in the FieldCache. See LUCENE-1579.
public final Object getFieldCacheKey() {
return core.freqStream;
}
public Object getDeletesCacheKey() {
return deletedDocs;
}
public long getUniqueTermCount() {
return core.getTermsReader().size();
}
/**
* Lotsa tests did hacks like:<br/>
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>
* They broke. This method serves as a hack to keep hacks working
*/
static SegmentReader getOnlySegmentReader(Directory dir) throws IOException {
return getOnlySegmentReader(IndexReader.open(dir));
}
static SegmentReader getOnlySegmentReader(IndexReader reader) {
if (reader instanceof SegmentReader)
return (SegmentReader) reader;
if (reader instanceof DirectoryReader) {
IndexReader[] subReaders = reader.getSequentialSubReaders();
if (subReaders.length != 1)
throw new IllegalArgumentException(reader + " has " + subReaders.length + " segments instead of exactly one");
return (SegmentReader) subReaders[0];
}
throw new IllegalArgumentException(reader + " is not a SegmentReader or a single-segment DirectoryReader");
}
public int getTermInfosIndexDivisor() {
return core.termsIndexDivisor;
}
}