package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close

/** @version $Id */
/**
 * <p><b>NOTE:</b> This API is new and still experimental
 * (subject to change suddenly in the next release)</p>
 */
public class SegmentReader extends IndexReader implements Cloneable {
  protected boolean readOnly;

  private SegmentInfo si;
  private int readBufferSize;

  CloseableThreadLocal fieldsReaderLocal = new FieldsReaderLocal();
  CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal();

  BitVector deletedDocs = null;
  Ref deletedDocsRef = null;
  private boolean deletedDocsDirty = false;
  private boolean normsDirty = false;
  private int pendingDeleteCount;

  private boolean rollbackHasChanges = false;
  private boolean rollbackDeletedDocsDirty = false;
  private boolean rollbackNormsDirty = false;
  private int rollbackPendingDeleteCount;

  // optionally used for the .nrm file shared by multiple norms
  private IndexInput singleNormStream;
  private Ref singleNormRef;

  CoreReaders core;

  // Holds core readers that are shared (unchanged) when
  // SegmentReader is cloned or reopened
  static final class CoreReaders {

    // Counts how many other readers share the core objects
    // (freqStream, proxStream, tis, etc.) of this reader;
    // when coreRef drops to 0, these core objects may be
    // closed.
    // A given instance of SegmentReader may be
    // closed, even though it shares core objects with other
    // SegmentReaders:
    private final Ref ref = new Ref();

    final String segment;
    final FieldInfos fieldInfos;
    final IndexInput freqStream;
    final IndexInput proxStream;
    final TermInfosReader tisNoIndex;

    final Directory dir;
    final Directory cfsDir;
    final int readBufferSize;
    final int termsIndexDivisor;

    private final SegmentReader origInstance;

    TermInfosReader tis;
    FieldsReader fieldsReaderOrig;
    TermVectorsReader termVectorsReaderOrig;
    CompoundFileReader cfsReader;
    CompoundFileReader storeCFSReader;

    CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) throws IOException {
      segment = si.name;
      this.readBufferSize = readBufferSize;
      this.dir = dir;

      boolean success = false;

      try {
        Directory dir0 = dir;
        if (si.getUseCompoundFile()) {
          cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
          dir0 = cfsReader;
        }
        cfsDir = dir0;

        fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);

        this.termsIndexDivisor = termsIndexDivisor;
        TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
        if (termsIndexDivisor == -1) {
          tisNoIndex = reader;
        } else {
          tis = reader;
          tisNoIndex = null;
        }

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.openInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);

        if (fieldInfos.hasProx()) {
          proxStream = cfsDir.openInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
        } else {
          proxStream = null;
        }
        success = true;
      } finally {
        if (!success) {
          decRef();
        }
      }

      // Must assign this at the end -- if we hit an
      // exception above core, we don't want to attempt to
      // purge the FieldCache (will hit NPE because core is
      // not assigned yet).
      this.origInstance = origInstance;
    }

    synchronized TermVectorsReader getTermVectorsReaderOrig() {
      return termVectorsReaderOrig;
    }

    synchronized FieldsReader getFieldsReaderOrig() {
      return fieldsReaderOrig;
    }

    synchronized void incRef() {
      ref.incRef();
    }

    synchronized Directory getCFSReader() {
      return cfsReader;
    }

    synchronized TermInfosReader getTermsReader() {
      if (tis != null) {
        return tis;
      } else {
        return tisNoIndex;
      }
    }

    synchronized boolean termsIndexIsLoaded() {
      return tis != null;
    }

    // NOTE: only called from IndexWriter when a near
    // real-time reader is opened, or applyDeletes is run,
    // sharing a segment that's still being merged.  This
    // method is not fully thread safe, and relies on the
    // synchronization in IndexWriter
    synchronized void loadTermsIndex(SegmentInfo si, int termsIndexDivisor) throws IOException {
      if (tis == null) {
        Directory dir0;
        if (si.getUseCompoundFile()) {
          // In some cases, we were originally opened when CFS
          // was not used, but then we are asked to open the
          // terms reader with index after the segment has
          // switched to CFS
          if (cfsReader == null) {
            cfsReader = new CompoundFileReader(dir, segment + "."
                + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
          }
          dir0 = cfsReader;
        } else {
          dir0 = dir;
        }

        tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor);
      }
    }

    synchronized void decRef() throws IOException {

      if (ref.decRef() == 0) {

        // close everything, nothing is shared anymore with other readers
        if (tis != null) {
          tis.close();
          // null so if an app hangs on to us we still free most ram
          tis = null;
        }

        if (tisNoIndex != null) {
          tisNoIndex.close();
        }

        if (freqStream != null) {
          freqStream.close();
        }

        if (proxStream != null) {
          proxStream.close();
        }

        if (termVectorsReaderOrig != null) {
          termVectorsReaderOrig.close();
        }

        if (fieldsReaderOrig != null) {
          fieldsReaderOrig.close();
        }

        if (cfsReader != null) {
          cfsReader.close();
        }

        if (storeCFSReader != null) {
          storeCFSReader.close();
        }

        // Force FieldCache to evict our entries at this point
        if (origInstance != null) {
          FieldCache.DEFAULT.purge(origInstance);
        }
      }
    }

    synchronized void openDocStores(SegmentInfo si) throws IOException {

      assert si.name.equals(segment);

      if (fieldsReaderOrig == null) {
        final Directory storeDir;
        if (si.getDocStoreOffset() != -1) {
          if (si.getDocStoreIsCompoundFile()) {
            assert storeCFSReader == null;
            storeCFSReader = new CompoundFileReader(dir,
                si.getDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION,
                readBufferSize);
            storeDir = storeCFSReader;
            assert storeDir != null;
          } else {
            storeDir = dir;
            assert storeDir != null;
          }
        } else if (si.getUseCompoundFile()) {
          // In some cases, we were originally opened when CFS
          // was not used, but then we are asked to open doc
          // stores after the segment has switched to CFS
          if (cfsReader == null) {
            cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
          }
          storeDir = cfsReader;
          assert storeDir != null;
        } else {
          storeDir = dir;
          assert storeDir != null;
        }

        final String storesSegment;
        if (si.getDocStoreOffset() != -1) {
          storesSegment = si.getDocStoreSegment();
        } else {
          storesSegment = segment;
        }

        fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize,
                                            si.getDocStoreOffset(), si.docCount);

        // Verify two sources of "maxDoc" agree:
        if (si.getDocStoreOffset() == -1 && fieldsReaderOrig.size() != si.docCount) {
          throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.size() + " but segmentInfo shows " + si.docCount);
        }

        if (fieldInfos.hasVectors()) { // open term vector files only as needed
          termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount);
        }
      }
    }
  }

  /**
   * Sets the initial value
   */
  private class FieldsReaderLocal extends CloseableThreadLocal {
    protected Object initialValue() {
      return core.getFieldsReaderOrig().clone();
    }
  }

  static class Ref {
    private int refCount = 1;

    public String toString() {
      return "refcount: " + refCount;
    }

    public synchronized int refCount() {
      return refCount;
    }

    public synchronized int incRef() {
      assert refCount > 0;
      refCount++;
      return refCount;
    }

    public synchronized int decRef() {
      assert refCount > 0;
      refCount--;
      return refCount;
    }
  }

  /**
   * Byte[] referencing is used because a new norm object needs
   * to be created for each clone, and the byte array is all
   * that is needed for sharing between cloned readers.  The
   * current norm referencing is for sharing between readers
   * whereas the byte[] referencing is for copy on write which
   * is independent of reader references (i.e. incRef, decRef).
   */
  final class Norm implements Cloneable {
    private int refCount = 1;

    // If this instance is a clone, the originalNorm
    // references the Norm that has a real open IndexInput:
    private Norm origNorm;

    private IndexInput in;
    private long normSeek;

    // null until bytes is set
    private Ref bytesRef;
    private byte[] bytes;
    private boolean dirty;
    private int number;
    private boolean rollbackDirty;

    public Norm(IndexInput in, int number, long normSeek) {
      this.in = in;
      this.number = number;
      this.normSeek = normSeek;
    }

    public synchronized void incRef() {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
      refCount++;
    }

    private void closeInput() throws IOException {
      if (in != null) {
        if (in != singleNormStream) {
          // It's private to us -- just close it
          in.close();
        } else {
          // We are sharing this with others -- decRef and
          // maybe close the shared norm stream
          if (singleNormRef.decRef() == 0) {
            singleNormStream.close();
            singleNormStream = null;
          }
        }

        in = null;
      }
    }

    public synchronized void decRef() throws IOException {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);

      if (--refCount == 0) {
        if (origNorm != null) {
          origNorm.decRef();
          origNorm = null;
        } else {
          closeInput();
        }

        if (bytes != null) {
          assert bytesRef != null;
          bytesRef.decRef();
          bytes = null;
          bytesRef = null;
        } else {
          assert bytesRef == null;
        }
      }
    }

    // Load bytes but do not cache them if they were not
    // already cached
    public synchronized void bytes(byte[] bytesOut, int offset, int len) throws IOException {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
      if (bytes != null) {
        // Already cached -- copy from cache:
        assert len <= maxDoc();
        System.arraycopy(bytes, 0, bytesOut, offset, len);
      } else {
        // Not cached
        if (origNorm != null) {
          // Ask origNorm to load
          origNorm.bytes(bytesOut, offset, len);
        } else {
          // We are orig -- read ourselves from disk:
          synchronized(in) {
            in.seek(normSeek);
            in.readBytes(bytesOut, offset, len, false);
          }
        }
      }
    }

    // Load & cache full bytes array.  Returns bytes.
    public synchronized byte[] bytes() throws IOException {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
      if (bytes == null) {                     // value not yet read
        assert bytesRef == null;
        if (origNorm != null) {
          // Ask origNorm to load so that for a series of
          // reopened readers we share a single read-only
          // byte[]
          bytes = origNorm.bytes();
          bytesRef = origNorm.bytesRef;
          bytesRef.incRef();

          // Once we've loaded the bytes we no longer need
          // origNorm:
          origNorm.decRef();
          origNorm = null;
        } else {
          // We are the origNorm, so load the bytes for real
          // ourself:
          final int count = maxDoc();
          bytes = new byte[count];

          // Since we are orig, in must not be null
          assert in != null;

          // Read from disk.
          synchronized(in) {
            in.seek(normSeek);
            in.readBytes(bytes, 0, count, false);
          }

          bytesRef = new Ref();
          closeInput();
        }
      }

      return bytes;
    }

    // Only for testing
    Ref bytesRef() {
      return bytesRef;
    }

    // Called if we intend to change a norm value.  We make a
    // private copy of bytes if it's shared with others:
    public synchronized byte[] copyOnWrite() throws IOException {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
      bytes();
      assert bytes != null;
      assert bytesRef != null;
      if (bytesRef.refCount() > 1) {
        // I cannot be the origNorm for another norm
        // instance if I'm being changed.
        // I.e., only the
        // "head Norm" can be changed:
        assert refCount == 1;
        final Ref oldRef = bytesRef;
        bytes = cloneNormBytes(bytes);
        bytesRef = new Ref();
        oldRef.decRef();
      }
      dirty = true;
      return bytes;
    }

    // Returns a copy of this Norm instance that shares
    // IndexInput & bytes with the original one
    public synchronized Object clone() {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);

      Norm clone;
      try {
        clone = (Norm) super.clone();
      } catch (CloneNotSupportedException cnse) {
        // Cannot happen
        throw new RuntimeException("unexpected CloneNotSupportedException", cnse);
      }
      clone.refCount = 1;

      if (bytes != null) {
        assert bytesRef != null;
        assert origNorm == null;

        // Clone holds a reference to my bytes:
        clone.bytesRef.incRef();
      } else {
        assert bytesRef == null;
        if (origNorm == null) {
          // I become the origNorm for the clone:
          clone.origNorm = this;
        }
        clone.origNorm.incRef();
      }

      // Only the origNorm will actually readBytes from in:
      clone.in = null;

      return clone;
    }

    // Flush all pending changes to the next generation
    // separate norms file.
    public void reWrite(SegmentInfo si) throws IOException {
      assert refCount > 0 && (origNorm == null || origNorm.refCount > 0): "refCount=" + refCount + " origNorm=" + origNorm;

      // NOTE: norms are re-written in regular directory, not cfs
      si.advanceNormGen(this.number);
      IndexOutput out = directory().createOutput(si.getNormFileName(this.number));
      try {
        out.writeBytes(bytes, maxDoc());
      } finally {
        out.close();
      }
      this.dirty = false;
    }
  }

  Map norms = new HashMap();

  /** The class which implements SegmentReader. */
  // @deprecated (LUCENE-1677)
  private static Class IMPL;
  static {
    try {
      String name = System.getProperty("org.apache.lucene.SegmentReader.class", SegmentReader.class.getName());
      IMPL = Class.forName(name);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    } catch (SecurityException se) {
      try {
        IMPL = Class.forName(SegmentReader.class.getName());
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("cannot load default SegmentReader class: " + e, e);
      }
    }
  }

  // @deprecated (LUCENE-1677)
  private static Class READONLY_IMPL;
  static {
    try {
      String name = System.getProperty("org.apache.lucene.ReadOnlySegmentReader.class",
                                       ReadOnlySegmentReader.class.getName());
      READONLY_IMPL = Class.forName(name);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("cannot load ReadOnlySegmentReader class: " + e, e);
    } catch (SecurityException se) {
      try {
        READONLY_IMPL = Class.forName(ReadOnlySegmentReader.class.getName());
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("cannot load default ReadOnlySegmentReader class: " + e, e);
      }
    }
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * @deprecated
   */
  public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
    return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(boolean readOnly, SegmentInfo si, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
    return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * @deprecated
   */
  static SegmentReader get(SegmentInfo si, int
                          readBufferSize, boolean doOpenStores, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
    return get(false, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor);
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(boolean readOnly,
                                  Directory dir,
                                  SegmentInfo si,
                                  int readBufferSize,
                                  boolean doOpenStores,
                                  int termInfosIndexDivisor)
    throws CorruptIndexException, IOException {
    SegmentReader instance;
    try {
      if (readOnly)
        instance = (SegmentReader)READONLY_IMPL.newInstance();
      else
        instance = (SegmentReader)IMPL.newInstance();
    } catch (Exception e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    }
    instance.readOnly = readOnly;
    instance.si = si;
    instance.readBufferSize = readBufferSize;

    boolean success = false;

    try {
      instance.core = new CoreReaders(instance, dir, si, readBufferSize, termInfosIndexDivisor);
      if (doOpenStores) {
        instance.core.openDocStores(si);
      }
      instance.loadDeletedDocs();
      instance.openNorms(instance.core.cfsDir, readBufferSize);
      success = true;
    } finally {

      // With lock-less commits, it's entirely possible (and
      // fine) to hit a FileNotFound exception above.  In
      // this case, we want to explicitly close any subset
      // of things that were opened so that we don't have to
      // wait for a GC to do so.
      if (!success) {
        instance.doClose();
      }
    }
    return instance;
  }

  void openDocStores() throws IOException {
    core.openDocStores(si);
  }

  private boolean checkDeletedCounts() throws IOException {
    final int recomputedCount = deletedDocs.getRecomputedCount();

    assert deletedDocs.count() == recomputedCount :
      "deleted count=" + deletedDocs.count() + " vs recomputed count=" + recomputedCount;

    assert si.getDelCount() == recomputedCount :
      "delete count mismatch: info=" + si.getDelCount() + " vs BitVector=" + recomputedCount;

    // Verify # deletes does not exceed maxDoc for this
    // segment:
    assert si.getDelCount() <= maxDoc() :
      "delete count (" + recomputedCount + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name;

    return true;
  }

  private void loadDeletedDocs() throws IOException {
    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (hasDeletions(si)) {
      deletedDocs = new BitVector(directory(), si.getDelFileName());
      deletedDocsRef = new Ref();
      assert checkDeletedCounts();
    } else
      assert si.getDelCount() == 0;
  }

  /**
   * Clones the norm bytes.  May be overridden by subclasses.  New and experimental.
   * @param bytes Byte array to clone
   * @return New byte array
   */
  protected byte[] cloneNormBytes(byte[] bytes) {
    byte[] cloneBytes = new byte[bytes.length];
    System.arraycopy(bytes, 0, cloneBytes, 0, bytes.length);
    return cloneBytes;
  }

  /**
   * Clones the deleteDocs BitVector.  May be overridden by subclasses.  New and experimental.
   * @param bv BitVector to clone
   * @return New BitVector
   */
  protected BitVector cloneDeletedDocs(BitVector bv) {
    return (BitVector)bv.clone();
  }

  public final synchronized Object clone() {
    try {
      return clone(readOnly); // Preserve current readOnly
    } catch (Exception ex) {
      throw new RuntimeException(ex);
    }
  }

  public final synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException {
    return reopenSegment(si, true, openReadOnly);
  }

  synchronized SegmentReader reopenSegment(SegmentInfo si, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
    boolean deletionsUpToDate = (this.si.hasDeletions() == si.hasDeletions())
                                  && (!si.hasDeletions() || this.si.getDelFileName().equals(si.getDelFileName()));
    boolean normsUpToDate = true;

    boolean[] fieldNormsChanged = new boolean[core.fieldInfos.size()];
    final int fieldCount = core.fieldInfos.size();
    for (int i = 0; i < fieldCount; i++) {
      if (!this.si.getNormFileName(i).equals(si.getNormFileName(i))) {
        normsUpToDate = false;
        fieldNormsChanged[i] = true;
      }
    }

    // if we're cloning we need to run through the reopenSegment logic
    // also if both old and new readers aren't readonly, we clone to avoid sharing modifications
    if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly) {
      return this;
    }

    // When cloning, the incoming SegmentInfos should not
    // have any changes in it:
    assert !doClone || (normsUpToDate && deletionsUpToDate);

    // clone reader
    SegmentReader clone;
    try {
      if (openReadOnly)
        clone = (SegmentReader) READONLY_IMPL.newInstance();
      else
        clone = (SegmentReader) IMPL.newInstance();
    } catch (Exception e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    }

    boolean success = false;
    try {
      core.incRef();
      clone.core = core;
      clone.readOnly = openReadOnly;
      clone.si = si;
      clone.readBufferSize = readBufferSize;

      if (!openReadOnly && hasChanges) {
        // My pending changes transfer to the new reader
        clone.pendingDeleteCount = pendingDeleteCount;
        clone.deletedDocsDirty = deletedDocsDirty;
        clone.normsDirty = normsDirty;
        clone.hasChanges = hasChanges;
        hasChanges = false;
      }

      if (doClone) {
        if (deletedDocs != null) {
          deletedDocsRef.incRef();
          clone.deletedDocs = deletedDocs;
          clone.deletedDocsRef = deletedDocsRef;
        }
      } else {
        if (!deletionsUpToDate) {
          // load deleted docs
          assert clone.deletedDocs == null;
          clone.loadDeletedDocs();
        } else if (deletedDocs != null) {
          deletedDocsRef.incRef();
          clone.deletedDocs = deletedDocs;
          clone.deletedDocsRef = deletedDocsRef;
        }
      }

      clone.setDisableFakeNorms(getDisableFakeNorms());
      clone.norms = new HashMap();

      // Clone norms
      for (int i = 0; i < fieldNormsChanged.length; i++) {

        // Clone unchanged norms to the cloned reader
        if (doClone || !fieldNormsChanged[i]) {
          final String curField = core.fieldInfos.fieldInfo(i).name;
          Norm norm = (Norm) this.norms.get(curField);
          if (norm != null)
            clone.norms.put(curField, norm.clone());
        }
      }

      // If we are not cloning, then this will open anew
      // any norms that have changed:
      clone.openNorms(si.getUseCompoundFile() ?
                      core.getCFSReader() : directory(), readBufferSize);

      success = true;
    } finally {
      if (!success) {
        // An exception occurred during reopen, we have to decRef the norms
        // that we incRef'ed already and close singleNormStream and FieldsReader
        clone.decRef();
      }
    }

    return clone;
  }

  /** @deprecated */
  protected void doCommit() throws IOException {
    doCommit(null);
  }

  protected void doCommit(Map commitUserData) throws IOException {
    if (hasChanges) {
      if (deletedDocsDirty) {               // re-write deleted
        si.advanceDelGen();

        // We can write directly to the actual name (vs to a
        // .tmp & renaming it) because the file is not live
        // until segments file is written:
        deletedDocs.write(directory(), si.getDelFileName());

        si.setDelCount(si.getDelCount()+pendingDeleteCount);
        pendingDeleteCount = 0;
        assert deletedDocs.count() == si.getDelCount(): "delete count mismatch during commit: info=" + si.getDelCount() + " vs BitVector=" + deletedDocs.count();
      } else {
        assert pendingDeleteCount == 0;
      }

      if (normsDirty) {               // re-write norms
        si.setNumFields(core.fieldInfos.size());
        Iterator it = norms.values().iterator();
        while (it.hasNext()) {
          Norm norm = (Norm) it.next();
          if (norm.dirty) {
            norm.reWrite(si);
          }
        }
      }
      deletedDocsDirty = false;
      normsDirty = false;
      hasChanges = false;
    }
  }

  FieldsReader getFieldsReader() {
    return (FieldsReader) fieldsReaderLocal.get();
  }

  protected void doClose() throws IOException {
    termVectorsLocal.close();
    fieldsReaderLocal.close();

    if (deletedDocs != null) {
      deletedDocsRef.decRef();
      // null so if an app hangs on to us we still free most ram
      deletedDocs = null;
    }

    Iterator it = norms.values().iterator();
    while (it.hasNext()) {
      ((Norm) it.next()).decRef();
    }
    if (core != null) {
      core.decRef();
    }
  }

  static boolean hasDeletions(SegmentInfo si) throws IOException {
    // Don't call ensureOpen() here (it could affect performance)
    return si.hasDeletions();
  }

  public boolean hasDeletions() {
    // Don't call ensureOpen() here (it could affect performance)
    return deletedDocs != null;
  }

  static boolean usesCompoundFile(SegmentInfo si) throws IOException {
    return si.getUseCompoundFile();
  }

  static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
    return si.hasSeparateNorms();
  }

  protected void doDelete(int docNum) {
    if (deletedDocs == null) {
      deletedDocs = new BitVector(maxDoc());
      deletedDocsRef = new Ref();
    }
    // If there is more than 1 SegmentReader with a reference to this
    // deletedDocs BitVector, decRef the current deletedDocsRef,
    // clone the BitVector, and create a new deletedDocsRef
    if (deletedDocsRef.refCount() > 1) {
      Ref oldRef = deletedDocsRef;
      deletedDocs = cloneDeletedDocs(deletedDocs);
      deletedDocsRef = new Ref();
      oldRef.decRef();
    }
    deletedDocsDirty = true;
    if (!deletedDocs.getAndSet(docNum))
      pendingDeleteCount++;
  }

  protected void doUndeleteAll() {
    deletedDocsDirty = false;
    if (deletedDocs != null) {
      assert deletedDocsRef != null;
      deletedDocsRef.decRef();
      deletedDocs = null;
      deletedDocsRef = null;
      pendingDeleteCount = 0;
      si.clearDelGen();
      si.setDelCount(0);
    } else {
      assert deletedDocsRef == null;
      assert pendingDeleteCount == 0;
    }
  }

  List files() throws IOException {
    return new ArrayList(si.files());
  }

  public TermEnum terms() {
    ensureOpen();
    return core.getTermsReader().terms();
  }

  public TermEnum terms(Term t) throws IOException {
    ensureOpen();
    return core.getTermsReader().terms(t);
  }

  FieldInfos fieldInfos() {
    return core.fieldInfos;
  }

  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
    ensureOpen();
    return getFieldsReader().doc(n, fieldSelector);
  }

  public
  synchronized boolean isDeleted(int n) {
    return (deletedDocs != null && deletedDocs.get(n));
  }

  public TermDocs termDocs(Term term) throws IOException {
    if (term == null) {
      return new AllTermDocs(this);
    } else {
      return super.termDocs(term);
    }
  }

  public TermDocs termDocs() throws IOException {
    ensureOpen();
    return new SegmentTermDocs(this);
  }

  public TermPositions termPositions() throws IOException {
    ensureOpen();
    return new SegmentTermPositions(this);
  }

  public int docFreq(Term t) throws IOException {
    ensureOpen();
    TermInfo ti = core.getTermsReader().get(t);
    if (ti != null)
      return ti.docFreq;
    else
      return 0;
  }

  public int numDocs() {
    // Don't call ensureOpen() here (it could affect performance)
    int n = maxDoc();
    if (deletedDocs != null)
      n -= deletedDocs.count();
    return n;
  }

  public int maxDoc() {
    // Don't call ensureOpen() here (it could affect performance)
    return si.docCount;
  }

  /**
   * @see IndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)
   */
  public Collection getFieldNames(IndexReader.FieldOption fieldOption) {
    ensureOpen();

    Set fieldSet = new HashSet();
    for (int i = 0; i < core.fieldInfos.size(); i++) {
      FieldInfo fi = core.fieldInfos.fieldInfo(i);
      if (fieldOption == IndexReader.FieldOption.ALL) {
        fieldSet.add(fi.name);
      }
      else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
        fieldSet.add(fi.name);
      }
      else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
        fieldSet.add(fi.name);
      }
      else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
        fieldSet.add(fi.name);
      }
      else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
        fieldSet.add(fi.name);
      }
      else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
        fieldSet.add(fi.name);
      }
      else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
        fieldSet.add(fi.name);
      }
      else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
        fieldSet.add(fi.name);
      }
      else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
        fieldSet.add(fi.name);
      }
      else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
        fieldSet.add(fi.name);
      }
      else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
        fieldSet.add(fi.name);
      }
    }
    return fieldSet;
  }

  public synchronized boolean hasNorms(String field) {
    ensureOpen();
    return norms.containsKey(field);
  }

  static byte[] createFakeNorms(int size) {
    byte[] ones = new byte[size];
    Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
    return ones;
  }

  private byte[] ones;
  private byte[] fakeNorms() {
    assert !getDisableFakeNorms();
    if (ones==null) ones=createFakeNorms(maxDoc());
    return ones;
  }

  // can return null if norms aren't stored
  protected synchronized byte[] getNorms(String field) throws IOException {
    Norm norm = (Norm) norms.get(field);
    if (norm == null) return null;  // not indexed, or norms not stored
    return norm.bytes();
  }

  // returns fake norms if norms aren't available
  public synchronized byte[] norms(String field) throws IOException {
    ensureOpen();
    byte[] bytes = getNorms(field);
    if (bytes==null &&
        !getDisableFakeNorms()) bytes=fakeNorms();
    return bytes;
  }

  protected void doSetNorm(int doc, String field, byte value) throws IOException {
    Norm norm = (Norm) norms.get(field);
    if (norm == null)                             // not an indexed field
      return;

    normsDirty = true;
    norm.copyOnWrite()[doc] = value;                    // set the value
  }

  /** Read norms into a pre-allocated array. */
  public synchronized void norms(String field, byte[] bytes, int offset) throws IOException {

    ensureOpen();
    Norm norm = (Norm) norms.get(field);
    if (norm == null) {
      Arrays.fill(bytes, offset, bytes.length, DefaultSimilarity.encodeNorm(1.0f));
      return;
    }

    norm.bytes(bytes, offset, maxDoc());
  }

  private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
    long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
    int maxDoc = maxDoc();
    for (int i = 0; i < core.fieldInfos.size(); i++) {
      FieldInfo fi = core.fieldInfos.fieldInfo(i);
      if (norms.containsKey(fi.name)) {
        // in case this SegmentReader is being re-opened, we might be able to
        // reuse some norm instances and skip loading them here
        continue;
      }
      if (fi.isIndexed && !fi.omitNorms) {
        Directory d = directory();
        String fileName = si.getNormFileName(fi.number);
        if (!si.hasSeparateNorms(fi.number)) {
          d = cfsDir;
        }

        // singleNormFile means multiple norms share this file
        boolean singleNormFile = fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION);
        IndexInput normInput = null;
        long normSeek;

        if (singleNormFile) {
          normSeek = nextNormSeek;
          if (singleNormStream == null) {
            singleNormStream = d.openInput(fileName, readBufferSize);
            singleNormRef = new Ref();
          } else {
            singleNormRef.incRef();
          }
          // All norms in the .nrm file can share a single IndexInput since
          // they are only used in a synchronized context.
          // If this were to change in the future, a clone could be done here.
          normInput = singleNormStream;
        } else {
          normSeek = 0;
          normInput = d.openInput(fileName);
        }

        norms.put(fi.name, new Norm(normInput, fi.number, normSeek));
        nextNormSeek += maxDoc; // increment also if some norms are separate
      }
    }
  }

  boolean termsIndexLoaded() {
    return core.termsIndexIsLoaded();
  }

  // NOTE: only called from IndexWriter when a near
  // real-time reader is opened, or applyDeletes is run,
  // sharing a segment that's still being merged.  This
  // method is not thread safe, and relies on the
  // synchronization in IndexWriter
  void loadTermsIndex(int termsIndexDivisor) throws IOException {
    core.loadTermsIndex(si, termsIndexDivisor);
  }

  // for testing only
  boolean normsClosed() {
    if (singleNormStream != null) {
      return false;
    }
    Iterator it = norms.values().iterator();
    while (it.hasNext()) {
      Norm norm = (Norm) it.next();
      if (norm.refCount > 0) {
        return false;
      }
    }
    return true;
  }

  // for testing only
  boolean normsClosed(String field) {
    Norm norm = (Norm) norms.get(field);
    return norm.refCount == 0;
  }

  /**
   * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
   * @return TermVectorsReader
   */
  TermVectorsReader getTermVectorsReader() {
    TermVectorsReader tvReader = (TermVectorsReader) termVectorsLocal.get();
    if (tvReader == null) {
      TermVectorsReader orig = core.getTermVectorsReaderOrig();
      if (orig == null) {
        return null;
      } else {
        try {
          tvReader = (TermVectorsReader) orig.clone();
        } catch (CloneNotSupportedException cnse) {
          return null;
        }
      }
      termVectorsLocal.set(tvReader);
    }
    return tvReader;
  }

  TermVectorsReader getTermVectorsReaderOrig() {
    return core.getTermVectorsReaderOrig();
  }

  /** Return a term frequency vector for the specified document and field.
   * The vector returned contains term numbers and frequencies for all terms in
   * the specified field of this document, if the field had the storeTermVector
   * flag set.  If the flag was not set, the method returns null.
   * @throws IOException
   */
  public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
    // Check if this field is invalid or has no stored term vector
    ensureOpen();
    FieldInfo fi = core.fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector)
      return null;

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;

    return termVectorsReader.get(docNumber, field);
  }

  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
    ensureOpen();
    FieldInfo fi = core.fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector)
      return;

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null) {
      return;
    }

    termVectorsReader.get(docNumber, field, mapper);
  }

  public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
    ensureOpen();

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return;

    termVectorsReader.get(docNumber, mapper);
  }

  /** Return an array of term frequency vectors for the specified document.
   *  The array contains a vector for each vectorized field in the document.
   *  Each vector contains term numbers and frequencies for all terms
   *  in a given vectorized field.
   *  If no such fields existed, the method returns null.
   * @throws IOException
   */
  public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
    ensureOpen();

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;

    return termVectorsReader.get(docNumber);
  }

  /**
   * Return the name of the segment this reader is reading.
   */
  public String getSegmentName() {
    return core.segment;
  }

  /**
   * Return the SegmentInfo of the segment this reader is reading.
   */
  SegmentInfo getSegmentInfo() {
    return si;
  }

  void setSegmentInfo(SegmentInfo info) {
    si = info;
  }

  void startCommit() {
    rollbackHasChanges = hasChanges;
    rollbackDeletedDocsDirty = deletedDocsDirty;
    rollbackNormsDirty = normsDirty;
    rollbackPendingDeleteCount = pendingDeleteCount;
    Iterator it = norms.values().iterator();
    while (it.hasNext()) {
      Norm norm = (Norm) it.next();
      norm.rollbackDirty = norm.dirty;
    }
  }

  void rollbackCommit() {
    hasChanges = rollbackHasChanges;
    deletedDocsDirty = rollbackDeletedDocsDirty;
    normsDirty = rollbackNormsDirty;
    pendingDeleteCount = rollbackPendingDeleteCount;
    Iterator it = norms.values().iterator();
    while (it.hasNext()) {
      Norm norm = (Norm) it.next();
      norm.dirty = norm.rollbackDirty;
    }
  }

  /** Returns the directory this index resides in. */
  public Directory directory() {
    // Don't ensureOpen here -- in certain cases, when a
    // cloned/reopened reader needs to commit, it may call
    // this method on the closed original reader
    return core.dir;
  }

  // This is necessary so that cloned SegmentReaders (which
  // share the underlying postings data) will map to the
  // same entry in the FieldCache.  See LUCENE-1579.
  public final Object getFieldCacheKey() {
    return core.freqStream;
  }

  public Object getDeletesCacheKey() {
    return deletedDocs;
  }

  public long getUniqueTermCount() {
    return core.getTermsReader().size();
  }

  /**
   * Lotsa tests did hacks like:<br/>
   * SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>
   * They broke.
   * This method serves as a hack to keep hacks working.
   */
  static SegmentReader getOnlySegmentReader(Directory dir) throws IOException {
    return getOnlySegmentReader(IndexReader.open(dir));
  }

  static SegmentReader getOnlySegmentReader(IndexReader reader) {
    if (reader instanceof SegmentReader)
      return (SegmentReader) reader;

    if (reader instanceof DirectoryReader) {
      IndexReader[] subReaders = reader.getSequentialSubReaders();
      if (subReaders.length != 1)
        throw new IllegalArgumentException(reader + " has " + subReaders.length + " segments instead of exactly one");

      return (SegmentReader) subReaders[0];
    }

    throw new IllegalArgumentException(reader + " is not a SegmentReader or a single-segment DirectoryReader");
  }

  public int getTermInfosIndexDivisor() {
    return core.termsIndexDivisor;
  }
}
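
// Usage sketch (editorial addition, not part of the original source): SegmentReader is an
// internal, experimental API, so applications normally go through IndexReader.open(Directory)
// rather than constructing one directly. The cast below is safe only when the index is known
// to consist of exactly one segment (e.g. right after an optimize), which is also what the
// getOnlySegmentReader helpers above assume. The index path is hypothetical.
//
//   Directory dir = FSDirectory.open(new File("/path/to/index"));
//   IndexReader reader = IndexReader.open(dir, true);   // read-only reader
//   try {
//     if (reader instanceof SegmentReader) {
//       SegmentReader sr = (SegmentReader) reader;
//       System.out.println(sr.getSegmentName() + ": " + sr.numDocs()
//                          + " live docs of " + sr.maxDoc());
//     }
//   } finally {
//     reader.close();
//     dir.close();
//   }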