/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Linq;
using Lucene.Net.Support;
using Lucene.Net.Util;
using Document = Lucene.Net.Documents.Document;
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using BitVector = Lucene.Net.Util.BitVector;
using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;

namespace Lucene.Net.Index
{
    /// <version>$Id</version>
    /// <summary>
    /// NOTE: This API is new and still experimental
    /// (subject to change suddenly in the next release)
    /// </summary>
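    /// <remarks>
    /// Illustrative use only (the variable names are assumed, and this sketch is
    /// not part of the public contract):
    /// <code>
    /// SegmentReader reader = SegmentReader.Get(true, segmentInfo, termInfosIndexDivisor);
    /// try { int liveDocs = reader.NumDocs(); } finally { reader.Close(); }
    /// </code>
    /// </remarks>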
    public class SegmentReader : IndexReader
    {
        public SegmentReader()
        {
            InitBlock();
        }

        private void InitBlock()
        {
            fieldsReaderLocal = new FieldsReaderLocal(this);
        }

        protected internal bool readOnly;
        private SegmentInfo si;
        private int readBufferSize;

        internal CloseableThreadLocal<FieldsReader> fieldsReaderLocal;
        internal CloseableThreadLocal<TermVectorsReader> termVectorsLocal = new CloseableThreadLocal<TermVectorsReader>();

        internal BitVector deletedDocs = null;
        internal Ref deletedDocsRef = null;
        private bool deletedDocsDirty = false;
        private bool normsDirty = false;
        private int pendingDeleteCount;

        private bool rollbackHasChanges = false;
        private bool rollbackDeletedDocsDirty = false;
        private bool rollbackNormsDirty = false;
        private SegmentInfo rollbackSegmentInfo;
        private int rollbackPendingDeleteCount;

        // optionally used for the .nrm file shared by multiple norms
        private IndexInput singleNormStream;
        private Ref singleNormRef;

        internal CoreReaders core;

        // Holds core readers that are shared (unchanged) when
        // SegmentReader is cloned or reopened
        public /*internal*/ sealed class CoreReaders
        {
            // Counts how many other readers share the core objects
            // (freqStream, proxStream, tis, etc.) of this reader;
            // when coreRef drops to 0, these core objects may be
            // closed.  A given instance of SegmentReader may be
            // closed, even though it shares core objects with other
            // SegmentReaders:
            private readonly Ref ref_Renamed = new Ref();

            internal System.String segment;
            internal FieldInfos fieldInfos;
            internal IndexInput freqStream;
            internal IndexInput proxStream;
            internal TermInfosReader tisNoIndex;

            internal Directory dir;
            internal Directory cfsDir;
            internal int readBufferSize;
            internal int termsIndexDivisor;

            internal SegmentReader origInstance;

            internal TermInfosReader tis;
            internal FieldsReader fieldsReaderOrig;
            internal TermVectorsReader termVectorsReaderOrig;
            internal CompoundFileReader cfsReader;
            internal CompoundFileReader storeCFSReader;

            internal CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor)
            {
                segment = si.name;
                this.readBufferSize = readBufferSize;
                this.dir = dir;

                bool success = false;

                try
                {
                    Directory dir0 = dir;
                    if (si.GetUseCompoundFile())
                    {
                        cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                        dir0 = cfsReader;
                    }
                    cfsDir = dir0;

                    fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);

                    this.termsIndexDivisor = termsIndexDivisor;
                    var reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
                    if (termsIndexDivisor == -1)
                    {
                        tisNoIndex = reader;
                    }
                    else
                    {
                        tis = reader;
                        tisNoIndex = null;
                    }

                    // make sure that all index files have been read or are kept open
                    // so that if an index update removes them we'll still have them
                    freqStream = cfsDir.OpenInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);

                    proxStream = fieldInfos.HasProx()
                        ? cfsDir.OpenInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize)
                        : null;
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        DecRef();
                    }
                }

                // Must assign this at the end -- if we hit an
                // exception above core, we don't want to attempt to
                // purge the FieldCache (will hit NPE because core is
                // not assigned yet).
                this.origInstance = origInstance;
            }

            internal TermVectorsReader GetTermVectorsReaderOrig()
            {
                lock (this)
                {
                    return termVectorsReaderOrig;
                }
            }

            internal FieldsReader GetFieldsReaderOrig()
            {
                lock (this)
                {
                    return fieldsReaderOrig;
                }
            }

            internal void IncRef()
            {
                lock (this)
                {
                    ref_Renamed.IncRef();
                }
            }

            internal Directory GetCFSReader()
            {
                lock (this)
                {
                    return cfsReader;
                }
            }

            internal TermInfosReader GetTermsReader()
            {
                lock (this)
                {
                    if (tis != null)
                    {
                        return tis;
                    }
                    else
                    {
                        return tisNoIndex;
                    }
                }
            }

            internal bool TermsIndexIsLoaded()
            {
                lock (this)
                {
                    return tis != null;
                }
            }

            // NOTE: only called from IndexWriter when a near
            // real-time reader is opened, or applyDeletes is run,
            // sharing a segment that's still being merged.  This
            // method is not fully thread safe, and relies on the
            // synchronization in IndexWriter
            internal void LoadTermsIndex(SegmentInfo si, int termsIndexDivisor)
            {
                lock (this)
                {
                    if (tis == null)
                    {
                        Directory dir0;
                        if (si.GetUseCompoundFile())
                        {
                            // In some cases, we were originally opened when CFS
                            // was not used, but then we are asked to open the
                            // terms reader with index, the segment has switched
                            // to CFS
                            if (cfsReader == null)
                            {
                                cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                            }
                            dir0 = cfsReader;
                        }
                        else
                        {
                            dir0 = dir;
                        }
                        tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor);
                    }
                }
            }

            internal void DecRef()
            {
                lock (this)
                {
                    if (ref_Renamed.DecRef() == 0)
                    {
                        // close everything, nothing is shared anymore with other readers
                        if (tis != null)
                        {
                            tis.Dispose();
                            // null so if an app hangs on to us we still free most ram
                            tis = null;
                        }
                        if (tisNoIndex != null)
                        {
                            tisNoIndex.Dispose();
                        }
                        if (freqStream != null)
                        {
                            freqStream.Close();
                        }
                        if (proxStream != null)
                        {
                            proxStream.Close();
                        }
                        if (termVectorsReaderOrig != null)
                        {
                            termVectorsReaderOrig.Dispose();
                        }
                        if (fieldsReaderOrig != null)
                        {
                            fieldsReaderOrig.Dispose();
                        }
                        if (cfsReader != null)
                        {
                            cfsReader.Close();
                        }
                        if (storeCFSReader != null)
                        {
                            storeCFSReader.Close();
                        }
                        // Force FieldCache to evict our entries at this point
                        if (origInstance != null)
                        {
                            Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(origInstance);
                        }
                    }
                }
            }

            internal void OpenDocStores(SegmentInfo si)
            {
                lock (this)
                {
                    System.Diagnostics.Debug.Assert(si.name.Equals(segment));

                    if (fieldsReaderOrig == null)
                    {
                        Directory storeDir;
                        if (si.DocStoreOffset != -1)
                        {
                            if (si.DocStoreIsCompoundFile)
                            {
                                System.Diagnostics.Debug.Assert(storeCFSReader == null);
                                storeCFSReader = new CompoundFileReader(dir, si.DocStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                                storeDir = storeCFSReader;
                                System.Diagnostics.Debug.Assert(storeDir != null);
                            }
                            else
                            {
                                storeDir = dir;
                                System.Diagnostics.Debug.Assert(storeDir != null);
                            }
                        }
                        else if (si.GetUseCompoundFile())
                        {
                            // In some cases, we were originally opened when CFS
                            // was not used, but then we are asked to open doc
                            // stores after the segment has switched to CFS
                            if (cfsReader == null)
                            {
                                cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                            }
                            storeDir = cfsReader;
                            System.Diagnostics.Debug.Assert(storeDir != null);
                        }
                        else
                        {
                            storeDir = dir;
                            System.Diagnostics.Debug.Assert(storeDir != null);
                        }

                        string storesSegment = si.DocStoreOffset != -1 ? si.DocStoreSegment : segment;
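                        // Shared doc stores: when DocStoreOffset != -1, stored fields and
                        // term vectors live in the docStoreSegment's files, and this
                        // segment reads its slice starting at that offset.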
                        fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.DocStoreOffset, si.docCount);

                        // Verify two sources of "maxDoc" agree:
                        if (si.DocStoreOffset == -1 && fieldsReaderOrig.Size() != si.docCount)
                        {
                            throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount);
                        }

                        if (fieldInfos.HasVectors())
                        {
                            // open term vector files only as needed
                            termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.DocStoreOffset, si.docCount);
                        }
                    }
                }
            }

            public FieldInfos fieldInfos_ForNUnit
            {
                get { return fieldInfos; }
            }
        }

        /// <summary>Sets the initial value.</summary>
        private class FieldsReaderLocal : CloseableThreadLocal<FieldsReader>
        {
            public FieldsReaderLocal(SegmentReader enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }
            private void InitBlock(SegmentReader enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private SegmentReader enclosingInstance;
            public SegmentReader Enclosing_Instance
            {
                get { return enclosingInstance; }
            }
            public /*protected internal*/ override FieldsReader InitialValue()
            {
                return (FieldsReader) Enclosing_Instance.core.GetFieldsReaderOrig().Clone();
            }
        }

        public /*internal*/ class Ref
        {
            private int refCount = 1;

            public override System.String ToString()
            {
                return "refcount: " + refCount;
            }

            public virtual int RefCount()
            {
                lock (this)
                {
                    return refCount;
                }
            }

            public virtual int IncRef()
            {
                lock (this)
                {
                    System.Diagnostics.Debug.Assert(refCount > 0);
                    refCount++;
                    return refCount;
                }
            }

            public virtual int DecRef()
            {
                lock (this)
                {
                    System.Diagnostics.Debug.Assert(refCount > 0);
                    refCount--;
                    return refCount;
                }
            }
        }

        /// <summary>
        /// Byte[] referencing is used because a new norm object needs
        /// to be created for each clone, and the byte array is all
        /// that is needed for sharing between cloned readers.  The
        /// current norm referencing is for sharing between readers
        /// whereas the byte[] referencing is for copy on write which
        /// is independent of reader references (i.e. incRef, decRef).
        /// </summary>
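        /// <remarks>
        /// Illustrative (not normative) flow: a cloned Norm shares the original's
        /// byte[] through bytesRef; CopyOnWrite() makes a private copy only when
        /// the bytes are still shared (bytesRef.RefCount() &gt; 1) and a norm value
        /// is about to change.
        /// </remarks>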
        public /*internal*/ sealed class Norm : System.ICloneable
        {
            private void InitBlock(SegmentReader enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private SegmentReader enclosingInstance;
            public SegmentReader Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            internal /*private*/ int refCount = 1;

            // If this instance is a clone, the originalNorm
            // references the Norm that has a real open IndexInput:
            private Norm origNorm;

            private IndexInput in_Renamed;
            private readonly long normSeek;

            // null until bytes is set
            private Ref bytesRef;
            internal /*private*/ byte[] bytes;
            internal /*private*/ bool dirty;
            internal /*private*/ int number;
            internal /*private*/ bool rollbackDirty;

            public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek)
            {
                InitBlock(enclosingInstance);
                this.in_Renamed = in_Renamed;
                this.number = number;
                this.normSeek = normSeek;
            }

            public void IncRef()
            {
                lock (this)
                {
                    System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0));
                    refCount++;
                }
            }

            private void CloseInput()
            {
                if (in_Renamed != null)
                {
                    if (in_Renamed != Enclosing_Instance.singleNormStream)
                    {
                        // It's private to us -- just close it
                        in_Renamed.Dispose();
                    }
                    else
                    {
                        // We are sharing this with others -- decRef and
                        // maybe close the shared norm stream
                        if (Enclosing_Instance.singleNormRef.DecRef() == 0)
                        {
                            Enclosing_Instance.singleNormStream.Dispose();
                            Enclosing_Instance.singleNormStream = null;
                        }
                    }

                    in_Renamed = null;
                }
            }

            public void DecRef()
            {
                lock (this)
                {
                    System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0));

                    if (--refCount == 0)
                    {
                        if (origNorm != null)
                        {
                            origNorm.DecRef();
                            origNorm = null;
                        }
                        else
                        {
                            CloseInput();
                        }

                        if (bytes != null)
                        {
                            System.Diagnostics.Debug.Assert(bytesRef != null);
                            bytesRef.DecRef();
                            bytes = null;
                            bytesRef = null;
                        }
                        else
                        {
                            System.Diagnostics.Debug.Assert(bytesRef == null);
                        }
                    }
                }
            }

            // Load bytes but do not cache them if they were not
            // already cached
            public void Bytes(byte[] bytesOut, int offset, int len)
            {
                lock (this)
                {
                    System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0));
                    if (bytes != null)
                    {
                        // Already cached -- copy from cache:
                        System.Diagnostics.Debug.Assert(len <= Enclosing_Instance.MaxDoc);
                        Array.Copy(bytes, 0, bytesOut, offset, len);
                    }
                    else
                    {
                        // Not cached
                        if (origNorm != null)
                        {
                            // Ask origNorm to load
                            origNorm.Bytes(bytesOut, offset, len);
                        }
                        else
                        {
                            // We are orig -- read ourselves from disk:
                            lock (in_Renamed)
                            {
                                in_Renamed.Seek(normSeek);
                                in_Renamed.ReadBytes(bytesOut, offset, len, false);
                            }
                        }
                    }
                }
            }

            // Load & cache full bytes array.  Returns bytes.
            public byte[] Bytes()
            {
                lock (this)
                {
                    System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0));
                    if (bytes == null)
                    {
                        // value not yet read
                        System.Diagnostics.Debug.Assert(bytesRef == null);
                        if (origNorm != null)
                        {
                            // Ask origNorm to load so that for a series of
                            // reopened readers we share a single read-only
                            // byte[]
                            bytes = origNorm.Bytes();
                            bytesRef = origNorm.bytesRef;
                            bytesRef.IncRef();

                            // Once we've loaded the bytes we no longer need
                            // origNorm:
                            origNorm.DecRef();
                            origNorm = null;
                        }
                        else
                        {
                            // We are the origNorm, so load the bytes for real
                            // ourself:
                            int count = Enclosing_Instance.MaxDoc;
                            bytes = new byte[count];

                            // Since we are orig, in must not be null
                            System.Diagnostics.Debug.Assert(in_Renamed != null);

                            // Read from disk.
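                            // in_Renamed may be the shared singleNormStream (see
                            // OpenNorms), so reads must stay synchronized on the stream.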
                            lock (in_Renamed)
                            {
                                in_Renamed.Seek(normSeek);
                                in_Renamed.ReadBytes(bytes, 0, count, false);
                            }

                            bytesRef = new Ref();
                            CloseInput();
                        }
                    }

                    return bytes;
                }
            }

            // Only for testing
            public /*internal*/ Ref BytesRef()
            {
                return bytesRef;
            }

            // Called if we intend to change a norm value.  We make a
            // private copy of bytes if it's shared with others:
            public byte[] CopyOnWrite()
            {
                lock (this)
                {
                    System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0));
                    Bytes();
                    System.Diagnostics.Debug.Assert(bytes != null);
                    System.Diagnostics.Debug.Assert(bytesRef != null);
                    if (bytesRef.RefCount() > 1)
                    {
                        // I cannot be the origNorm for another norm
                        // instance if I'm being changed.  Ie, only the
                        // "head Norm" can be changed:
                        System.Diagnostics.Debug.Assert(refCount == 1);
                        Ref oldRef = bytesRef;
                        bytes = Enclosing_Instance.CloneNormBytes(bytes);
                        bytesRef = new Ref();
                        oldRef.DecRef();
                    }
                    dirty = true;
                    return bytes;
                }
            }

            // Returns a copy of this Norm instance that shares
            // IndexInput & bytes with the original one
            public System.Object Clone()
            {
                lock (this) //LUCENENET-375
                {
                    System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0));

                    Norm clone;
                    try
                    {
                        clone = (Norm) base.MemberwiseClone();
                    }
                    catch (System.Exception cnse)
                    {
                        // Cannot happen
                        throw new System.SystemException("unexpected CloneNotSupportedException", cnse);
                    }
                    clone.refCount = 1;

                    if (bytes != null)
                    {
                        System.Diagnostics.Debug.Assert(bytesRef != null);
                        System.Diagnostics.Debug.Assert(origNorm == null);

                        // Clone holds a reference to my bytes:
                        clone.bytesRef.IncRef();
                    }
                    else
                    {
                        System.Diagnostics.Debug.Assert(bytesRef == null);
                        if (origNorm == null)
                        {
                            // I become the origNorm for the clone:
                            clone.origNorm = this;
                        }
                        clone.origNorm.IncRef();
                    }

                    // Only the origNorm will actually readBytes from in:
                    clone.in_Renamed = null;

                    return clone;
                }
            }

            // Flush all pending changes to the next generation
            // separate norms file.
            public void ReWrite(SegmentInfo si)
            {
                System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0), "refCount=" + refCount + " origNorm=" + origNorm);

                // NOTE: norms are re-written in regular directory, not cfs
                si.AdvanceNormGen(this.number);
                string normFileName = si.GetNormFileName(this.number);
                IndexOutput @out = enclosingInstance.Directory().CreateOutput(normFileName);
                bool success = false;
                try
                {
                    try
                    {
                        @out.WriteBytes(bytes, enclosingInstance.MaxDoc);
                    }
                    finally
                    {
                        @out.Close();
                    }
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        try
                        {
                            enclosingInstance.Directory().DeleteFile(normFileName);
                        }
                        catch (Exception)
                        {
                            // suppress this so we keep throwing the
                            // original exception
                        }
                    }
                }
                this.dirty = false;
            }
        }

        internal System.Collections.Generic.IDictionary<string, Norm> norms = new HashMap<string, Norm>();

        /// <throws>CorruptIndexException if the index is corrupt</throws>
        /// <throws>IOException if there is a low-level IO error</throws>
        public static SegmentReader Get(bool readOnly, SegmentInfo si, int termInfosIndexDivisor)
        {
            return Get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
        }
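        /// <summary>
        /// Opens a SegmentReader over <c>si</c>; when <c>doOpenStores</c> is true the
        /// doc stores (stored fields and term vectors) are opened eagerly as well.
        /// </summary>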
        /// <throws>CorruptIndexException if the index is corrupt</throws>
        /// <throws>IOException if there is a low-level IO error</throws>
        public static SegmentReader Get(bool readOnly, Directory dir, SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor)
        {
            SegmentReader instance = readOnly ? new ReadOnlySegmentReader() : new SegmentReader();
            instance.readOnly = readOnly;
            instance.si = si;
            instance.readBufferSize = readBufferSize;

            bool success = false;

            try
            {
                instance.core = new CoreReaders(instance, dir, si, readBufferSize, termInfosIndexDivisor);
                if (doOpenStores)
                {
                    instance.core.OpenDocStores(si);
                }
                instance.LoadDeletedDocs();
                instance.OpenNorms(instance.core.cfsDir, readBufferSize);
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above.  In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    instance.DoClose();
                }
            }
            return instance;
        }

        internal virtual void OpenDocStores()
        {
            core.OpenDocStores(si);
        }

        private bool CheckDeletedCounts()
        {
            int recomputedCount = deletedDocs.GetRecomputedCount();

            System.Diagnostics.Debug.Assert(deletedDocs.Count() == recomputedCount, "deleted count=" + deletedDocs.Count() + " vs recomputed count=" + recomputedCount);

            System.Diagnostics.Debug.Assert(si.GetDelCount() == recomputedCount, "delete count mismatch: info=" + si.GetDelCount() + " vs BitVector=" + recomputedCount);

            // Verify # deletes does not exceed maxDoc for this
            // segment:
            System.Diagnostics.Debug.Assert(si.GetDelCount() <= MaxDoc, "delete count (" + recomputedCount + ") exceeds max doc (" + MaxDoc + ") for segment " + si.name);

            return true;
        }

        private void LoadDeletedDocs()
        {
            // NOTE: the bitvector is stored using the regular directory, not cfs
            //if (HasDeletions(si))
            if (si.HasDeletions())
            {
                deletedDocs = new BitVector(Directory(), si.GetDelFileName());
                deletedDocsRef = new Ref();
                System.Diagnostics.Debug.Assert(CheckDeletedCounts());
            }
            else
                System.Diagnostics.Debug.Assert(si.GetDelCount() == 0);
        }

        /// <summary>Clones the norm bytes.  May be overridden by subclasses.  New and experimental.</summary>
        /// <param name="bytes">Byte array to clone</param>
        /// <returns>New byte array</returns>
        protected internal virtual byte[] CloneNormBytes(byte[] bytes)
        {
            var cloneBytes = new byte[bytes.Length];
            Array.Copy(bytes, 0, cloneBytes, 0, bytes.Length);
            return cloneBytes;
        }

        /// <summary>Clones the deleteDocs BitVector.  May be overridden by subclasses.  New and experimental.</summary>
        /// <param name="bv">BitVector to clone</param>
        /// <returns>New BitVector</returns>
        protected internal virtual BitVector CloneDeletedDocs(BitVector bv)
        {
            return (BitVector) bv.Clone();
        }
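        /// <summary>Clones this reader, preserving its current read-only mode.</summary>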
        public override System.Object Clone()
        {
            lock (this)
            {
                try
                {
                    return Clone(readOnly); // Preserve current readOnly
                }
                catch (System.Exception ex)
                {
                    throw new System.SystemException(ex.Message, ex);
                }
            }
        }

        public override IndexReader Clone(bool openReadOnly)
        {
            lock (this)
            {
                return ReopenSegment(si, true, openReadOnly);
            }
        }

        internal virtual SegmentReader ReopenSegment(SegmentInfo si, bool doClone, bool openReadOnly)
        {
            lock (this)
            {
                bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
                bool normsUpToDate = true;

                bool[] fieldNormsChanged = new bool[core.fieldInfos.Size()];
                int fieldCount = core.fieldInfos.Size();
                for (int i = 0; i < fieldCount; i++)
                {
                    if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
                    {
                        normsUpToDate = false;
                        fieldNormsChanged[i] = true;
                    }
                }

                // if we're cloning we need to run through the reopenSegment logic
                // also if both old and new readers aren't readonly, we clone to avoid sharing modifications
                if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly)
                {
                    return this;
                }

                // When cloning, the incoming SegmentInfos should not
                // have any changes in it:
                System.Diagnostics.Debug.Assert(!doClone || (normsUpToDate && deletionsUpToDate));

                // clone reader
                SegmentReader clone = openReadOnly ? new ReadOnlySegmentReader() : new SegmentReader();

                bool success = false;
                try
                {
                    core.IncRef();
                    clone.core = core;
                    clone.readOnly = openReadOnly;
                    clone.si = si;
                    clone.readBufferSize = readBufferSize;

                    if (!openReadOnly && hasChanges)
                    {
                        // My pending changes transfer to the new reader
                        clone.pendingDeleteCount = pendingDeleteCount;
                        clone.deletedDocsDirty = deletedDocsDirty;
                        clone.normsDirty = normsDirty;
                        clone.hasChanges = hasChanges;
                        hasChanges = false;
                    }

                    if (doClone)
                    {
                        if (deletedDocs != null)
                        {
                            deletedDocsRef.IncRef();
                            clone.deletedDocs = deletedDocs;
                            clone.deletedDocsRef = deletedDocsRef;
                        }
                    }
                    else
                    {
                        if (!deletionsUpToDate)
                        {
                            // load deleted docs
                            System.Diagnostics.Debug.Assert(clone.deletedDocs == null);
                            clone.LoadDeletedDocs();
                        }
                        else if (deletedDocs != null)
                        {
                            deletedDocsRef.IncRef();
                            clone.deletedDocs = deletedDocs;
                            clone.deletedDocsRef = deletedDocsRef;
                        }
                    }

                    clone.norms = new HashMap<string, Norm>();

                    // Clone norms
                    for (int i = 0; i < fieldNormsChanged.Length; i++)
                    {
                        // Clone unchanged norms to the cloned reader
                        if (doClone || !fieldNormsChanged[i])
                        {
                            System.String curField = core.fieldInfos.FieldInfo(i).name;
                            Norm norm = this.norms[curField];
                            if (norm != null)
                                clone.norms[curField] = (Norm) norm.Clone();
                        }
                    }

                    // If we are not cloning, then this will open anew
                    // any norms that have changed:
                    clone.OpenNorms(si.GetUseCompoundFile() ? core.GetCFSReader() : Directory(), readBufferSize);

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // An exception occurred during reopen, we have to decRef the norms
                        // that we incRef'ed already and close singleNormsStream and FieldsReader
                        clone.DecRef();
                    }
                }

                return clone;
            }
        }

        protected internal override void DoCommit(System.Collections.Generic.IDictionary<string, string> commitUserData)
        {
            if (hasChanges)
            {
                StartCommit();
                bool success = false;
                try
                {
                    CommitChanges(commitUserData);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        RollbackCommit();
                    }
                }
            }
        }

        private void CommitChanges(System.Collections.Generic.IDictionary<string, string> commitUserData)
        {
            if (deletedDocsDirty)
            {
                // re-write deleted
                si.AdvanceDelGen();

                // We can write directly to the actual name (vs to a
                // .tmp & renaming it) because the file is not live
                // until segments file is written:
                string delFileName = si.GetDelFileName();
                bool success = false;
                try
                {
                    deletedDocs.Write(Directory(), delFileName);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        try
                        {
                            Directory().DeleteFile(delFileName);
                        }
                        catch (Exception)
                        {
                            // suppress this so we keep throwing the
                            // original exception
                        }
                    }
                }

                si.SetDelCount(si.GetDelCount() + pendingDeleteCount);
                pendingDeleteCount = 0;
                System.Diagnostics.Debug.Assert(deletedDocs.Count() == si.GetDelCount(), "delete count mismatch during commit: info=" + si.GetDelCount() + " vs BitVector=" + deletedDocs.Count());
            }
            else
            {
                System.Diagnostics.Debug.Assert(pendingDeleteCount == 0);
            }

            if (normsDirty)
            {
                // re-write norms
                si.SetNumFields(core.fieldInfos.Size());
                foreach (Norm norm in norms.Values)
                {
                    if (norm.dirty)
                    {
                        norm.ReWrite(si);
                    }
                }
            }
            deletedDocsDirty = false;
            normsDirty = false;
            hasChanges = false;
        }

        internal virtual FieldsReader GetFieldsReader()
        {
            return fieldsReaderLocal.Get();
        }

        protected internal override void DoClose()
        {
            termVectorsLocal.Close();
            fieldsReaderLocal.Close();

            if (deletedDocs != null)
            {
                deletedDocsRef.DecRef();
                // null so if an app hangs on to us we still free most ram
                deletedDocs = null;
            }

            foreach (Norm norm in norms.Values)
            {
                norm.DecRef();
            }
            if (core != null)
            {
                core.DecRef();
            }
        }

        //internal static bool HasDeletions(SegmentInfo si)
        //{
        //    // Don't call ensureOpen() here (it could affect performance)
        //    return si.HasDeletions();
        //}

        public override bool HasDeletions
        {
            get
            {
                // Don't call ensureOpen() here (it could affect performance)
                return deletedDocs != null;
            }
        }

        internal static bool UsesCompoundFile(SegmentInfo si)
        {
            return si.GetUseCompoundFile();
        }

        internal static bool HasSeparateNorms(SegmentInfo si)
        {
            return si.HasSeparateNorms();
        }

        protected internal override void DoDelete(int docNum)
        {
            if (deletedDocs == null)
            {
                deletedDocs = new BitVector(MaxDoc);
                deletedDocsRef = new Ref();
            }
            // there is more than 1 SegmentReader with a reference to this
            // deletedDocs BitVector so decRef the current deletedDocsRef,
            // clone the BitVector, create a new deletedDocsRef
            if (deletedDocsRef.RefCount() > 1)
            {
                Ref oldRef = deletedDocsRef;
                deletedDocs = CloneDeletedDocs(deletedDocs);
                deletedDocsRef = new Ref();
                oldRef.DecRef();
            }
            deletedDocsDirty = true;
            if (!deletedDocs.GetAndSet(docNum))
                pendingDeleteCount++;
        }

        protected internal override void DoUndeleteAll()
        {
            deletedDocsDirty = false;
            if (deletedDocs != null)
            {
                System.Diagnostics.Debug.Assert(deletedDocsRef != null);
                deletedDocsRef.DecRef();
                deletedDocs = null;
                deletedDocsRef = null;
                pendingDeleteCount = 0;
                si.ClearDelGen();
                si.SetDelCount(0);
            }
            else
            {
                System.Diagnostics.Debug.Assert(deletedDocsRef == null);
                System.Diagnostics.Debug.Assert(pendingDeleteCount == 0);
            }
        }

        internal virtual System.Collections.Generic.IList<string> Files()
        {
            return si.Files();
        }

        public override TermEnum Terms()
        {
            EnsureOpen();
            return core.GetTermsReader().Terms();
        }

        public override TermEnum Terms(Term t)
        {
            EnsureOpen();
            return core.GetTermsReader().Terms(t);
        }

        public /*internal*/ virtual FieldInfos FieldInfos()
        {
            return core.fieldInfos;
        }

        public override Document Document(int n, FieldSelector fieldSelector)
        {
            EnsureOpen();
            return GetFieldsReader().Doc(n, fieldSelector);
        }

        public override bool IsDeleted(int n)
        {
            lock (this)
            {
                return (deletedDocs != null && deletedDocs.Get(n));
            }
        }
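        /// <summary>
        /// Returns TermDocs for <c>term</c>; a null term yields an AllTermDocs
        /// enumerating every non-deleted document.
        /// </summary>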
        public override TermDocs TermDocs(Term term)
        {
            if (term == null)
            {
                return new AllTermDocs(this);
            }
            else
            {
                return base.TermDocs(term);
            }
        }

        public override TermDocs TermDocs()
        {
            EnsureOpen();
            return new SegmentTermDocs(this);
        }

        public override TermPositions TermPositions()
        {
            EnsureOpen();
            return new SegmentTermPositions(this);
        }

        public override int DocFreq(Term t)
        {
            EnsureOpen();
            TermInfo ti = core.GetTermsReader().Get(t);
            if (ti != null)
                return ti.docFreq;
            else
                return 0;
        }

        public override int NumDocs()
        {
            // Don't call ensureOpen() here (it could affect performance)
            int n = MaxDoc;
            if (deletedDocs != null)
                n -= deletedDocs.Count();
            return n;
        }

        public override int MaxDoc
        {
            get
            {
                // Don't call ensureOpen() here (it could affect performance)
                return si.docCount;
            }
        }

        /// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption)" />
        public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldOption)
        {
            EnsureOpen();

            System.Collections.Generic.ISet<string> fieldSet = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>();
            for (int i = 0; i < core.fieldInfos.Size(); i++)
            {
                FieldInfo fi = core.fieldInfos.FieldInfo(i);
                if (fieldOption == IndexReader.FieldOption.ALL)
                {
                    fieldSet.Add(fi.name);
                }
                else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
                {
                    fieldSet.Add(fi.name);
                }
                else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS)
                {
                    fieldSet.Add(fi.name);
                }
                else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS)
                {
                    fieldSet.Add(fi.name);
                }
                else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
                {
                    fieldSet.Add(fi.name);
                }
                else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
                {
                    fieldSet.Add(fi.name);
                }
                else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR)
                {
                    fieldSet.Add(fi.name);
                }
                else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
                {
                    fieldSet.Add(fi.name);
                }
                else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
                {
                    fieldSet.Add(fi.name);
                }
                else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
                {
                    fieldSet.Add(fi.name);
                }
                else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)
                {
                    fieldSet.Add(fi.name);
                }
            }
            return fieldSet;
        }

        public override bool HasNorms(System.String field)
        {
            lock (this)
            {
                EnsureOpen();
                return norms.ContainsKey(field);
            }
        }

        // can return null if norms aren't stored
        protected internal virtual byte[] GetNorms(System.String field)
        {
            lock (this)
            {
                Norm norm = norms[field];
                if (norm == null)
                    return null; // not indexed, or norms not stored
                return norm.Bytes();
            }
        }

        // returns fake norms if norms aren't available
        public override byte[] Norms(System.String field)
        {
            lock (this)
            {
                EnsureOpen();
                byte[] bytes = GetNorms(field);
                return bytes;
            }
        }

        protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed)
        {
            Norm norm = norms[field];
            if (norm == null)
                // not an indexed field
                return;

            normsDirty = true;
            norm.CopyOnWrite()[doc] = value_Renamed; // set the value
        }

        /// <summary>Read norms into a pre-allocated array.</summary>
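        /// <remarks>
        /// Fields without norms are filled with the encoded default norm
        /// (boost 1.0f via DefaultSimilarity.EncodeNorm).
        /// </remarks>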
        public override void Norms(System.String field, byte[] bytes, int offset)
        {
            lock (this)
            {
                EnsureOpen();
                Norm norm = norms[field];
                if (norm == null)
                {
                    for (int i = offset; i < bytes.Length; i++)
                    {
                        bytes[i] = (byte) DefaultSimilarity.EncodeNorm(1.0f);
                    }
                    return;
                }

                norm.Bytes(bytes, offset, MaxDoc);
            }
        }

        private void OpenNorms(Directory cfsDir, int readBufferSize)
        {
            long nextNormSeek = SegmentMerger.NORMS_HEADER.Length; //skip header (header unused for now)
            int maxDoc = MaxDoc;
            for (int i = 0; i < core.fieldInfos.Size(); i++)
            {
                FieldInfo fi = core.fieldInfos.FieldInfo(i);
                if (norms.ContainsKey(fi.name))
                {
                    // in case this SegmentReader is being re-opened, we might be able to
                    // reuse some norm instances and skip loading them here
                    continue;
                }
                if (fi.isIndexed && !fi.omitNorms)
                {
                    Directory d = Directory();
                    System.String fileName = si.GetNormFileName(fi.number);
                    if (!si.HasSeparateNorms(fi.number))
                    {
                        d = cfsDir;
                    }

                    // singleNormFile means multiple norms share this file
                    bool singleNormFile = fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION);
                    IndexInput normInput = null;
                    long normSeek;

                    if (singleNormFile)
                    {
                        normSeek = nextNormSeek;
                        if (singleNormStream == null)
                        {
                            singleNormStream = d.OpenInput(fileName, readBufferSize);
                            singleNormRef = new Ref();
                        }
                        else
                        {
                            singleNormRef.IncRef();
                        }
                        // All norms in the .nrm file can share a single IndexInput since
                        // they are only used in a synchronized context.
                        // If this were to change in the future, a clone could be done here.
                        normInput = singleNormStream;
                    }
                    else
                    {
                        normSeek = 0;
                        normInput = d.OpenInput(fileName);
                    }

                    norms[fi.name] = new Norm(this, normInput, fi.number, normSeek);
                    nextNormSeek += maxDoc; // increment also if some norms are separate
                }
            }
        }

        public /*internal*/ virtual bool TermsIndexLoaded()
        {
            return core.TermsIndexIsLoaded();
        }

        // NOTE: only called from IndexWriter when a near
        // real-time reader is opened, or applyDeletes is run,
        // sharing a segment that's still being merged.  This
        // method is not thread safe, and relies on the
        // synchronization in IndexWriter
        internal virtual void LoadTermsIndex(int termsIndexDivisor)
        {
            core.LoadTermsIndex(si, termsIndexDivisor);
        }

        // for testing only
        public /*internal*/ virtual bool NormsClosed()
        {
            if (singleNormStream != null)
            {
                return false;
            }
            return norms.Values.All(norm => norm.refCount <= 0);
        }

        // for testing only
        public /*internal*/ virtual bool NormsClosed(System.String field)
        {
            return norms[field].refCount == 0;
        }

        /// <summary>Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.</summary>
        /// <returns>TermVectorsReader</returns>
        internal virtual TermVectorsReader GetTermVectorsReader()
        {
            TermVectorsReader tvReader = termVectorsLocal.Get();
            if (tvReader == null)
            {
                TermVectorsReader orig = core.GetTermVectorsReaderOrig();
                if (orig == null)
                {
                    return null;
                }
                else
                {
                    try
                    {
                        tvReader = (TermVectorsReader) orig.Clone();
                    }
                    catch (System.Exception)
                    {
                        return null;
                    }
                }
                termVectorsLocal.Set(tvReader);
            }
            return tvReader;
        }

        internal virtual TermVectorsReader GetTermVectorsReaderOrig()
        {
            return core.GetTermVectorsReaderOrig();
        }

        /// <summary>
        /// Return a term frequency vector for the specified document and field.  The
        /// vector returned contains term numbers and frequencies for all terms in
        /// the specified field of this document, if the field had the storeTermVector
        /// flag set.  If the flag was not set, the method returns null.
        /// </summary>
        /// <throws>IOException</throws>
        public override ITermFreqVector GetTermFreqVector(int docNumber, System.String field)
        {
            // Check if this field is invalid or has no stored term vector
            EnsureOpen();
            FieldInfo fi = core.fieldInfos.FieldInfo(field);
            if (fi == null || !fi.storeTermVector)
                return null;

            TermVectorsReader termVectorsReader = GetTermVectorsReader();
            if (termVectorsReader == null)
                return null;

            return termVectorsReader.Get(docNumber, field);
        }

        public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
        {
            EnsureOpen();
            FieldInfo fi = core.fieldInfos.FieldInfo(field);
            if (fi == null || !fi.storeTermVector)
                return;

            TermVectorsReader termVectorsReader = GetTermVectorsReader();
            if (termVectorsReader == null)
            {
                return;
            }

            termVectorsReader.Get(docNumber, field, mapper);
        }

        public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
        {
            EnsureOpen();

            TermVectorsReader termVectorsReader = GetTermVectorsReader();
            if (termVectorsReader == null)
                return;

            termVectorsReader.Get(docNumber, mapper);
        }

        /// <summary>
        /// Return an array of term frequency vectors for the specified document.
        /// The array contains a vector for each vectorized field in the document.
        /// Each vector contains term numbers and frequencies for all terms
        /// in a given vectorized field.
        /// If no such fields existed, the method returns null.
        /// </summary>
        /// <throws>IOException</throws>
        public override ITermFreqVector[] GetTermFreqVectors(int docNumber)
        {
            EnsureOpen();

            TermVectorsReader termVectorsReader = GetTermVectorsReader();
            if (termVectorsReader == null)
                return null;

            return termVectorsReader.Get(docNumber);
        }

        /// <summary>Return the name of the segment this reader is reading.</summary>
        public virtual string SegmentName
        {
            get { return core.segment; }
        }

        /// <summary>Return the SegmentInfo of the segment this reader is reading.</summary>
        internal virtual SegmentInfo SegmentInfo
        {
            get { return si; }
            set { si = value; }
        }

        internal virtual void StartCommit()
        {
            rollbackSegmentInfo = (SegmentInfo) si.Clone();
            rollbackHasChanges = hasChanges;
            rollbackDeletedDocsDirty = deletedDocsDirty;
            rollbackNormsDirty = normsDirty;
            rollbackPendingDeleteCount = pendingDeleteCount;
            foreach (Norm norm in norms.Values)
            {
                norm.rollbackDirty = norm.dirty;
            }
        }

        internal virtual void RollbackCommit()
        {
            si.Reset(rollbackSegmentInfo);
            hasChanges = rollbackHasChanges;
            deletedDocsDirty = rollbackDeletedDocsDirty;
            normsDirty = rollbackNormsDirty;
            pendingDeleteCount = rollbackPendingDeleteCount;
            foreach (Norm norm in norms.Values)
            {
                norm.dirty = norm.rollbackDirty;
            }
        }

        /// <summary>Returns the directory this index resides in.</summary>
        public override Directory Directory()
        {
            // Don't ensureOpen here -- in certain cases, when a
            // cloned/reopened reader needs to commit, it may call
            // this method on the closed original reader
            return core.dir;
        }

        // This is necessary so that cloned SegmentReaders (which
        // share the underlying postings data) will map to the
        // same entry in the FieldCache.  See LUCENE-1579.
        public override object FieldCacheKey
        {
            get { return core.freqStream; }
        }

        public override object DeletesCacheKey
        {
            get { return deletedDocs; }
        }

        public override long UniqueTermCount
        {
            get { return core.GetTermsReader().Size(); }
        }
        /// <summary>
        /// Lotsa tests did hacks like:<br/>
        /// SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>
        /// They broke.  This method serves as a hack to keep hacks working.
        /// We do it with R/W access for the tests (BW compatibility).
        /// </summary>
        [Obsolete("Remove this when tests are fixed!")]
        public /*internal*/ static SegmentReader GetOnlySegmentReader(Directory dir)
        {
            return GetOnlySegmentReader(IndexReader.Open(dir, false));
        }

        public /*internal*/ static SegmentReader GetOnlySegmentReader(IndexReader reader)
        {
            var onlySegmentReader = reader as SegmentReader;
            if (onlySegmentReader != null)
                return onlySegmentReader;

            if (reader is DirectoryReader)
            {
                IndexReader[] subReaders = reader.GetSequentialSubReaders();
                if (subReaders.Length != 1)
                {
                    throw new System.ArgumentException(reader + " has " + subReaders.Length + " segments instead of exactly one");
                }
                return (SegmentReader) subReaders[0];
            }

            throw new System.ArgumentException(reader + " is not a SegmentReader or a single-segment DirectoryReader");
        }

        public override int TermInfosIndexDivisor
        {
            get { return core.termsIndexDivisor; }
        }

        public System.Collections.Generic.IDictionary<string, Norm> norms_ForNUnit
        {
            get { return norms; }
        }

        public BitVector deletedDocs_ForNUnit
        {
            get { return deletedDocs; }
        }

        public CoreReaders core_ForNUnit
        {
            get { return core; }
        }

        public Ref deletedDocsRef_ForNUnit
        {
            get { return deletedDocsRef; }
        }
    }
}