/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using System.IO; using Lucene.Net.Support; using Analyzer = Lucene.Net.Analysis.Analyzer; using Document = Lucene.Net.Documents.Document; using IndexingChain = Lucene.Net.Index.DocumentsWriter.IndexingChain; using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException; using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput; using Directory = Lucene.Net.Store.Directory; using Lock = Lucene.Net.Store.Lock; using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException; using Constants = Lucene.Net.Util.Constants; using Query = Lucene.Net.Search.Query; using Similarity = Lucene.Net.Search.Similarity; namespace Lucene.Net.Index { /// An IndexWriter creates and maintains an index. ///

/// The create argument to the constructor determines
/// whether a new index is created, or whether an existing index is
/// opened. Note that you can open an index with create=true
/// even while readers are using the index. The old readers will
/// continue to search the "point in time" snapshot they had opened,
/// and won't see the newly created index until they re-open. There are
/// also constructors with no create argument which will create a new index
/// if there is not already an index at the provided path and otherwise
/// open the existing index.

///

/// In either case, documents are added with AddDocument(Document)
/// and removed with DeleteDocuments(Term) or DeleteDocuments(Query).
/// A document can be updated with UpdateDocument(Term, Document) (which
/// just deletes and then adds the entire document). When finished adding,
/// deleting and updating documents, Close() should be called.
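///
/// For example, a minimal sketch (the Directory "dir", Analyzer "analyzer" and
/// Document "doc" are assumed to already exist in the calling code):
///
///   IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
///   writer.AddDocument(doc);                           // buffer a new document
///   writer.UpdateDocument(new Term("id", "42"), doc);  // delete-then-add under the given term
///   writer.DeleteDocuments(new Term("id", "7"));       // buffer a delete
///   writer.Commit();                                   // make the changes visible to new readers
///   writer.Close();
///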

/// ///

/// These changes are buffered in memory and periodically
/// flushed to the Directory (during the above method
/// calls). A flush is triggered when there are enough
/// buffered deletes (see SetMaxBufferedDeleteTerms)
/// or enough added documents since the last flush, whichever
/// is sooner. For the added documents, flushing is triggered
/// either by RAM usage of the documents (see
/// SetRAMBufferSizeMB) or the number of added documents.
/// The default is to flush when RAM usage hits 16 MB. For
/// best indexing speed you should flush by RAM usage with a
/// large RAM buffer. Note that flushing just moves the
/// internal buffered state in IndexWriter into the index, but
/// these changes are not visible to IndexReader until either
/// Commit() or Close() is called. A flush may
/// also trigger one or more segment merges, which by default
/// run in a background thread so as not to block the
/// addDocument calls (see below for changing the MergeScheduler).

/// If an index will not have more documents added for a while and optimal search
/// performance is desired, then either the full Optimize() method or the partial
/// Optimize(int maxNumSegments) method should be called before the index is closed.
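///
/// For example, a sketch of a bulk-indexing session ("writer" as in the snippet above):
///
///   writer.SetRAMBufferSizeMB(64.0);   // flush by RAM usage with a generous buffer
///   // ... add many documents ...
///   writer.Optimize();                 // merge down to fewer segments for faster searches
///   writer.Close();
///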

/// Opening an IndexWriter creates a lock file for the directory in use. Trying to open
/// another IndexWriter on the same directory will lead to a
/// LockObtainFailedException. The LockObtainFailedException
/// is also thrown if an IndexReader on the same directory is used to delete documents
/// from the index.
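///
/// If a previous writer crashed and left a stale write lock behind, it can be
/// cleared explicitly (sketch; only safe when no other writer is open on "dir"):
///
///   if (IndexWriter.IsLocked(dir))
///   {
///       IndexWriter.Unlock(dir);
///   }
///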

///

/// ///

/// Expert: IndexWriter allows an optional IndexDeletionPolicy
/// implementation to be specified. You can use this to control when prior commits
/// are deleted from the index. The default policy is
/// KeepOnlyLastCommitDeletionPolicy, which removes all prior
/// commits as soon as a new commit is done (this matches
/// behavior before 2.2). Creating your own policy can allow
/// you to explicitly keep previous "point in time" commits
/// alive in the index for some time, to allow readers to
/// refresh to the new commit without having the old commit
/// deleted out from under them. This is necessary on
/// filesystems like NFS that do not support "delete on last
/// close" semantics, which Lucene's "point in time" search
/// normally relies on.
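///
/// For example, a writer can be opened with a policy that keeps snapshotted
/// commits alive (sketch; SnapshotDeletionPolicy is assumed to be available in
/// this version and simply wraps the default policy):
///
///   var policy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
///   var writer = new IndexWriter(dir, analyzer, policy, IndexWriter.MaxFieldLength.LIMITED);
///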

///

/// Expert: IndexWriter allows you to separately change
/// the MergePolicy and the MergeScheduler.
/// The MergePolicy is invoked whenever there are
/// changes to the segments in the index. Its role is to
/// select which merges to do, if any, and return a
/// MergePolicy.MergeSpecification describing the merges. It
/// also selects merges to do for optimize(). (The default is
/// LogByteSizeMergePolicy.) Then, the MergeScheduler
/// is invoked with the requested merges and
/// it decides when and how to run the merges. The default is
/// ConcurrentMergeScheduler.
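///
/// For example (sketch; the tuning values are illustrative only):
///
///   var logPolicy = new LogByteSizeMergePolicy(writer);
///   logPolicy.MergeFactor = 20;                            // favor batch indexing
///   writer.SetMergePolicy(logPolicy);
///   writer.SetMergeScheduler(new SerialMergeScheduler());  // run merges on the calling thread
///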

///

/// NOTE: if you hit an
/// OutOfMemoryException then IndexWriter will quietly record this
/// fact and block all future segment commits. This is a
/// defensive measure in case any internal state (buffered
/// documents and deletions) were corrupted. Any subsequent
/// calls to Commit() will throw an
/// IllegalStateException. The only course of action is to
/// call Close(), which internally will call
/// Rollback(), to undo any changes to the index since the
/// last commit. You can also just call Rollback()
/// directly.
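///
/// A defensive pattern for this case might look like (sketch):
///
///   try
///   {
///       writer.AddDocument(doc);
///       writer.Commit();
///   }
///   catch (System.OutOfMemoryException)
///   {
///       writer.Dispose();   // after an OOM this internally rolls back to the last commit
///       throw;
///   }
///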

///

/// NOTE: IndexWriter instances are completely thread
/// safe, meaning multiple threads can call any of its
/// methods concurrently. If your application requires
/// external synchronization, you should not
/// synchronize on the IndexWriter instance, as
/// this may cause deadlock; use your own (non-Lucene) objects
/// instead.

/// NOTE: if you call
/// Thread.Interrupt() on a thread that's within
/// IndexWriter, IndexWriter will try to catch this (eg, if
/// it's in a Wait() or Thread.Sleep()), and will then throw
/// the unchecked exception ThreadInterruptedException
/// and clear the interrupt status on the thread.

///

/* * Clarification: Check Points (and commits) * IndexWriter writes new index files to the directory without writing a new segments_N * file which references these new files. It also means that the state of * the in memory SegmentInfos object is different than the most recent * segments_N file written to the directory. * * Each time the SegmentInfos is changed, and matches the (possibly * modified) directory files, we have a new "check point". * If the modified/new SegmentInfos is written to disk - as a new * (generation of) segments_N file - this check point is also an * IndexCommit. * * A new checkpoint always replaces the previous checkpoint and * becomes the new "front" of the index. This allows the IndexFileDeleter * to delete files that are referenced only by stale checkpoints. * (files that were created since the last commit, but are no longer * referenced by the "front" of the index). For this, IndexFileDeleter * keeps track of the last non commit checkpoint. */ public class IndexWriter : System.IDisposable { private void InitBlock() { similarity = Search.Similarity.Default; mergePolicy = new LogByteSizeMergePolicy(this); readerPool = new ReaderPool(this); } /// Default value for the write lock timeout (1,000). /// /// public static long WRITE_LOCK_TIMEOUT = 1000; private long writeLockTimeout = WRITE_LOCK_TIMEOUT; /// Name of the write lock in the index. public const System.String WRITE_LOCK_NAME = "write.lock"; /// Value to denote a flush trigger is disabled public const int DISABLE_AUTO_FLUSH = - 1; /// Disabled by default (because IndexWriter flushes by RAM usage /// by default). Change using . /// public static readonly int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH; /// Default value is 16 MB (which means flush when buffered /// docs consume 16 MB RAM). Change using . /// public const double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0; /// Disabled by default (because IndexWriter flushes by RAM usage /// by default). Change using . /// public static readonly int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH; /// Default value is 10,000. Change using . public const int DEFAULT_MAX_FIELD_LENGTH = 10000; /// Default value is 128. Change using . public const int DEFAULT_TERM_INDEX_INTERVAL = 128; /// Absolute hard maximum length for a term. If a term /// arrives from the analyzer longer than this length, it /// is skipped and a message is printed to infoStream, if /// set (see ). /// public static readonly int MAX_TERM_LENGTH; // The normal read buffer size defaults to 1024, but // increasing this during merging seems to yield // performance gains. However we don't want to increase // it too much because there are quite a few // BufferedIndexInputs created during merging. See // LUCENE-888 for details. 
private const int MERGE_READ_BUFFER_SIZE = 4096; // Used for printing messages private static System.Object MESSAGE_ID_LOCK = new System.Object(); private static int MESSAGE_ID = 0; private int messageID = - 1; private volatile bool hitOOM; private Directory directory; // where this index resides private Analyzer analyzer; // how to analyze text private Similarity similarity; // how to normalize private volatile uint changeCount; // increments every time a change is completed private long lastCommitChangeCount; // last changeCount that was committed private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails private HashMap rollbackSegments; internal volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit()) internal volatile uint pendingCommitChangeCount; private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails private int localFlushedDocCount; // saved docWriter.getFlushedDocCount during local transaction private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private int optimizeMaxNumSegments; private DocumentsWriter docWriter; private IndexFileDeleter deleter; private ISet segmentsToOptimize = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(); // used by optimize to note those needing optimization private Lock writeLock; private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; private bool closed; private bool closing; // Holds all SegmentInfo instances currently involved in // merges private HashSet mergingSegments = new HashSet(); private MergePolicy mergePolicy; private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler(); private LinkedList pendingMerges = new LinkedList(); private ISet runningMerges = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(); private IList mergeExceptions = new List(); private long mergeGen; private bool stopMerges; private int flushCount; private int flushDeletesCount; // Used to only allow one addIndexes to proceed at once // TODO: use ReadWriteLock once we are on 5.0 private int readCount; // count of how many threads are holding read lock private ThreadClass writeThread; // non-null if any thread holds write lock internal ReaderPool readerPool; private int upgradeCount; private int readerTermsIndexDivisor = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR; // This is a "write once" variable (like the organic dye // on a DVD-R that may or may not be heated by a laser and // then cooled to permanently record the event): it's // false, until getReader() is called for the first time, // at which point it's switched to true and never changes // back to false. Once this is true, we hold open and // reuse SegmentReader instances internally for applying // deletes, doing merges, and reopening near real-time // readers. private volatile bool poolReaders; /// Expert: returns a readonly reader, covering all committed as well as /// un-committed changes to the index. This provides "near real-time" /// searching, in that changes made during an IndexWriter session can be /// quickly made available for searching without closing the writer nor /// calling . /// ///

/// Note that this is functionally equivalent to calling Commit() and then
/// using IndexReader.Open to open a new reader. But the turnaround
/// time of this method should be faster since it avoids the potentially
/// costly Commit().

/// You must close the IndexReader returned by this method once you are done using it.

/// It's near real-time because there is no hard
/// guarantee on how quickly you can get a new reader after
/// making changes with IndexWriter. You'll have to
/// experiment in your situation to determine if it's
/// fast enough. As this is a new and experimental
/// feature, please report back on your findings so we can
/// learn, improve and iterate.

/// ///

/// The resulting reader supports IndexReader.Reopen(), but that call will simply forward
/// back to this method (though this may change in the
/// future).

/// ///

The very first time this method is called, this /// writer instance will make every effort to pool the /// readers that it opens for doing merges, applying /// deletes, etc. This means additional resources (RAM, /// file descriptors, CPU time) will be consumed.

/// ///

/// For lower latency on reopening a reader, you should set a merged
/// segment warmer on this writer to pre-warm a newly merged segment
/// before it's committed to the index. This is important for minimizing
/// index-to-search delay after a large merge.

/// If an addIndexes* call is running in another thread,
/// then this reader will only search those segments from
/// the foreign index that have been successfully copied
/// over, so far.
///

/// NOTE: Once the writer is closed, any
/// outstanding readers may continue to be used. However,
/// if you attempt to reopen any of those readers, you'll
/// hit an AlreadyClosedException.

/// ///

NOTE: This API is experimental and might /// change in incompatible ways in the next release.
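///
/// A typical near real-time loop might look like (sketch; "writer" is an open
/// IndexWriter and "doc" an already-built Document):
///
///   writer.AddDocument(doc);
///   using (IndexReader reader = writer.GetReader())
///   {
///       var searcher = new Lucene.Net.Search.IndexSearcher(reader);
///       // ... search the un-committed changes, then let the reader be disposed ...
///   }
///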

/// ///

/// IndexReader that covers entire index plus all /// changes made so far by this IndexWriter instance /// /// /// IOException public virtual IndexReader GetReader() { return GetReader(readerTermsIndexDivisor); } /// Expert: like , except you can /// specify which termInfosIndexDivisor should be used for /// any newly opened readers. /// /// Subsambles which indexed /// terms are loaded into RAM. This has the same effect as /// except that setting /// must be done at indexing time while this setting can be /// set per reader. When set to N, then one in every /// N*termIndexInterval terms in the index is loaded into /// memory. By setting this to a value > 1 you can reduce /// memory usage, at the expense of higher latency when /// loading a TermInfo. The default value is 1. Set this /// to -1 to skip loading the terms index entirely. /// public virtual IndexReader GetReader(int termInfosIndexDivisor) { EnsureOpen(); if (infoStream != null) { Message("flush at getReader"); } // Do this up front before flushing so that the readers // obtained during this flush are pooled, the first time // this method is called: poolReaders = true; // Prevent segmentInfos from changing while opening the // reader; in theory we could do similar retry logic, // just like we do when loading segments_N IndexReader r; lock (this) { Flush(false, true, true); r = new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor); } MaybeMerge(); return r; } /// Holds shared SegmentReader instances. IndexWriter uses /// SegmentReaders for 1) applying deletes, 2) doing /// merges, 3) handing out a real-time reader. This pool /// reuses instances of the SegmentReaders in all these /// places if it is in "near real-time mode" (getReader() /// has been called on this instance). /// internal class ReaderPool : IDisposable { public ReaderPool(IndexWriter enclosingInstance) { InitBlock(enclosingInstance); } private void InitBlock(IndexWriter enclosingInstance) { this.enclosingInstance = enclosingInstance; } private IndexWriter enclosingInstance; public IndexWriter Enclosing_Instance { get { return enclosingInstance; } } private IDictionary readerMap = new HashMap(); /// Forcefully clear changes for the specifed segments, /// and remove from the pool. This is called on succesful merge. /// internal virtual void Clear(SegmentInfos infos) { lock (this) { if (infos == null) { foreach(KeyValuePair ent in readerMap) { ent.Value.hasChanges = false; } } else { foreach(SegmentInfo info in infos) { if (readerMap.ContainsKey(info)) { readerMap[info].hasChanges = false; } } } } } // used only by asserts public virtual bool InfoIsLive(SegmentInfo info) { lock (this) { int idx = Enclosing_Instance.segmentInfos.IndexOf(info); System.Diagnostics.Debug.Assert(idx != -1); System.Diagnostics.Debug.Assert(Enclosing_Instance.segmentInfos[idx] == info); return true; } } public virtual SegmentInfo MapToLive(SegmentInfo info) { lock (this) { int idx = Enclosing_Instance.segmentInfos.IndexOf(info); if (idx != - 1) { info = Enclosing_Instance.segmentInfos[idx]; } return info; } } /// Release the segment reader (i.e. decRef it and close if there /// are no more references. /// /// /// /// IOException public virtual void Release(SegmentReader sr) { lock (this) { Release(sr, false); } } /// Release the segment reader (i.e. decRef it and close if there /// are no more references. 
/// /// /// /// /// IOException public virtual void Release(SegmentReader sr, bool drop) { lock (this) { bool pooled = readerMap.ContainsKey(sr.SegmentInfo); System.Diagnostics.Debug.Assert(!pooled || readerMap[sr.SegmentInfo] == sr); // Drop caller's ref; for an external reader (not // pooled), this decRef will close it sr.DecRef(); if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.RefCount == 1))) { // We invoke deleter.checkpoint below, so we must be // sync'd on IW if there are changes: // TODO: Java 1.5 has this, .NET can't. // System.Diagnostics.Debug.Assert(!sr.hasChanges || Thread.holdsLock(enclosingInstance)); // Discard (don't save) changes when we are dropping // the reader; this is used only on the sub-readers // after a successful merge. sr.hasChanges &= !drop; bool hasChanges = sr.hasChanges; // Drop our ref -- this will commit any pending // changes to the dir sr.Close(); // We are the last ref to this reader; since we're // not pooling readers, we release it: readerMap.Remove(sr.SegmentInfo); if (hasChanges) { // Must checkpoint w/ deleter, because this // segment reader will have created new _X_N.del // file. enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false); } } } } /// Remove all our references to readers, and commits /// any pending changes. /// public void Dispose() { Dispose(true); } protected void Dispose(bool disposing) { if (disposing) { // We invoke deleter.checkpoint below, so we must be // sync'd on IW: // TODO: assert Thread.holdsLock(IndexWriter.this); // TODO: Should this class have bool _isDisposed? lock (this) { //var toRemove = new List(); foreach (var ent in readerMap) { SegmentReader sr = ent.Value; if (sr.hasChanges) { System.Diagnostics.Debug.Assert(InfoIsLive(sr.SegmentInfo)); sr.DoCommit(null); // Must checkpoint w/ deleter, because this // segment reader will have created new _X_N.del // file. enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false); } //toRemove.Add(ent.Key); // NOTE: it is allowed that this decRef does not // actually close the SR; this can happen when a // near real-time reader is kept open after the // IndexWriter instance is closed sr.DecRef(); } //foreach (var key in toRemove) // readerMap.Remove(key); readerMap.Clear(); } } } /// Commit all segment reader in the pool. /// IOException internal virtual void Commit() { // We invoke deleter.checkpoint below, so we must be // sync'd on IW: // TODO: assert Thread.holdsLock(IndexWriter.this); lock (this) { foreach(KeyValuePair ent in readerMap) { SegmentReader sr = ent.Value; if (sr.hasChanges) { System.Diagnostics.Debug.Assert(InfoIsLive(sr.SegmentInfo)); sr.DoCommit(null); // Must checkpoint w/ deleter, because this // segment reader will have created new _X_N.del // file. enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false); } } } } /// Returns a ref to a clone. NOTE: this clone is not /// enrolled in the pool, so you should simply close() /// it when you're done (ie, do not call release()). /// public virtual SegmentReader GetReadOnlyClone(SegmentInfo info, bool doOpenStores, int termInfosIndexDivisor) { lock (this) { SegmentReader sr = Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor); try { return (SegmentReader) sr.Clone(true); } finally { sr.DecRef(); } } } /// Obtain a SegmentReader from the readerPool. 
The reader /// must be returned by calling /// /// /// /// /// /// /// /// IOException public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores) { lock (this) { return Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, enclosingInstance.readerTermsIndexDivisor); } } /// Obtain a SegmentReader from the readerPool. The reader /// must be returned by calling /// /// /// /// /// /// /// /// /// /// /// /// /// IOException public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores, int readBufferSize, int termsIndexDivisor) { lock (this) { if (Enclosing_Instance.poolReaders) { readBufferSize = BufferedIndexInput.BUFFER_SIZE; } SegmentReader sr = readerMap[info]; if (sr == null) { // TODO: we may want to avoid doing this while // synchronized // Returns a ref, which we xfer to readerMap: sr = SegmentReader.Get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor); if (info.dir == enclosingInstance.directory) { // Only pool if reader is not external readerMap[info]=sr; } } else { if (doOpenStores) { sr.OpenDocStores(); } if (termsIndexDivisor != - 1 && !sr.TermsIndexLoaded()) { // If this reader was originally opened because we // needed to merge it, we didn't load the terms // index. But now, if the caller wants the terms // index (eg because it's doing deletes, or an NRT // reader is being opened) we ask the reader to // load its terms index. sr.LoadTermsIndex(termsIndexDivisor); } } // Return a ref to our caller if (info.dir == enclosingInstance.directory) { // Only incRef if we pooled (reader is not external) sr.IncRef(); } return sr; } } // Returns a ref public virtual SegmentReader GetIfExists(SegmentInfo info) { lock (this) { SegmentReader sr = readerMap[info]; if (sr != null) { sr.IncRef(); } return sr; } } } /// Obtain the number of deleted docs for a pooled reader. /// If the reader isn't being pooled, the segmentInfo's /// delCount is returned. 
/// public virtual int NumDeletedDocs(SegmentInfo info) { SegmentReader reader = readerPool.GetIfExists(info); try { if (reader != null) { return reader.NumDeletedDocs; } else { return info.GetDelCount(); } } finally { if (reader != null) { readerPool.Release(reader); } } } internal virtual void AcquireWrite() { lock (this) { System.Diagnostics.Debug.Assert(writeThread != ThreadClass.Current()); while (writeThread != null || readCount > 0) DoWait(); // We could have been closed while we were waiting: EnsureOpen(); writeThread = ThreadClass.Current(); } } internal virtual void ReleaseWrite() { lock (this) { System.Diagnostics.Debug.Assert(ThreadClass.Current() == writeThread); writeThread = null; System.Threading.Monitor.PulseAll(this); } } internal virtual void AcquireRead() { lock (this) { ThreadClass current = ThreadClass.Current(); while (writeThread != null && writeThread != current) DoWait(); readCount++; } } // Allows one readLock to upgrade to a writeLock even if // there are other readLocks as long as all other // readLocks are also blocked in this method: internal virtual void UpgradeReadToWrite() { lock (this) { System.Diagnostics.Debug.Assert(readCount > 0); upgradeCount++; while (readCount > upgradeCount || writeThread != null) { DoWait(); } writeThread = ThreadClass.Current(); readCount--; upgradeCount--; } } internal virtual void ReleaseRead() { lock (this) { readCount--; System.Diagnostics.Debug.Assert(readCount >= 0); System.Threading.Monitor.PulseAll(this); } } internal bool IsOpen(bool includePendingClose) { lock (this) { return !(closed || (includePendingClose && closing)); } } /// Used internally to throw an /// if this IndexWriter has been /// closed. /// /// AlreadyClosedException if this IndexWriter is protected internal void EnsureOpen(bool includePendingClose) { lock (this) { if (!IsOpen(includePendingClose)) { throw new AlreadyClosedException("this IndexWriter is closed"); } } } protected internal void EnsureOpen() { lock (this) { EnsureOpen(true); } } /// Prints a message to the infoStream (if non-null), /// prefixed with the identifying information for this /// writer and the thread that's calling it. /// public virtual void Message(System.String message) { if (infoStream != null) infoStream.WriteLine("IW " + messageID + " [" + DateTime.Now.ToString() + "; " + ThreadClass.Current().Name + "]: " + message); } private void SetMessageID(System.IO.StreamWriter infoStream) { lock (this) { if (infoStream != null && messageID == - 1) { lock (MESSAGE_ID_LOCK) { messageID = MESSAGE_ID++; } } this.infoStream = infoStream; } } /// Casts current mergePolicy to LogMergePolicy, and throws /// an exception if the mergePolicy is not a LogMergePolicy. /// private LogMergePolicy LogMergePolicy { get { if (mergePolicy is LogMergePolicy) return (LogMergePolicy) mergePolicy; throw new System.ArgumentException( "this method can only be called when the merge policy is the default LogMergePolicy"); } } ///

/// Gets or sets the current setting of whether newly flushed
/// segments will use the compound file format. Note that
/// the getter just returns the value previously set through
/// this property (or the default value, true). You cannot use
/// this to query the status of previously flushed segments.

/// ///

/// Note that this property is a convenience: it
/// just forwards to the merge policy's compound-file setting as long as
/// mergePolicy is an instance of LogMergePolicy.
/// Otherwise an ArgumentException is thrown.
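///
/// For example (sketch):
///
///   writer.UseCompoundFile = false;   // write each segment as separate files instead of one .cfs
///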

/// ///

public virtual bool UseCompoundFile { get { return LogMergePolicy.GetUseCompoundFile(); } set { LogMergePolicy.SetUseCompoundFile(value); LogMergePolicy.SetUseCompoundDocStore(value); } } /// Expert: Set the Similarity implementation used by this IndexWriter. /// public virtual void SetSimilarity(Similarity similarity) { EnsureOpen(); this.similarity = similarity; docWriter.SetSimilarity(similarity); } /// Expert: Return the Similarity implementation used by this IndexWriter. /// ///

/// This defaults to the current value of Similarity.Default.

public virtual Similarity Similarity { get { EnsureOpen(); return this.similarity; } } /// Expert: Gets or sets the interval between indexed terms. Large values cause less /// memory to be used by IndexReader, but slow random-access to terms. Small /// values cause more memory to be used by an IndexReader, and speed /// random-access to terms. /// /// This parameter determines the amount of computation required per query /// term, regardless of the number of documents that contain that term. In /// particular, it is the maximum number of other terms that must be /// scanned before a term is located and its frequency and position information /// may be processed. In a large index with user-entered query terms, query /// processing time is likely to be dominated not by term lookup but rather /// by the processing of frequency and positional data. In a small index /// or when many uncommon query terms are generated (e.g., by wildcard /// queries) term lookup may become a dominant cost. /// /// In particular, numUniqueTerms/interval terms are read into /// memory by an IndexReader, and, on average, interval/2 terms /// must be scanned for each random term access. /// /// /// /// public virtual int TermIndexInterval { get { // We pass false because this method is called by SegmentMerger while we are in the process of closing EnsureOpen(false); return termIndexInterval; } set { EnsureOpen(); this.termIndexInterval = value; } } /// Constructs an IndexWriter for the index in d. /// Text will be analyzed with a. If create /// is true, then a new, empty index will be created in /// d, replacing the index already there, if any. /// /// /// the index directory /// /// the analyzer to use /// /// true to create the index or overwrite /// the existing one; false to append to the existing /// index /// /// Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified /// via the MaxFieldLength constructor. /// /// CorruptIndexException if the index is corrupt /// LockObtainFailedException if another writer /// has this index open (write.lock could not /// be obtained) /// /// IOException if the directory cannot be read/written to, or /// if it does not exist and create is /// false or if there is any other low-level /// IO error /// public IndexWriter(Directory d, Analyzer a, bool create, MaxFieldLength mfl) { InitBlock(); Init(d, a, create, null, mfl.Limit, null, null); } /// Constructs an IndexWriter for the index in /// d, first creating it if it does not /// already exist. /// /// /// the index directory /// /// the analyzer to use /// /// Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified /// via the MaxFieldLength constructor. /// /// CorruptIndexException if the index is corrupt /// LockObtainFailedException if another writer /// has this index open (write.lock could not /// be obtained) /// /// IOException if the directory cannot be /// read/written to or if there is any other low-level /// IO error /// public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl) { InitBlock(); Init(d, a, null, mfl.Limit, null, null); } /// Expert: constructs an IndexWriter with a custom ///, for the index in d, /// first creating it if it does not already exist. Text /// will be analyzed with a. 
/// /// /// the index directory /// /// the analyzer to use /// /// see above /// /// whether or not to limit field lengths /// /// CorruptIndexException if the index is corrupt /// LockObtainFailedException if another writer /// has this index open (write.lock could not /// be obtained) /// /// IOException if the directory cannot be /// read/written to or if there is any other low-level /// IO error /// public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl) { InitBlock(); Init(d, a, deletionPolicy, mfl.Limit, null, null); } /// Expert: constructs an IndexWriter with a custom ///, for the index in d. /// Text will be analyzed with a. If /// create is true, then a new, empty index /// will be created in d, replacing the index /// already there, if any. /// /// /// the index directory /// /// the analyzer to use /// /// true to create the index or overwrite /// the existing one; false to append to the existing /// index /// /// see above /// /// , whether or not to limit field lengths. Value is in number of terms/tokens /// /// CorruptIndexException if the index is corrupt /// LockObtainFailedException if another writer /// has this index open (write.lock could not /// be obtained) /// /// IOException if the directory cannot be read/written to, or /// if it does not exist and create is /// false or if there is any other low-level /// IO error /// public IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl) { InitBlock(); Init(d, a, create, deletionPolicy, mfl.Limit, null, null); } /// Expert: constructs an IndexWriter with a custom /// and , /// for the index in d. /// Text will be analyzed with a. If /// create is true, then a new, empty index /// will be created in d, replacing the index /// already there, if any. /// /// /// the index directory /// /// the analyzer to use /// /// true to create the index or overwrite /// the existing one; false to append to the existing /// index /// /// see above /// /// whether or not to limit field lengths, value is in number of terms/tokens. See . /// /// the chain to be used to /// process documents /// /// which commit to open /// /// CorruptIndexException if the index is corrupt /// LockObtainFailedException if another writer /// has this index open (write.lock could not /// be obtained) /// /// IOException if the directory cannot be read/written to, or /// if it does not exist and create is /// false or if there is any other low-level /// IO error /// internal IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit) { InitBlock(); Init(d, a, create, deletionPolicy, mfl.Limit, indexingChain, commit); } /// Expert: constructs an IndexWriter on specific commit /// point, with a custom , for /// the index in d. Text will be analyzed /// with a. /// ///

/// This is only meaningful if you've used an
/// IndexDeletionPolicy in the past that keeps more than
/// just the last commit.

/// This operation is similar to Rollback(),
/// except that method can only rollback what's been done
/// with the current instance of IndexWriter since its last
/// commit, whereas this method can rollback to an
/// arbitrary commit point from the past, assuming the
/// IndexDeletionPolicy has preserved past
/// commits.
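///
/// For example (sketch; "chosenCommit" is an IndexCommit obtained earlier, e.g.
/// via IndexReader.ListCommits(dir), and "policy" preserves past commits):
///
///   var writer = new IndexWriter(dir, analyzer, policy,
///                                IndexWriter.MaxFieldLength.UNLIMITED, chosenCommit);
///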

/// the index directory /// /// the analyzer to use /// /// see above /// /// whether or not to limit field lengths, value is in number of terms/tokens. See . /// /// which commit to open /// /// CorruptIndexException if the index is corrupt /// LockObtainFailedException if another writer /// has this index open (write.lock could not /// be obtained) /// /// IOException if the directory cannot be read/written to, or /// if it does not exist and create is /// false or if there is any other low-level /// IO error /// public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit) { InitBlock(); Init(d, a, false, deletionPolicy, mfl.Limit, null, commit); } private void Init(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit) { if (IndexReader.IndexExists(d)) { Init(d, a, false, deletionPolicy, maxFieldLength, indexingChain, commit); } else { Init(d, a, true, deletionPolicy, maxFieldLength, indexingChain, commit); } } private void Init(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit) { directory = d; analyzer = a; SetMessageID(defaultInfoStream); this.maxFieldLength = maxFieldLength; if (indexingChain == null) indexingChain = DocumentsWriter.DefaultIndexingChain; if (create) { // Clear the write lock in case it's leftover: directory.ClearLock(WRITE_LOCK_NAME); } Lock writeLock = directory.MakeLock(WRITE_LOCK_NAME); if (!writeLock.Obtain(writeLockTimeout)) // obtain write lock { throw new LockObtainFailedException("Index locked for write: " + writeLock); } this.writeLock = writeLock; // save it bool success = false; try { if (create) { // Try to read first. This is to allow create // against an index that's currently open for // searching. In this case we write the next // segments_N file with no segments: bool doCommit; try { segmentInfos.Read(directory); segmentInfos.Clear(); doCommit = false; } catch (System.IO.IOException) { // Likely this means it's a fresh directory doCommit = true; } if (doCommit) { // Only commit if there is no segments file // in this dir already. segmentInfos.Commit(directory); synced.UnionWith(segmentInfos.Files(directory, true)); } else { // Record that we have a change (zero out all // segments) pending: changeCount++; } } else { segmentInfos.Read(directory); if (commit != null) { // Swap out all segments, but, keep metadata in // SegmentInfos, like version & generation, to // preserve write-once. This is important if // readers are open against the future commit // points. 
if (commit.Directory != directory) throw new System.ArgumentException("IndexCommit's directory doesn't match my directory"); SegmentInfos oldInfos = new SegmentInfos(); oldInfos.Read(directory, commit.SegmentsFileName); segmentInfos.Replace(oldInfos); changeCount++; if (infoStream != null) Message("init: loaded commit \"" + commit.SegmentsFileName + "\""); } // We assume that this segments_N was previously // properly sync'd: synced.UnionWith(segmentInfos.Files(directory, true)); } SetRollbackSegmentInfos(segmentInfos); docWriter = new DocumentsWriter(directory, this, indexingChain); docWriter.SetInfoStream(infoStream); docWriter.SetMaxFieldLength(maxFieldLength); // Default deleter (for backwards compatibility) is // KeepOnlyLastCommitDeleter: deleter = new IndexFileDeleter(directory, deletionPolicy == null?new KeepOnlyLastCommitDeletionPolicy():deletionPolicy, segmentInfos, infoStream, docWriter, synced); if (deleter.startingCommitDeleted) // Deletion policy deleted the "head" commit point. // We have to mark ourself as changed so that if we // are closed w/o any further changes we write a new // segments_N file. changeCount++; PushMaxBufferedDocs(); if (infoStream != null) { Message("init: create=" + create); MessageState(); } success = true; } finally { if (!success) { if (infoStream != null) { Message("init: hit exception on init; releasing write lock"); } try { writeLock.Release(); } catch (Exception) { // don't mask the original exception } writeLock = null; } } } private void SetRollbackSegmentInfos(SegmentInfos infos) { lock (this) { rollbackSegmentInfos = (SegmentInfos) infos.Clone(); System.Diagnostics.Debug.Assert(!rollbackSegmentInfos.HasExternalSegments(directory)); rollbackSegments = new HashMap(); int size = rollbackSegmentInfos.Count; for (int i = 0; i < size; i++) rollbackSegments[rollbackSegmentInfos.Info(i)] = i; } } /// Expert: set the merge policy used by this writer. public virtual void SetMergePolicy(MergePolicy mp) { EnsureOpen(); if (mp == null) throw new System.NullReferenceException("MergePolicy must be non-null"); if (mergePolicy != mp) mergePolicy.Close(); mergePolicy = mp; PushMaxBufferedDocs(); if (infoStream != null) { Message("setMergePolicy " + mp); } } /// Expert: returns the current MergePolicy in use by this writer. /// /// public virtual MergePolicy MergePolicy { get { EnsureOpen(); return mergePolicy; } } /// Expert: set the merge scheduler used by this writer. public virtual void SetMergeScheduler(MergeScheduler mergeScheduler) { lock (this) { EnsureOpen(); if (mergeScheduler == null) throw new System.NullReferenceException("MergeScheduler must be non-null"); if (this.mergeScheduler != mergeScheduler) { FinishMerges(true); this.mergeScheduler.Close(); } this.mergeScheduler = mergeScheduler; if (infoStream != null) { Message("setMergeScheduler " + mergeScheduler); } } } /// Expert: returns the current MergePolicy in use by this /// writer. /// /// /// public virtual MergeScheduler MergeScheduler { get { EnsureOpen(); return mergeScheduler; } } ///

Gets or sets the largest segment (measured by document /// count) that may be merged with other segments. ///

/// Small values (e.g., less than 10,000) are best for /// interactive indexing, as this limits the length of /// pauses while indexing to a few seconds. Larger values /// are best for batched indexing and speedier /// searches. ///

/// The default value is . ///

/// Note that this property is a convenience: it
/// just forwards to mergePolicy.MaxMergeDocs as long as
/// mergePolicy is an instance of LogMergePolicy.
/// Otherwise an ArgumentException is thrown.

/// The default merge policy (LogByteSizeMergePolicy)
/// also allows you to set this
/// limit by net size (in MB) of the segment, using
/// LogByteSizeMergePolicy.MaxMergeMB.

///

/// /// public virtual int MaxMergeDocs { get { return LogMergePolicy.MaxMergeDocs; } set { LogMergePolicy.MaxMergeDocs = value; } } /// The maximum number of terms that will be indexed for a single field in a /// document. This limits the amount of memory required for indexing, so that /// collections with very large files will not crash the indexing process by /// running out of memory. This setting refers to the number of running terms, /// not to the number of different terms.

/// Note: this silently truncates large documents, excluding from the
/// index all terms that occur further in the document. If you know your source
/// documents are large, be sure to set this value high enough to accommodate
/// the expected size. If you set it to int.MaxValue, then the only limit
/// is your memory, but you should anticipate an OutOfMemoryException.

/// By default, no more than DEFAULT_MAX_FIELD_LENGTH (10,000) terms
/// will be indexed for a field.
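///
/// For example (sketch):
///
///   writer.SetMaxFieldLength(int.MaxValue);   // index every term, however large the document
///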

public virtual void SetMaxFieldLength(int maxFieldLength) { EnsureOpen(); this.maxFieldLength = maxFieldLength; docWriter.SetMaxFieldLength(maxFieldLength); if (infoStream != null) Message("setMaxFieldLength " + maxFieldLength); } /// Returns the maximum number of terms that will be /// indexed for a single field in a document. /// /// /// [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] public virtual int GetMaxFieldLength() { EnsureOpen(); return maxFieldLength; } /// Gets or sets the termsIndexDivisor passed to any readers that /// IndexWriter opens, for example when applying deletes /// or creating a near-real-time reader in /// . Default value is /// . public int ReaderTermsIndexDivisor { get { EnsureOpen(); return readerTermsIndexDivisor; } set { EnsureOpen(); if (value <= 0) { throw new ArgumentException("divisor must be >= 1 (got " + value + ")"); } readerTermsIndexDivisor = value; if (infoStream != null) { Message("setReaderTermsIndexDivisor " + readerTermsIndexDivisor); } } } /// Determines the minimal number of documents required /// before the buffered in-memory documents are flushed as /// a new Segment. Large values generally gives faster /// indexing. /// ///

/// When this is set, the writer will flush every
/// maxBufferedDocs added documents. Pass in
/// DISABLE_AUTO_FLUSH to prevent triggering a flush due
/// to the number of buffered documents. Note that if flushing
/// by RAM usage is also enabled, then the flush will be
/// triggered by whichever comes first.

/// ///

Disabled by default (writer flushes by RAM usage).
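///
/// For example (sketch; the value is illustrative only):
///
///   writer.SetMaxBufferedDocs(1000);   // flush after every 1,000 buffered documents
///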

/// ///

/// IllegalArgumentException if maxBufferedDocs is /// enabled but smaller than 2, or it disables maxBufferedDocs /// when ramBufferSize is already disabled /// /// /// public virtual void SetMaxBufferedDocs(int maxBufferedDocs) { EnsureOpen(); if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) throw new ArgumentException("maxBufferedDocs must at least be 2 when enabled"); if (maxBufferedDocs == DISABLE_AUTO_FLUSH && (int)GetRAMBufferSizeMB() == DISABLE_AUTO_FLUSH) throw new ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled"); docWriter.MaxBufferedDocs = maxBufferedDocs; PushMaxBufferedDocs(); if (infoStream != null) Message("setMaxBufferedDocs " + maxBufferedDocs); } /// If we are flushing by doc count (not by RAM usage), and /// using LogDocMergePolicy then push maxBufferedDocs down /// as its minMergeDocs, to keep backwards compatibility. /// private void PushMaxBufferedDocs() { if (docWriter.MaxBufferedDocs != DISABLE_AUTO_FLUSH) { MergePolicy mp = mergePolicy; if (mp is LogDocMergePolicy) { LogDocMergePolicy lmp = (LogDocMergePolicy) mp; int maxBufferedDocs = docWriter.MaxBufferedDocs; if (lmp.MinMergeDocs != maxBufferedDocs) { if (infoStream != null) Message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy"); lmp.MinMergeDocs = maxBufferedDocs; } } } } /// Returns the number of buffered added documents that will /// trigger a flush if enabled. /// /// /// [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] public virtual int GetMaxBufferedDocs() { EnsureOpen(); return docWriter.MaxBufferedDocs; } /// Determines the amount of RAM that may be used for /// buffering added documents and deletions before they are /// flushed to the Directory. Generally for faster /// indexing performance it's best to flush by RAM usage /// instead of document count and use as large a RAM buffer /// as you can. /// ///

/// When this is set, the writer will flush whenever
/// buffered documents and deletions use this much RAM.
/// Pass in DISABLE_AUTO_FLUSH to prevent
/// triggering a flush due to RAM usage. Note that if
/// flushing by document count is also enabled, then the
/// flush will be triggered by whichever comes first.

/// ///

/// NOTE: the accounting of RAM usage for pending
/// deletions is only approximate. Specifically, if you
/// delete by Query, Lucene currently has no way to measure
/// the RAM usage of individual Queries, so the accounting
/// will under-estimate, and you should compensate by either
/// calling Commit() periodically yourself, or by using
/// SetMaxBufferedDeleteTerms to flush by count
/// instead of RAM usage (each buffered delete Query counts
/// as one).

/// NOTE: because IndexWriter uses ints when managing its /// internal storage, the absolute maximum value for this setting is somewhat /// less than 2048 MB. The precise limit depends on various factors, such as /// how large your documents are, how many fields have norms, etc., so it's /// best to set this value comfortably under 2048. ///

/// ///

/// The default value is DEFAULT_RAM_BUFFER_SIZE_MB (16.0 MB).
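///
/// For example (sketch; the value is illustrative only):
///
///   writer.SetRAMBufferSizeMB(64.0);                            // flush whenever ~64 MB is buffered
///   writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);  // flush by RAM usage only
///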

/// ///

/// IllegalArgumentException if ramBufferSize is /// enabled but non-positive, or it disables ramBufferSize /// when maxBufferedDocs is already disabled /// public virtual void SetRAMBufferSizeMB(double mb) { if (mb > 2048.0) { throw new System.ArgumentException("ramBufferSize " + mb + " is too large; should be comfortably less than 2048"); } if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0) throw new System.ArgumentException("ramBufferSize should be > 0.0 MB when enabled"); if (mb == DISABLE_AUTO_FLUSH && GetMaxBufferedDocs() == DISABLE_AUTO_FLUSH) throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled"); docWriter.SetRAMBufferSizeMB(mb); if (infoStream != null) Message("setRAMBufferSizeMB " + mb); } /// Returns the value set by if enabled. [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] public virtual double GetRAMBufferSizeMB() { return docWriter.GetRAMBufferSizeMB(); } ///

Determines the minimal number of delete terms required before the buffered /// in-memory delete terms are applied and flushed. If there are documents /// buffered in memory at the time, they are merged and a new segment is /// created.

///

Disabled by default (writer flushes by RAM usage).

/// ///

/// IllegalArgumentException if maxBufferedDeleteTerms /// is enabled but smaller than 1 /// /// /// public virtual void SetMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) { EnsureOpen(); if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1) throw new System.ArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled"); docWriter.MaxBufferedDeleteTerms = maxBufferedDeleteTerms; if (infoStream != null) Message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms); } /// Returns the number of buffered deleted terms that will /// trigger a flush if enabled. /// /// /// [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] public virtual int GetMaxBufferedDeleteTerms() { EnsureOpen(); return docWriter.MaxBufferedDeleteTerms; } /// Gets or sets the number of segments that are merged at /// once and also controls the total number of segments /// allowed to accumulate in the index. ///

Determines how often segment indices are merged by addDocument(). With /// smaller values, less RAM is used while indexing, and searches on /// unoptimized indices are faster, but indexing speed is slower. With larger /// values, more RAM is used during indexing, and while searches on unoptimized /// indices are slower, indexing is faster. Thus larger values (> 10) are best /// for batch index creation, and smaller values (< 10) for indices that are /// interactively maintained. /// ///

/// Note that this property is a convenience: it
/// just forwards to mergePolicy.MergeFactor as long as
/// mergePolicy is an instance of LogMergePolicy.
/// Otherwise an ArgumentException is thrown.

/// ///

This must never be less than 2. The default value is 10. ///
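///
/// For example (sketch; the value is illustrative only):
///
///   writer.MergeFactor = 20;   // larger factor: faster batch indexing, slower unoptimized searches
///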

public virtual int MergeFactor { set { LogMergePolicy.MergeFactor = value; } get { return LogMergePolicy.MergeFactor; } } /// Gets or sets the default info stream. /// If non-null, this will be the default infoStream used /// by a newly instantiated IndexWriter. /// /// /// public static StreamWriter DefaultInfoStream { set { IndexWriter.defaultInfoStream = value; } get { return IndexWriter.defaultInfoStream; } } /// If non-null, information about merges, deletes and a /// message when maxFieldLength is reached will be printed /// to this. /// public virtual void SetInfoStream(System.IO.StreamWriter infoStream) { EnsureOpen(); SetMessageID(infoStream); docWriter.SetInfoStream(infoStream); deleter.SetInfoStream(infoStream); if (infoStream != null) MessageState(); } private void MessageState() { Message("setInfoStream: dir=" + directory + " mergePolicy=" + mergePolicy + " mergeScheduler=" + mergeScheduler + " ramBufferSizeMB=" + docWriter.GetRAMBufferSizeMB() + " maxBufferedDocs=" + docWriter.MaxBufferedDocs + " maxBuffereDeleteTerms=" + docWriter.MaxBufferedDeleteTerms + " maxFieldLength=" + maxFieldLength + " index=" + SegString()); } /// Returns the current infoStream in use by this writer. /// /// public virtual StreamWriter InfoStream { get { EnsureOpen(); return infoStream; } } /// Returns true if verbosing is enabled (i.e., infoStream != null). public virtual bool Verbose { get { return infoStream != null; } } /// Gets or sets allowed timeout when acquiring the write lock. public virtual long WriteLockTimeout { get { EnsureOpen(); return writeLockTimeout; } set { EnsureOpen(); this.writeLockTimeout = value; } } /// Gets or sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in /// milliseconds). /// public static long DefaultWriteLockTimeout { set { IndexWriter.WRITE_LOCK_TIMEOUT = value; } get { return IndexWriter.WRITE_LOCK_TIMEOUT; } } /// Commits all changes to an index and closes all /// associated files. Note that this may be a costly /// operation, so, try to re-use a single writer instead of /// closing and opening a new one. See for /// caveats about write caching done by some IO devices. /// ///

If an Exception is hit during close, eg due to disk /// full or some other reason, then both the on-disk index /// and the internal state of the IndexWriter instance will /// be consistent. However, the close will not be complete /// even though part of it (flushing buffered documents) /// may have succeeded, so the write lock will still be /// held.

/// ///

If you can correct the underlying cause (eg free up /// some disk space) then you can call close() again. /// Failing that, if you want to force the write lock to be /// released (dangerous, because you may then lose buffered /// docs in the IndexWriter instance) then you can do /// something like this:

///
///   try
///   {
///       writer.Close();
///   }
///   finally
///   {
///       if (IndexWriter.IsLocked(directory))
///       {
///           IndexWriter.Unlock(directory);
///       }
///   }
///
/// after which, you must be certain not to use the writer
/// instance anymore.

/// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer, again. See above for details.

/// ///

/// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error [Obsolete("Use Dispose() instead")] public void Close() { Dispose(true); } /// Commits all changes to an index and closes all /// associated files. Note that this may be a costly /// operation, so, try to re-use a single writer instead of /// closing and opening a new one. See for /// caveats about write caching done by some IO devices. /// ///

If an Exception is hit during close, eg due to disk /// full or some other reason, then both the on-disk index /// and the internal state of the IndexWriter instance will /// be consistent. However, the close will not be complete /// even though part of it (flushing buffered documents) /// may have succeeded, so the write lock will still be /// held.

/// ///

If you can correct the underlying cause (eg free up /// some disk space) then you can call close() again. /// Failing that, if you want to force the write lock to be /// released (dangerous, because you may then lose buffered /// docs in the IndexWriter instance) then you can do /// something like this:

///
///   try
///   {
///       writer.Close();
///   }
///   finally
///   {
///       if (IndexWriter.IsLocked(directory))
///       {
///           IndexWriter.Unlock(directory);
///       }
///   }
///
/// after which, you must be certain not to use the writer
/// instance anymore.

/// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer, again. See above for details.

/// ///

/// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void Dispose() { Dispose(true); } /// Closes the index with or without waiting for currently /// running merges to finish. This is only meaningful when /// using a MergeScheduler that runs merges in background /// threads. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer, again. See above for details.

/// ///

NOTE: it is dangerous to always call /// close(false), especially when IndexWriter is not open /// for very long, because this can result in "merge /// starvation" whereby long merges will never have a /// chance to finish. This will cause too many segments in /// your index over time.

/// ///

/// if true, this call will block /// until all merges complete; else, it will ask all /// running merges to abort, wait until those merges have /// finished (which should be at most a few seconds), and /// then return. /// public virtual void Dispose(bool waitForMerges) { Dispose(true, waitForMerges); } protected virtual void Dispose(bool disposing, bool waitForMerges) { if (disposing) { // Ensure that only one thread actually gets to do the closing: if (ShouldClose()) { // If any methods have hit OutOfMemoryError, then abort // on close, in case the internal state of IndexWriter // or DocumentsWriter is corrupt if (hitOOM) RollbackInternal(); else CloseInternal(waitForMerges); } } } /// Closes the index with or without waiting for currently /// running merges to finish. This is only meaningful when /// using a MergeScheduler that runs merges in background /// threads. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer, again. See above for details.

/// ///

NOTE: it is dangerous to always call /// close(false), especially when IndexWriter is not open /// for very long, because this can result in "merge /// starvation" whereby long merges will never have a /// chance to finish. This will cause too many segments in /// your index over time.

/// ///

/// if true, this call will block /// until all merges complete; else, it will ask all /// running merges to abort, wait until those merges have /// finished (which should be at most a few seconds), and /// then return. /// [Obsolete("Use Dispose(bool) instead")] public virtual void Close(bool waitForMerges) { Dispose(waitForMerges); } // Returns true if this thread should attempt to close, or // false if IndexWriter is now closed; else, waits until // another thread finishes closing private bool ShouldClose() { lock (this) { while (true) { if (!closed) { if (!closing) { closing = true; return true; } else { // Another thread is presently trying to close; // wait until it finishes one way (closes // successfully) or another (fails to close) DoWait(); } } else return false; } } } private void CloseInternal(bool waitForMerges) { docWriter.PauseAllThreads(); try { if (infoStream != null) Message("now flush at close"); docWriter.Dispose(); // Only allow a new merge to be triggered if we are // going to wait for merges: if (!hitOOM) { Flush(waitForMerges, true, true); } if (waitForMerges) // Give merge scheduler last chance to run, in case // any pending merges are waiting: mergeScheduler.Merge(this); mergePolicy.Close(); FinishMerges(waitForMerges); stopMerges = true; mergeScheduler.Close(); if (infoStream != null) Message("now call final commit()"); if (!hitOOM) { Commit(0); } if (infoStream != null) Message("at close: " + SegString()); lock (this) { readerPool.Dispose(); docWriter = null; deleter.Dispose(); } if (writeLock != null) { writeLock.Release(); // release write lock writeLock = null; } lock (this) { closed = true; } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "closeInternal"); } finally { lock (this) { closing = false; System.Threading.Monitor.PulseAll(this); if (!closed) { if (docWriter != null) docWriter.ResumeAllThreads(); if (infoStream != null) Message("hit exception while closing"); } } } } /// Tells the docWriter to close its currently open shared /// doc stores (stored fields & vectors files). /// Return value specifices whether new doc store files are compound or not. /// private bool FlushDocStores() { lock (this) { if (infoStream != null) { Message("flushDocStores segment=" + docWriter.DocStoreSegment); } bool useCompoundDocStore = false; if (infoStream != null) { Message("closeDocStores segment=" + docWriter.DocStoreSegment); } System.String docStoreSegment; bool success = false; try { docStoreSegment = docWriter.CloseDocStore(); success = true; } finally { if (!success && infoStream != null) { Message("hit exception closing doc store segment"); } } if (infoStream != null) { Message("flushDocStores files=" + docWriter.ClosedFiles()); } useCompoundDocStore = mergePolicy.UseCompoundDocStore(segmentInfos); if (useCompoundDocStore && docStoreSegment != null && docWriter.ClosedFiles().Count != 0) { // Now build compound doc store file if (infoStream != null) { Message("create compound file " + docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION); } success = false; int numSegments = segmentInfos.Count; System.String compoundFileName = docStoreSegment + "." 
+ IndexFileNames.COMPOUND_FILE_STORE_EXTENSION; try { CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName); foreach(string file in docWriter.closedFiles) { cfsWriter.AddFile(file); } // Perform the merge cfsWriter.Close(); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception building compound file doc store for segment " + docStoreSegment); deleter.DeleteFile(compoundFileName); docWriter.Abort(); } } for (int i = 0; i < numSegments; i++) { SegmentInfo si = segmentInfos.Info(i); if (si.DocStoreOffset != - 1 && si.DocStoreSegment.Equals(docStoreSegment)) si.DocStoreIsCompoundFile = true; } Checkpoint(); // In case the files we just merged into a CFS were // not previously checkpointed: deleter.DeleteNewFiles(docWriter.ClosedFiles()); } return useCompoundDocStore; } } /// Returns the Directory used by this index. public virtual Directory Directory { get { // Pass false because the flush during closing calls getDirectory EnsureOpen(false); return directory; } } /// Returns the analyzer used by this index. public virtual Analyzer Analyzer { get { EnsureOpen(); return analyzer; } } /// Returns total number of docs in this index, including /// docs not yet flushed (still in the RAM buffer), /// not counting deletions. /// /// /// public virtual int MaxDoc() { lock (this) { int count; if (docWriter != null) count = docWriter.NumDocsInRAM; else count = 0; for (int i = 0; i < segmentInfos.Count; i++) count += segmentInfos.Info(i).docCount; return count; } } /// Returns total number of docs in this index, including /// docs not yet flushed (still in the RAM buffer), and /// including deletions. NOTE: buffered deletions /// are not counted. If you really need these to be /// counted you should call first. /// /// /// public virtual int NumDocs() { lock (this) { int count; if (docWriter != null) count = docWriter.NumDocsInRAM; else count = 0; for (int i = 0; i < segmentInfos.Count; i++) { SegmentInfo info = segmentInfos.Info(i); count += info.docCount - info.GetDelCount(); } return count; } } public virtual bool HasDeletions() { lock (this) { EnsureOpen(); if (docWriter.HasDeletes()) return true; for (int i = 0; i < segmentInfos.Count; i++) if (segmentInfos.Info(i).HasDeletions()) return true; return false; } } /// The maximum number of terms that will be indexed for a single field in a /// document. This limits the amount of memory required for indexing, so that /// collections with very large files will not crash the indexing process by /// running out of memory.

/// Note that this effectively truncates large documents, excluding from the /// index terms that occur further in the document. If you know your source /// documents are large, be sure to set this value high enough to accommodate /// the expected size. If you set it to Integer.MAX_VALUE, then the only limit /// is your memory, but you should anticipate an OutOfMemoryError.

/// By default, no more than 10,000 terms will be indexed for a field. /// ///
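// Illustrative sketch (not part of the original source): selecting the field-length
// limit when constructing a writer. "dir" and "analyzer" are assumed to exist;
// MaxFieldLength.LIMITED corresponds to the 10,000-term default described above,
// while UNLIMITED removes the cap (at the cost of potentially unbounded memory use).
//
//   var writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
//
//   // or, for arbitrarily large documents:
//   var unboundedWriter = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);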

/// /// private int maxFieldLength; /// Adds a document to this index. If the document contains more than /// terms for a given field, the remainder are /// discarded. /// ///

Note that if an Exception is hit (for example disk full) /// then the index will be consistent, but this document /// may not have been added. Furthermore, it's possible /// the index will have one segment in non-compound format /// even when using compound files (when a merge has /// partially succeeded).

/// ///

This method periodically flushes pending documents /// to the Directory (see above), and /// also periodically triggers segment merges in the index /// according to the in use.

/// ///

Merges temporarily consume space in the /// directory. The amount of space required is up to 1X the /// size of all segments being merged, when no /// readers/searchers are open against the index, and up to /// 2X the size of all segments being merged when /// readers/searchers are open against the index (see /// for details). The sequence of /// primitive merge operations performed is governed by the /// merge policy. /// ///

Note that each term in the document can be no longer /// than 16383 characters, otherwise an /// IllegalArgumentException will be thrown.

/// ///

Note that it's possible to create an invalid Unicode /// string if a UTF16 surrogate pair is malformed. /// In this case, the invalid characters are silently /// replaced with the Unicode replacement character /// U+FFFD.

/// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///
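// Illustrative sketch (not part of the original source): adding a document as
// described above. The field names ("id", "body") are hypothetical; "writer" is an
// open IndexWriter.
//
//   var doc = new Document();
//   doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
//   doc.Add(new Field("body", "full text to be analyzed and indexed",
//                     Field.Store.NO, Field.Index.ANALYZED));
//   writer.AddDocument(doc);   // buffered in RAM; flushed per the rules above
//   writer.Commit();           // make the change visible to newly opened readers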

/// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void AddDocument(Document doc) { AddDocument(doc, analyzer); } /// Adds a document to this index, using the provided analyzer instead of the /// value of . If the document contains more than /// terms for a given field, the remainder are /// discarded. /// ///

See for details on /// index and IndexWriter state after an Exception, and /// flushing/merging temporary free space requirements.

/// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///

/// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void AddDocument(Document doc, Analyzer analyzer) { EnsureOpen(); bool doFlush = false; bool success = false; try { try { doFlush = docWriter.AddDocument(doc, analyzer); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception adding document"); lock (this) { // If docWriter has some aborted files that were // never incref'd, then we clean them up here if (docWriter != null) { ICollection files = docWriter.AbortedFiles(); if (files != null) deleter.DeleteNewFiles(files); } } } } if (doFlush) Flush(true, false, false); } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addDocument"); } } /// Deletes the document(s) containing term. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///
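// Illustrative sketch (not part of the original source): deleting by term, typically
// against a unique, non-analyzed key field (the hypothetical "id" field again).
//
//   writer.DeleteDocuments(new Term("id", "42"));   // buffered; applied at flush/commit
//   writer.Commit();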

/// the term to identify the documents to be deleted /// /// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void DeleteDocuments(Term term) { EnsureOpen(); try { bool doFlush = docWriter.BufferDeleteTerm(term); if (doFlush) Flush(true, false, false); } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "deleteDocuments(Term)"); } } /// Deletes the document(s) containing any of the /// terms. All deletes are flushed at the same time. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///

/// array of terms to identify the documents /// to be deleted /// /// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void DeleteDocuments(params Term[] terms) { EnsureOpen(); try { bool doFlush = docWriter.BufferDeleteTerms(terms); if (doFlush) Flush(true, false, false); } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "deleteDocuments(params Term[])"); } } /// Deletes the document(s) matching the provided query. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///

/// the query to identify the documents to be deleted /// /// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void DeleteDocuments(Query query) { EnsureOpen(); bool doFlush = docWriter.BufferDeleteQuery(query); if (doFlush) Flush(true, false, false); } /// Deletes the document(s) matching any of the provided queries. /// All deletes are flushed at the same time. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///

/// array of queries to identify the documents /// to be deleted /// /// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void DeleteDocuments(params Query[] queries) { EnsureOpen(); bool doFlush = docWriter.BufferDeleteQueries(queries); if (doFlush) Flush(true, false, false); } /// Updates a document by first deleting the document(s) /// containing term and then adding the new /// document. The delete and then add are atomic as seen /// by a reader on the same index (flush may happen only after /// the add). /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///
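// Illustrative sketch (not part of the original source): the delete-then-add
// "upsert" pattern this method implements, keyed on a hypothetical unique "id" field.
//
//   var updated = new Document();
//   updated.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
//   updated.Add(new Field("body", "revised text", Field.Store.NO, Field.Index.ANALYZED));
//   writer.UpdateDocument(new Term("id", "42"), updated);   // atomic as seen by readers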

/// the term to identify the document(s) to be /// deleted /// /// the document to be added /// /// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void UpdateDocument(Term term, Document doc) { EnsureOpen(); UpdateDocument(term, doc, Analyzer); } /// Updates a document by first deleting the document(s) /// containing term and then adding the new /// document. The delete and then add are atomic as seen /// by a reader on the same index (flush may happen only after /// the add). /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///

/// the term to identify the document(s) to be /// deleted /// /// the document to be added /// /// the analyzer to use when analyzing the document /// /// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void UpdateDocument(Term term, Document doc, Analyzer analyzer) { EnsureOpen(); try { bool doFlush = false; bool success = false; try { doFlush = docWriter.UpdateDocument(term, doc, analyzer); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception updating document"); lock (this) { // If docWriter has some aborted files that were // never incref'd, then we clean them up here ICollection files = docWriter.AbortedFiles(); if (files != null) deleter.DeleteNewFiles(files); } } } if (doFlush) Flush(true, false, false); } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "updateDocument"); } } // for test purpose internal int GetSegmentCount() { lock (this) { return segmentInfos.Count; } } // for test purpose internal int GetNumBufferedDocuments() { lock (this) { return docWriter.NumDocsInRAM; } } // for test purpose public /*internal*/ int GetDocCount(int i) { lock (this) { if (i >= 0 && i < segmentInfos.Count) { return segmentInfos.Info(i).docCount; } else { return - 1; } } } // for test purpose internal int GetFlushCount() { lock (this) { return flushCount; } } // for test purpose internal int GetFlushDeletesCount() { lock (this) { return flushDeletesCount; } } internal System.String NewSegmentName() { // Cannot synchronize on IndexWriter because that causes // deadlock lock (segmentInfos) { // Important to increment changeCount so that the // segmentInfos is written on close. Otherwise we // could close, re-open and re-return the same segment // name that was previously returned which can cause // problems at least with ConcurrentMergeScheduler. changeCount++; return "_" + Number.ToString(segmentInfos.counter++); } } /// If non-null, information about merges will be printed to this. private System.IO.StreamWriter infoStream = null; private static System.IO.StreamWriter defaultInfoStream = null; /// Requests an "optimize" operation on an index, priming the index /// for the fastest available search. Traditionally this has meant /// merging all segments into a single segment as is done in the /// default merge policy, but individaul merge policies may implement /// optimize in different ways. /// ///

It is recommended that this method be called upon completion of indexing. In /// environments with frequent updates, optimize is best done during low volume times, if at all. /// ///

///

See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.

/// ///

Note that optimize requires 2X the index size free /// space in your Directory (3X if you're using compound /// file format). For example, if your index /// size is 10 MB then you need 20 MB free for optimize to /// complete (30 MB if you're using compound file format).

/// ///

If some but not all readers re-open while an /// optimize is underway, this will cause > 2X temporary /// space to be consumed as those new readers will then /// hold open the partially optimized segments at that /// time. It is best not to re-open readers while optimize /// is running.

/// ///

The actual temporary usage could be much less than /// these figures (it depends on many factors).

/// ///

In general, once the optimize completes, the total size of the /// index will be less than the size of the starting index. /// It could be quite a bit smaller (if there were many /// pending deletes) or just slightly smaller.

/// ///

If an Exception is hit during optimize(), for example /// due to disk full, the index will not be corrupt and no /// documents will have been lost. However, it may have /// been partially optimized (some segments were merged but /// not all), and it's possible that one of the segments in /// the index will be in non-compound format even when /// using compound file format. This will occur when the /// Exception is hit during conversion of the segment into /// compound format.

/// ///

This call will optimize those segments present in /// the index when the call started. If other threads are /// still adding documents and flushing segments, those /// newly created segments will not be optimized unless you /// call optimize again.

/// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///
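// Illustrative sketch (not part of the original source): optimizing once bulk
// indexing is finished, as recommended above; "writer" is assumed.
//
//   writer.Optimize();        // merge down to a single segment (blocks until done)
//   // writer.Optimize(5);    // alternative: merge down to at most 5 segments
//   writer.Dispose();         // commit remaining changes and release the write lock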

/// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error /// /// public virtual void Optimize() { Optimize(true); } /// Optimize the index down to <= maxNumSegments. If /// maxNumSegments==1 then this is the same as ///. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///

/// maximum number of segments left /// in the index after optimization finishes /// public virtual void Optimize(int maxNumSegments) { Optimize(maxNumSegments, true); } /// Just like , except you can specify /// whether the call should block until the optimize /// completes. This is only meaningful with a /// that is able to run merges in /// background threads. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

///

public virtual void Optimize(bool doWait) { Optimize(1, doWait); } /// Just like , except you can /// specify whether the call should block until the /// optimize completes. This is only meaningful with a /// that is able to run merges in /// background threads. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

///

public virtual void Optimize(int maxNumSegments, bool doWait) { EnsureOpen(); if (maxNumSegments < 1) throw new System.ArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments); if (infoStream != null) Message("optimize: index now " + SegString()); Flush(true, false, true); lock (this) { ResetMergeExceptions(); segmentsToOptimize = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(); optimizeMaxNumSegments = maxNumSegments; int numSegments = segmentInfos.Count; for (int i = 0; i < numSegments; i++) segmentsToOptimize.Add(segmentInfos.Info(i)); // Now mark all pending & running merges as optimize // merge: foreach(MergePolicy.OneMerge merge in pendingMerges) { merge.optimize = true; merge.maxNumSegmentsOptimize = maxNumSegments; } foreach(MergePolicy.OneMerge merge in runningMerges) { merge.optimize = true; merge.maxNumSegmentsOptimize = maxNumSegments; } } MaybeMerge(maxNumSegments, true); if (doWait) { lock (this) { while (true) { if (hitOOM) { throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete optimize"); } if (mergeExceptions.Count > 0) { // Forward any exceptions in background merge // threads to the current thread: int size = mergeExceptions.Count; for (int i = 0; i < size; i++) { MergePolicy.OneMerge merge = mergeExceptions[i]; if (merge.optimize) { System.IO.IOException err; System.Exception t = merge.GetException(); if (t != null) err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory), t); else err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory)); throw err; } } } if (OptimizeMergesPending()) DoWait(); else break; } } // If close is called while we are still // running, throw an exception so the calling // thread will know the optimize did not // complete EnsureOpen(); } // NOTE: in the ConcurrentMergeScheduler case, when // doWait is false, we can return immediately while // background threads accomplish the optimization } /// Returns true if any merges in pendingMerges or /// runningMerges are optimization merges. /// private bool OptimizeMergesPending() { lock (this) { foreach (MergePolicy.OneMerge merge in pendingMerges) { if (merge.optimize) return true; } foreach(MergePolicy.OneMerge merge in runningMerges) { if (merge.optimize) return true; } return false; } } /// Just like , except you can /// specify whether the call should block until the /// operation completes. This is only meaningful with a /// that is able to run merges in /// background threads. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

///

public virtual void ExpungeDeletes(bool doWait) { EnsureOpen(); if (infoStream != null) Message("expungeDeletes: index now " + SegString()); MergePolicy.MergeSpecification spec; lock (this) { spec = mergePolicy.FindMergesToExpungeDeletes(segmentInfos); if (spec != null) { int numMerges = spec.merges.Count; for (int i = 0; i < numMerges; i++) RegisterMerge(spec.merges[i]); } } mergeScheduler.Merge(this); if (spec != null && doWait) { int numMerges = spec.merges.Count; lock (this) { bool running = true; while (running) { if (hitOOM) { throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete expungeDeletes"); } // Check each merge that MergePolicy asked us to // do, to see if any of them are still running and // if any of them have hit an exception. running = false; for (int i = 0; i < numMerges; i++) { MergePolicy.OneMerge merge = spec.merges[i]; if (pendingMerges.Contains(merge) || runningMerges.Contains(merge)) running = true; System.Exception t = merge.GetException(); if (t != null) { System.IO.IOException ioe = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory), t); throw ioe; } } // If any of our merges are still running, wait: if (running) DoWait(); } } } // NOTE: in the ConcurrentMergeScheduler case, when // doWait is false, we can return immediately while // background threads accomplish the optimization } /// Expunges all deletes from the index. When an index /// has many document deletions (or updates to existing /// documents), it's best to either call optimize or /// expungeDeletes to remove all unused data in the index /// associated with the deleted documents. To see how /// many deletions you have pending in your index, call /// /// This saves disk space and memory usage while /// searching. expungeDeletes should be somewhat faster /// than optimize since it does not insist on reducing the /// index to a single segment (though, this depends on the /// ; see .). Note that /// this call does not first commit any buffered /// documents, so you must do so yourself if necessary. /// See also /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

///
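// Illustrative sketch (not part of the original source): reclaiming space tied up by
// deleted documents without a full optimize. The "status" field is hypothetical; the
// explicit Commit is needed because expungeDeletes does not flush buffered changes.
//
//   writer.DeleteDocuments(new Term("status", "expired"));
//   writer.Commit();
//   writer.ExpungeDeletes();   // blocks until the requested merges complete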

public virtual void ExpungeDeletes() { ExpungeDeletes(true); } /// Expert: asks the mergePolicy whether any merges are /// necessary now and if so, runs the requested merges and /// then iterate (test again if merges are needed) until no /// more merges are returned by the mergePolicy. /// /// Explicit calls to maybeMerge() are usually not /// necessary. The most common case is when merge policy /// parameters have changed. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

///

public void MaybeMerge() { MaybeMerge(false); } private void MaybeMerge(bool optimize) { MaybeMerge(1, optimize); } private void MaybeMerge(int maxNumSegmentsOptimize, bool optimize) { UpdatePendingMerges(maxNumSegmentsOptimize, optimize); mergeScheduler.Merge(this); } private void UpdatePendingMerges(int maxNumSegmentsOptimize, bool optimize) { lock (this) { System.Diagnostics.Debug.Assert(!optimize || maxNumSegmentsOptimize > 0); if (stopMerges) { return; } // Do not start new merges if we've hit OOME if (hitOOM) { return ; } MergePolicy.MergeSpecification spec; if (optimize) { spec = mergePolicy.FindMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize); if (spec != null) { int numMerges = spec.merges.Count; for (int i = 0; i < numMerges; i++) { MergePolicy.OneMerge merge = spec.merges[i]; merge.optimize = true; merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize; } } } else { spec = mergePolicy.FindMerges(segmentInfos); } if (spec != null) { int numMerges = spec.merges.Count; for (int i = 0; i < numMerges; i++) RegisterMerge(spec.merges[i]); } } } /// Expert: the calls this method /// to retrieve the next merge requested by the /// MergePolicy /// internal virtual MergePolicy.OneMerge GetNextMerge() { lock (this) { if (pendingMerges.Count == 0) return null; else { // Advance the merge from pending to running MergePolicy.OneMerge merge = pendingMerges.First.Value; pendingMerges.RemoveFirst(); runningMerges.Add(merge); return merge; } } } /// Like getNextMerge() except only returns a merge if it's /// external. /// private MergePolicy.OneMerge GetNextExternalMerge() { lock (this) { if (pendingMerges.Count == 0) return null; else { var it = pendingMerges.GetEnumerator(); while (it.MoveNext()) { MergePolicy.OneMerge merge = it.Current; if (merge.isExternal) { // Advance the merge from pending to running pendingMerges.Remove(merge); // {{Aroush-2.9}} From Mike Garski: this is an O(n) op... is that an issue? runningMerges.Add(merge); return merge; } } // All existing merges do not involve external segments return null; } } } /* * Begin a transaction. During a transaction, any segment * merges that happen (or ram segments flushed) will not * write a new segments file and will not remove any files * that were present at the start of the transaction. You * must make a matched (try/finally) call to * commitTransaction() or rollbackTransaction() to finish * the transaction. * * Note that buffered documents and delete terms are not handled * within the transactions, so they must be flushed before the * transaction is started. 
*/ private void StartTransaction(bool haveReadLock) { lock (this) { bool success = false; try { if (infoStream != null) Message("now start transaction"); System.Diagnostics.Debug.Assert(docWriter.GetNumBufferedDeleteTerms() == 0 , "calling startTransaction with buffered delete terms not supported: numBufferedDeleteTerms=" + docWriter.GetNumBufferedDeleteTerms()); System.Diagnostics.Debug.Assert(docWriter.NumDocsInRAM == 0 , "calling startTransaction with buffered documents not supported: numDocsInRAM=" + docWriter.NumDocsInRAM); EnsureOpen(); // If a transaction is trying to roll back (because // addIndexes hit an exception) then wait here until // that's done: lock (this) { while (stopMerges) DoWait(); } success = true; } finally { // Release the write lock if our caller held it, on // hitting an exception if (!success && haveReadLock) ReleaseRead(); } if (haveReadLock) { UpgradeReadToWrite(); } else { AcquireWrite(); } success = false; try { localRollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone(); System.Diagnostics.Debug.Assert(!HasExternalSegments()); localFlushedDocCount = docWriter.GetFlushedDocCount(); // Remove the incRef we did in startTransaction: deleter.IncRef(segmentInfos, false); success = true; } finally { if (!success) FinishAddIndexes(); } } } /* * Rolls back the transaction and restores state to where * we were at the start. */ private void RollbackTransaction() { lock (this) { if (infoStream != null) Message("now rollback transaction"); if (docWriter != null) { docWriter.SetFlushedDocCount(localFlushedDocCount); } // Must finish merges before rolling back segmentInfos // so merges don't hit exceptions on trying to commit // themselves, don't get files deleted out from under // them, etc: FinishMerges(false); // Keep the same segmentInfos instance but replace all // of its SegmentInfo instances. This is so the next // attempt to commit using this instance of IndexWriter // will always write to a new generation ("write once"). segmentInfos.Clear(); segmentInfos.AddRange(localRollbackSegmentInfos); localRollbackSegmentInfos = null; // This must come after we rollback segmentInfos, so // that if a commit() kicks off it does not see the // segmentInfos with external segments FinishAddIndexes(); // Ask deleter to locate unreferenced files we had // created & remove them: deleter.Checkpoint(segmentInfos, false); // Remove the incRef we did in startTransaction: deleter.DecRef(segmentInfos); // Also ask deleter to remove any newly created files // that were never incref'd; this "garbage" is created // when a merge kicks off but aborts part way through // before it had a chance to incRef the files it had // partially created deleter.Refresh(); System.Threading.Monitor.PulseAll(this); System.Diagnostics.Debug.Assert(!HasExternalSegments()); } } /* * Commits the transaction. This will write the new * segments file and remove and pending deletions we have * accumulated during the transaction */ private void CommitTransaction() { lock (this) { if (infoStream != null) Message("now commit transaction"); // Give deleter a chance to remove files now: Checkpoint(); // Remove the incRef we did in startTransaction. deleter.DecRef(localRollbackSegmentInfos); localRollbackSegmentInfos = null; System.Diagnostics.Debug.Assert(!HasExternalSegments()); FinishAddIndexes(); } } /// Close the IndexWriter without committing /// any changes that have occurred since the last commit /// (or since it was opened, if commit hasn't been called). 
/// This removes any temporary files that had been created, /// after which the state of the index will be the same as /// it was when commit() was last called or when this /// writer was first opened. This also clears a previous /// call to . /// /// IOException if there is a low-level IO error public virtual void Rollback() { EnsureOpen(); // Ensure that only one thread actually gets to do the closing: if (ShouldClose()) RollbackInternal(); } private void RollbackInternal() { bool success = false; if (infoStream != null) { Message("rollback"); } docWriter.PauseAllThreads(); try { FinishMerges(false); // Must pre-close these two, in case they increment // changeCount so that we can then set it to false // before calling closeInternal mergePolicy.Close(); mergeScheduler.Close(); lock (this) { if (pendingCommit != null) { pendingCommit.RollbackCommit(directory); deleter.DecRef(pendingCommit); pendingCommit = null; System.Threading.Monitor.PulseAll(this); } // Keep the same segmentInfos instance but replace all // of its SegmentInfo instances. This is so the next // attempt to commit using this instance of IndexWriter // will always write to a new generation ("write // once"). segmentInfos.Clear(); segmentInfos.AddRange(rollbackSegmentInfos); System.Diagnostics.Debug.Assert(!HasExternalSegments()); docWriter.Abort(); System.Diagnostics.Debug.Assert(TestPoint("rollback before checkpoint")); // Ask deleter to locate unreferenced files & remove // them: deleter.Checkpoint(segmentInfos, false); deleter.Refresh(); } // Don't bother saving any changes in our segmentInfos readerPool.Clear(null); lastCommitChangeCount = changeCount; success = true; } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "rollbackInternal"); } finally { lock (this) { if (!success) { docWriter.ResumeAllThreads(); closing = false; System.Threading.Monitor.PulseAll(this); if (infoStream != null) Message("hit exception during rollback"); } } } CloseInternal(false); } /// Delete all documents in the index. /// ///

This method will drop all buffered documents and will /// remove all segments from the index. This change will not be /// visible until a has been called. This method /// can be rolled back using .

/// ///

NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).

/// ///

NOTE: this method will forcefully abort all merges /// in progress. If other threads are running /// or any of the addIndexes methods, they /// will receive MergePolicy.MergeAbortedExceptions. ///
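// Illustrative sketch (not part of the original source): clearing and rebuilding an
// index in place, relying on the rollback behaviour described above.
// ReindexEverything is a hypothetical re-population routine.
//
//   writer.DeleteAll();             // drops buffered docs and all segments
//   try
//   {
//       ReindexEverything(writer);
//       writer.Commit();            // publish the rebuilt index
//   }
//   catch (System.Exception)
//   {
//       writer.Rollback();          // restore the index as of the last commit (closes the writer)
//       throw;
//   }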

public virtual void DeleteAll() { lock (this) { docWriter.PauseAllThreads(); try { // Abort any running merges FinishMerges(false); // Remove any buffered docs docWriter.Abort(); docWriter.SetFlushedDocCount(0); // Remove all segments segmentInfos.Clear(); // Ask deleter to locate unreferenced files & remove them: deleter.Checkpoint(segmentInfos, false); deleter.Refresh(); // Don't bother saving any changes in our segmentInfos readerPool.Clear(null); // Mark that the index has changed ++changeCount; } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "deleteAll"); } finally { docWriter.ResumeAllThreads(); if (infoStream != null) { Message("hit exception during deleteAll"); } } } } private void FinishMerges(bool waitForMerges) { lock (this) { if (!waitForMerges) { stopMerges = true; // Abort all pending & running merges: foreach(MergePolicy.OneMerge merge in pendingMerges) { if (infoStream != null) Message("now abort pending merge " + merge.SegString(directory)); merge.Abort(); MergeFinish(merge); } pendingMerges.Clear(); foreach(MergePolicy.OneMerge merge in runningMerges) { if (infoStream != null) Message("now abort running merge " + merge.SegString(directory)); merge.Abort(); } // Ensure any running addIndexes finishes. It's fine // if a new one attempts to start because its merges // will quickly see the stopMerges == true and abort. AcquireRead(); ReleaseRead(); // These merges periodically check whether they have // been aborted, and stop if so. We wait here to make // sure they all stop. It should not take very long // because the merge threads periodically check if // they are aborted. while (runningMerges.Count > 0) { if (infoStream != null) Message("now wait for " + runningMerges.Count + " running merge to abort"); DoWait(); } stopMerges = false; System.Threading.Monitor.PulseAll(this); System.Diagnostics.Debug.Assert(0 == mergingSegments.Count); if (infoStream != null) Message("all running merges have aborted"); } else { // waitForMerges() will ensure any running addIndexes finishes. // It's fine if a new one attempts to start because from our // caller above the call will see that we are in the // process of closing, and will throw an // AlreadyClosedException. WaitForMerges(); } } } /// Wait for any currently outstanding merges to finish. /// ///

It is guaranteed that any merges started prior to calling this method /// will have completed once this method completes.

///

public virtual void WaitForMerges() { lock (this) { // Ensure any running addIndexes finishes. AcquireRead(); ReleaseRead(); while (pendingMerges.Count > 0 || runningMerges.Count > 0) { DoWait(); } // sanity check System.Diagnostics.Debug.Assert(0 == mergingSegments.Count); } } /* * Called whenever the SegmentInfos has been updated and * the index files referenced exist (correctly) in the * index directory. */ private void Checkpoint() { lock (this) { changeCount++; deleter.Checkpoint(segmentInfos, false); } } private void FinishAddIndexes() { ReleaseWrite(); } private void BlockAddIndexes(bool includePendingClose) { AcquireRead(); bool success = false; try { // Make sure we are still open since we could have // waited quite a while for last addIndexes to finish EnsureOpen(includePendingClose); success = true; } finally { if (!success) ReleaseRead(); } } private void ResumeAddIndexes() { ReleaseRead(); } private void ResetMergeExceptions() { lock (this) { mergeExceptions = new List(); mergeGen++; } } private void NoDupDirs(Directory[] dirs) { HashSet dups = new HashSet(); for (int i = 0; i < dirs.Length; i++) { if (dups.Contains(dirs[i])) { throw new System.ArgumentException("Directory " + dirs[i] + " appears more than once"); } if (dirs[i] == directory) throw new System.ArgumentException("Cannot add directory to itself"); dups.Add(dirs[i]); } } /// Merges all segments from an array of indexes into this /// index. /// ///

This may be used to parallelize batch indexing. A large document /// collection can be broken into sub-collections. Each sub-collection can be /// indexed in parallel, on a different thread, process or machine. The /// complete index can then be created by merging sub-collection indexes /// with this method. /// ///

NOTE: the index in each Directory must not be /// changed (opened by a writer) while this method is /// running. This method does not acquire a write lock in /// each input Directory, so it is up to the caller to /// enforce this. /// ///

NOTE: while this is running, any attempts to /// add or delete documents (with another thread) will be /// paused until this method completes. /// ///

This method is transactional in how Exceptions are /// handled: it does not commit a new segments_N file until /// all indexes are added. This means if an Exception /// occurs (for example disk full), then either no indexes /// will have been added or they all will have been.

/// ///

Note that this requires temporary free space in the /// Directory up to 2X the sum of all input indexes /// (including the starting index). If readers/searchers /// are open against the starting index, then temporary /// free space required will be higher by the size of the /// starting index (see for details). ///

/// ///

Once this completes, the final size of the index /// will be less than the sum of all input index sizes /// (including the starting index). It could be quite a /// bit smaller (if there were many pending deletes) or /// just slightly smaller.

/// ///

/// This requires this index not be among those to be added. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///
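// Illustrative sketch (not part of the original source): merging per-shard indexes
// that were built in parallel, as described above. The shard paths are hypothetical,
// and no writer may be open on the shard directories while this runs.
//
//   Directory[] shards =
//   {
//       Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo("/data/shard0")),
//       Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo("/data/shard1"))
//   };
//   writer.AddIndexesNoOptimize(shards);
//   writer.Commit();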

/// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void AddIndexesNoOptimize(params Directory[] dirs) { EnsureOpen(); NoDupDirs(dirs); // Do not allow add docs or deletes while we are running: docWriter.PauseAllThreads(); try { if (infoStream != null) Message("flush at addIndexesNoOptimize"); Flush(true, false, true); bool success = false; StartTransaction(false); try { int docCount = 0; lock (this) { EnsureOpen(); for (int i = 0; i < dirs.Length; i++) { if (directory == dirs[i]) { // cannot add this index: segments may be deleted in merge before added throw new System.ArgumentException("Cannot add this index to itself"); } SegmentInfos sis = new SegmentInfos(); // read infos from dir sis.Read(dirs[i]); for (int j = 0; j < sis.Count; j++) { SegmentInfo info = sis.Info(j); System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info), "dup info dir=" + info.dir + " name=" + info.name); docCount += info.docCount; segmentInfos.Add(info); // add each info } } } // Notify DocumentsWriter that the flushed count just increased docWriter.UpdateFlushedDocCount(docCount); MaybeMerge(); EnsureOpen(); // If after merging there remain segments in the index // that are in a different directory, just copy these // over into our index. This is necessary (before // finishing the transaction) to avoid leaving the // index in an unusable (inconsistent) state. ResolveExternalSegments(); EnsureOpen(); success = true; } finally { if (success) { CommitTransaction(); } else { RollbackTransaction(); } } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addIndexesNoOptimize"); } finally { if (docWriter != null) { docWriter.ResumeAllThreads(); } } } private bool HasExternalSegments() { return segmentInfos.HasExternalSegments(directory); } /* If any of our segments are using a directory != ours * then we have to either copy them over one by one, merge * them (if merge policy has chosen to) or wait until * currently running merges (in the background) complete. * We don't return until the SegmentInfos has no more * external segments. Currently this is only used by * addIndexesNoOptimize(). */ private void ResolveExternalSegments() { bool any = false; bool done = false; while (!done) { SegmentInfo info = null; MergePolicy.OneMerge merge = null; lock (this) { if (stopMerges) throw new MergePolicy.MergeAbortedException("rollback() was called or addIndexes* hit an unhandled exception"); int numSegments = segmentInfos.Count; done = true; for (int i = 0; i < numSegments; i++) { info = segmentInfos.Info(i); if (info.dir != directory) { done = false; MergePolicy.OneMerge newMerge = new MergePolicy.OneMerge(segmentInfos.Range(i, 1 + i), mergePolicy is LogMergePolicy && UseCompoundFile); // Returns true if no running merge conflicts // with this one (and, records this merge as // pending), ie, this segment is not currently // being merged: if (RegisterMerge(newMerge)) { merge = newMerge; // If this segment is not currently being // merged, then advance it to running & run // the merge ourself (below): pendingMerges.Remove(merge); // {{Aroush-2.9}} From Mike Garski: this is an O(n) op... is that an issue? runningMerges.Add(merge); break; } } } if (!done && merge == null) // We are not yet done (external segments still // exist in segmentInfos), yet, all such segments // are currently "covered" by a pending or running // merge. 
We now try to grab any pending merge // that involves external segments: merge = GetNextExternalMerge(); if (!done && merge == null) // We are not yet done, and, all external segments // fall under merges that the merge scheduler is // currently running. So, we now wait and check // back to see if the merge has completed. DoWait(); } if (merge != null) { any = true; Merge(merge); } } if (any) // Sometimes, on copying an external segment over, // more merges may become necessary: mergeScheduler.Merge(this); } /// Merges the provided indexes into this index. ///

After this completes, the index is optimized.

///

The provided IndexReaders are not closed.

/// ///

NOTE: while this is running, any attempts to /// add or delete documents (with another thread) will be /// paused until this method completes. /// ///

See for /// details on transactional semantics, temporary free /// space required in the Directory, and non-CFS segments /// on an Exception.

/// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///

/// CorruptIndexException if the index is corrupt /// IOException if there is a low-level IO error public virtual void AddIndexes(params IndexReader[] readers) { EnsureOpen(); // Do not allow add docs or deletes while we are running: docWriter.PauseAllThreads(); // We must pre-acquire a read lock here (and upgrade to // write lock in startTransaction below) so that no // other addIndexes is allowed to start up after we have // flushed & optimized but before we then start our // transaction. This is because the merging below // requires that only one segment is present in the // index: AcquireRead(); try { SegmentInfo info = null; System.String mergedName = null; SegmentMerger merger = null; bool success = false; try { Flush(true, false, true); Optimize(); // start with zero or 1 seg success = true; } finally { // Take care to release the read lock if we hit an // exception before starting the transaction if (!success) ReleaseRead(); } // true means we already have a read lock; if this // call hits an exception it will release the write // lock: StartTransaction(true); try { mergedName = NewSegmentName(); merger = new SegmentMerger(this, mergedName, null); SegmentReader sReader = null; lock (this) { if (segmentInfos.Count == 1) { // add existing index, if any sReader = readerPool.Get(segmentInfos.Info(0), true, BufferedIndexInput.BUFFER_SIZE, - 1); } } success = false; try { if (sReader != null) merger.Add(sReader); for (int i = 0; i < readers.Length; i++) // add new indexes merger.Add(readers[i]); int docCount = merger.Merge(); // merge 'em lock (this) { segmentInfos.Clear(); // pop old infos & add new info = new SegmentInfo(mergedName, docCount, directory, false, true, - 1, null, false, merger.HasProx()); SetDiagnostics(info, "addIndexes(params IndexReader[])"); segmentInfos.Add(info); } // Notify DocumentsWriter that the flushed count just increased docWriter.UpdateFlushedDocCount(docCount); success = true; } finally { if (sReader != null) { readerPool.Release(sReader); } } } finally { if (!success) { if (infoStream != null) Message("hit exception in addIndexes during merge"); RollbackTransaction(); } else { CommitTransaction(); } } if (mergePolicy is LogMergePolicy && UseCompoundFile) { IList files = null; lock (this) { // Must incRef our files so that if another thread // is running merge/optimize, it doesn't delete our // segment's files before we have a change to // finish making the compound file. if (segmentInfos.Contains(info)) { files = info.Files(); deleter.IncRef(files); } } if (files != null) { success = false; StartTransaction(false); try { merger.CreateCompoundFile(mergedName + ".cfs"); lock (this) { info.SetUseCompoundFile(true); } success = true; } finally { lock (this) { deleter.DecRef(files); } if (!success) { if (infoStream != null) Message("hit exception building compound file in addIndexes during merge"); RollbackTransaction(); } else { CommitTransaction(); } } } } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addIndexes(params IndexReader[])"); } finally { if (docWriter != null) { docWriter.ResumeAllThreads(); } } } /// /// A hook for extending classes to execute operations after pending added and /// deleted documents have been flushed to the Directory but before the change /// is committed (new segments_N file written). /// protected virtual void DoAfterFlush() { } /// /// A hook for extending classes to execute operations before pending added and /// deleted documents are flushed to the Directory. 
/// protected virtual void DoBeforeFlush() { } /// Expert: prepare for commit. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///

/// /// public void PrepareCommit() { EnsureOpen(); PrepareCommit(null); } ///

Expert: prepare for commit, specifying /// commitUserData Map (String -> String). This does the /// first phase of 2-phase commit. This method does all steps /// necessary to commit changes since this writer was /// opened: flushes pending added and deleted docs, syncs /// the index files, writes most of next segments_N file. /// After calling this you must call either /// to finish the commit, or /// to revert the commit and undo all changes /// done since the writer was opened.

/// /// You can also just call directly /// without prepareCommit first in which case that method /// will internally call prepareCommit. /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///
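// Illustrative sketch (not part of the original source): using the two-phase commit
// described above to coordinate the index with another transactional resource;
// "database" and its Prepare/Commit methods are hypothetical.
//
//   writer.PrepareCommit();     // phase 1: flush, sync files, write the pending segments_N
//   try
//   {
//       database.Prepare();
//       writer.Commit();        // phase 2: publish the prepared commit
//       database.Commit();
//   }
//   catch (System.Exception)
//   {
//       writer.Rollback();      // discard the prepared commit and all uncommitted changes
//       throw;
//   }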

/// Opaque Map (String->String) /// that's recorded into the segments file in the index, /// and retrievable by . /// Note that when IndexWriter commits itself, during , the /// commitUserData is unchanged (just carried over from /// the prior commit). If this is null then the previous /// commitUserData is kept. Also, the commitUserData will /// only "stick" if there are actually changes in the /// index to commit. /// private void PrepareCommit(IDictionary commitUserData) { if (hitOOM) { throw new System.SystemException("this writer hit an OutOfMemoryError; cannot commit"); } if (pendingCommit != null) throw new System.SystemException("prepareCommit was already called with no corresponding call to commit"); if (infoStream != null) Message("prepareCommit: flush"); Flush(true, true, true); StartCommit(0, commitUserData); } // Used only by commit, below; lock order is commitLock -> IW private Object commitLock = new Object(); private void Commit(long sizeInBytes) { lock(commitLock) { StartCommit(sizeInBytes, null); FinishCommit(); } } ///

Commits all pending changes (added & deleted /// documents, optimizations, segment merges, added /// indexes, etc.) to the index, and syncs all referenced /// index files, such that a reader will see the changes /// and the index updates will survive an OS or machine /// crash or power loss. Note that this does not wait for /// any running background merges to finish. This may be a /// costly operation, so you should test the cost in your /// application and do it only when really necessary.

/// ///

Note that this operation calls Directory.sync on /// the index files. That call should not return until the /// file contents & metadata are on stable storage. For /// FSDirectory, this calls the OS's fsync. But, beware: /// some hardware devices may in fact cache writes even /// during fsync, and return before the bits are actually /// on stable storage, to give the appearance of faster /// performance. If you have such a device, and it does /// not have a battery backup (for example) then on power /// loss it may still lose data. Lucene cannot guarantee /// consistency on such devices.

/// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

/// ///

/// /// /// /// public void Commit() { Commit(null); } /// Commits all changes to the index, specifying a /// commitUserData Map (String -> String). This just /// calls (if you didn't /// already call it) and then . /// ///

NOTE: if this method hits an OutOfMemoryError /// you should immediately close the writer. See above for details.

///
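// Illustrative sketch (not part of the original source): attaching opaque user data
// to a commit point, for example to record how far an external source has been
// consumed. The key and value strings are hypothetical; the map type is assumed to
// be IDictionary<string, string> as in the Java original.
//
//   var commitUserData = new Dictionary<string, string>
//   {
//       { "lastProcessedChangeId", "12345" }
//   };
//   writer.Commit(commitUserData);   // prepares (if necessary) and finishes the commit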

public void Commit(IDictionary commitUserData) { EnsureOpen(); if (infoStream != null) { Message("commit: start"); } lock (commitLock) { if (infoStream != null) { Message("commit: enter lock"); } if (pendingCommit == null) { if (infoStream != null) { Message("commit: now prepare"); } PrepareCommit(commitUserData); } else if (infoStream != null) { Message("commit: already prepared"); } FinishCommit(); } } private void FinishCommit() { lock (this) { if (pendingCommit != null) { try { if (infoStream != null) Message("commit: pendingCommit != null"); pendingCommit.FinishCommit(directory); if (infoStream != null) Message("commit: wrote segments file \"" + pendingCommit.GetCurrentSegmentFileName() + "\""); lastCommitChangeCount = pendingCommitChangeCount; segmentInfos.UpdateGeneration(pendingCommit); segmentInfos.UserData = pendingCommit.UserData; SetRollbackSegmentInfos(pendingCommit); deleter.Checkpoint(pendingCommit, true); } finally { deleter.DecRef(pendingCommit); pendingCommit = null; System.Threading.Monitor.PulseAll(this); } } else if (infoStream != null) { Message("commit: pendingCommit == null; skip"); } if (infoStream != null) { Message("commit: done"); } } } /// Flush all in-memory buffered udpates (adds and deletes) /// to the Directory. /// /// if true, we may merge segments (if /// deletes or docs were flushed) if necessary /// /// if false we are allowed to keep /// doc stores open to share with the next segment /// /// whether pending deletes should also /// be flushed /// public /*protected internal*/ void Flush(bool triggerMerge, bool flushDocStores, bool flushDeletes) { // We can be called during close, when closing==true, so we must pass false to ensureOpen: EnsureOpen(false); if (DoFlush(flushDocStores, flushDeletes) && triggerMerge) MaybeMerge(); } // TODO: this method should not have to be entirely // synchronized, ie, merges should be allowed to commit // even while a flush is happening private bool DoFlush(bool flushDocStores, bool flushDeletes) { lock (this) { try { try { return DoFlushInternal(flushDocStores, flushDeletes); } finally { if (docWriter.DoBalanceRAM()) { docWriter.BalanceRAM(); } } } finally { docWriter.ClearFlushPending(); } } } // TODO: this method should not have to be entirely // synchronized, ie, merges should be allowed to commit // even while a flush is happening private bool DoFlushInternal(bool flushDocStores, bool flushDeletes) { lock (this) { if (hitOOM) { throw new System.SystemException("this writer hit an OutOfMemoryError; cannot flush"); } EnsureOpen(false); System.Diagnostics.Debug.Assert(TestPoint("startDoFlush")); DoBeforeFlush(); flushCount++; // If we are flushing because too many deletes // accumulated, then we should apply the deletes to free // RAM: flushDeletes |= docWriter.DoApplyDeletes(); // Make sure no threads are actively adding a document. 
// Returns true if docWriter is currently aborting, in // which case we skip flushing this segment if (infoStream != null) { Message("flush: now pause all indexing threads"); } if (docWriter.PauseAllThreads()) { docWriter.ResumeAllThreads(); return false; } try { SegmentInfo newSegment = null; int numDocs = docWriter.NumDocsInRAM; // Always flush docs if there are any bool flushDocs = numDocs > 0; System.String docStoreSegment = docWriter.DocStoreSegment; System.Diagnostics.Debug.Assert(docStoreSegment != null || numDocs == 0, "dss=" + docStoreSegment + " numDocs=" + numDocs); if (docStoreSegment == null) flushDocStores = false; int docStoreOffset = docWriter.DocStoreOffset; bool docStoreIsCompoundFile = false; if (infoStream != null) { Message(" flush: segment=" + docWriter.Segment + " docStoreSegment=" + docWriter.DocStoreSegment + " docStoreOffset=" + docStoreOffset + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms()); Message(" index before flush " + SegString()); } // Check if the doc stores must be separately flushed // because other segments, besides the one we are about // to flush, reference it if (flushDocStores && (!flushDocs || !docWriter.Segment.Equals(docWriter.DocStoreSegment))) { // We must separately flush the doc store if (infoStream != null) Message(" flush shared docStore segment " + docStoreSegment); docStoreIsCompoundFile = FlushDocStores(); flushDocStores = false; } System.String segment = docWriter.Segment; // If we are flushing docs, segment must not be null: System.Diagnostics.Debug.Assert(segment != null || !flushDocs); if (flushDocs) { bool success = false; int flushedDocCount; try { flushedDocCount = docWriter.Flush(flushDocStores); if (infoStream != null) { Message("flushedFiles=" + docWriter.GetFlushedFiles()); } success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception flushing segment " + segment); deleter.Refresh(segment); } } if (0 == docStoreOffset && flushDocStores) { // This means we are flushing private doc stores // with this segment, so it will not be shared // with other segments System.Diagnostics.Debug.Assert(docStoreSegment != null); System.Diagnostics.Debug.Assert(docStoreSegment.Equals(segment)); docStoreOffset = - 1; docStoreIsCompoundFile = false; docStoreSegment = null; } // Create new SegmentInfo, but do not add to our // segmentInfos until deletes are flushed // successfully. newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter.HasProx()); SetDiagnostics(newSegment, "flush"); } docWriter.PushDeletes(); if (flushDocs) { segmentInfos.Add(newSegment); Checkpoint(); } if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment)) { // Now build compound file bool success = false; try { docWriter.CreateCompoundFile(segment); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception creating compound file for newly flushed segment " + segment); deleter.DeleteFile(segment + "." 
+ IndexFileNames.COMPOUND_FILE_EXTENSION); } } newSegment.SetUseCompoundFile(true); Checkpoint(); } if (flushDeletes) { ApplyDeletes(); } if (flushDocs) Checkpoint(); DoAfterFlush(); return flushDocs; } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "doFlush"); // never hit return false; } finally { docWriter.ResumeAllThreads(); } } } /// Expert: Return the total size of all index files currently cached in memory. /// Useful for size management with flushRamDocs() /// public long RamSizeInBytes() { EnsureOpen(); return docWriter.GetRAMUsed(); } /// Expert: Return the number of documents currently /// buffered in RAM. /// public int NumRamDocs() { lock (this) { EnsureOpen(); return docWriter.NumDocsInRAM; } } private int EnsureContiguousMerge(MergePolicy.OneMerge merge) { int first = segmentInfos.IndexOf(merge.segments.Info(0)); if (first == - 1) throw new MergePolicy.MergeException("could not find segment " + merge.segments.Info(0).name + " in current index " + SegString(), directory); int numSegments = segmentInfos.Count; int numSegmentsToMerge = merge.segments.Count; for (int i = 0; i < numSegmentsToMerge; i++) { SegmentInfo info = merge.segments.Info(i); if (first + i >= numSegments || !segmentInfos.Info(first + i).Equals(info)) { if (segmentInfos.IndexOf(info) == - 1) throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + SegString(), directory); else throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.SegString(directory) + " vs " + SegString() + "), which IndexWriter (currently) cannot handle", directory); } } return first; } /// Carefully merges deletes for the segments we just /// merged. This is tricky because, although merging will /// clear all deletes (compacts the documents), new /// deletes may have been flushed to the segments since /// the merge was started. This method "carries over" /// such new deletes onto the newly merged segment, and /// saves the resulting deletes file (incrementing the /// delete generation for merge.info). If no deletes were /// flushed, no new deletes file is saved. /// private void CommitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergeReader) { lock (this) { System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes")); SegmentInfos sourceSegments = merge.segments; if (infoStream != null) Message("commitMergeDeletes " + merge.SegString(directory)); // Carefully merge deletes that occurred after we // started merging: int docUpto = 0; int delCount = 0; for (int i = 0; i < sourceSegments.Count; i++) { SegmentInfo info = sourceSegments.Info(i); int docCount = info.docCount; SegmentReader previousReader = merge.readersClone[i]; SegmentReader currentReader = merge.readers[i]; if (previousReader.HasDeletions) { // There were deletes on this segment when the merge // started. The merge has collapsed away those // deletes, but, if new deletes were flushed since // the merge started, we must now carefully keep any // newly flushed deletes but mapping them to the new // docIDs. 
if (currentReader.NumDeletedDocs > previousReader.NumDeletedDocs) { // This means this segment has had new deletes // committed since we started the merge, so we // must merge them: for (int j = 0; j < docCount; j++) { if (previousReader.IsDeleted(j)) { System.Diagnostics.Debug.Assert(currentReader.IsDeleted(j)); } else { if (currentReader.IsDeleted(j)) { mergeReader.DoDelete(docUpto); delCount++; } docUpto++; } } } else { docUpto += docCount - previousReader.NumDeletedDocs; } } else if (currentReader.HasDeletions) { // This segment had no deletes before but now it // does: for (int j = 0; j < docCount; j++) { if (currentReader.IsDeleted(j)) { mergeReader.DoDelete(docUpto); delCount++; } docUpto++; } } // No deletes before or after else docUpto += info.docCount; } System.Diagnostics.Debug.Assert(mergeReader.NumDeletedDocs == delCount); mergeReader.hasChanges = delCount > 0; } } /* FIXME if we want to support non-contiguous segment merges */ private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader) { lock (this) { System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge")); if (hitOOM) { throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete merge"); } if (infoStream != null) Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString()); System.Diagnostics.Debug.Assert(merge.registerDone); // If merge was explicitly aborted, or, if rollback() or // rollbackTransaction() had been called since our merge // started (which results in an unqualified // deleter.refresh() call that will remove any index // file that current segments does not reference), we // abort this merge if (merge.IsAborted()) { if (infoStream != null) Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted"); return false; } int start = EnsureContiguousMerge(merge); CommitMergedDeletes(merge, mergedReader); docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount); // If the doc store we are using has been closed and // is in now compound format (but wasn't when we // started), then we will switch to the compound // format as well: SetMergeDocStoreIsCompoundFile(merge); merge.info.HasProx = merger.HasProx(); segmentInfos.RemoveRange(start, start + merge.segments.Count - start); System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info)); segmentInfos.Insert(start, merge.info); CloseMergeReaders(merge, false); // Must note the change to segmentInfos so any commits // in-flight don't lose it: Checkpoint(); // If the merged segments had pending changes, clear // them so that they don't bother writing them to // disk, updating SegmentInfo, etc.: readerPool.Clear(merge.segments); if (merge.optimize) { // cascade the optimize: segmentsToOptimize.Add(merge.info); } return true; } } private void HandleMergeException(System.Exception t, MergePolicy.OneMerge merge) { if (infoStream != null) { Message("handleMergeException: merge=" + merge.SegString(directory) + " exc=" + t); } // Set the exception on the merge, so if // optimize() is waiting on us it sees the root // cause exception: merge.SetException(t); AddMergeException(merge); if (t is MergePolicy.MergeAbortedException) { // We can ignore this exception (it happens when // close(false) or rollback is called), unless the // merge involves segments from external directories, // in which case we must throw it so, for example, the // rollbackTransaction code in addIndexes* is // 
executed. if (merge.isExternal) throw t; } else if (t is System.IO.IOException || t is System.SystemException || t is System.ApplicationException) { throw t; } else { // Should not get here System.Diagnostics.Debug.Fail("Exception is not expected type!"); throw new System.SystemException(null, t); } } public void Merge_ForNUnit(MergePolicy.OneMerge merge) { Merge(merge); } /// Merges the indicated segments, replacing them in the stack with a /// single segment. /// internal void Merge(MergePolicy.OneMerge merge) { bool success = false; try { try { try { MergeInit(merge); if (infoStream != null) { Message("now merge\n merge=" + merge.SegString(directory) + "\n merge=" + merge + "\n index=" + SegString()); } MergeMiddle(merge); MergeSuccess(merge); success = true; } catch (System.Exception t) { HandleMergeException(t, merge); } } finally { lock (this) { MergeFinish(merge); if (!success) { if (infoStream != null) Message("hit exception during merge"); if (merge.info != null && !segmentInfos.Contains(merge.info)) deleter.Refresh(merge.info.name); } // This merge (and, generally, any change to the // segments) may now enable new merges, so we call // merge policy & update pending merges. if (success && !merge.IsAborted() && !closed && !closing) UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize); } } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "merge"); } } /// Hook that's called when the specified merge is complete. internal virtual void MergeSuccess(MergePolicy.OneMerge merge) { } /// Checks whether this merge involves any segments /// already participating in a merge. If not, this merge /// is "registered", meaning we record that its segments /// are now participating in a merge, and true is /// returned. Else (the merge conflicts) false is /// returned. /// internal bool RegisterMerge(MergePolicy.OneMerge merge) { lock (this) { if (merge.registerDone) return true; if (stopMerges) { merge.Abort(); throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.SegString(directory)); } int count = merge.segments.Count; bool isExternal = false; for (int i = 0; i < count; i++) { SegmentInfo info = merge.segments.Info(i); if (mergingSegments.Contains(info)) { return false; } if (segmentInfos.IndexOf(info) == -1) { return false; } if (info.dir != directory) { isExternal = true; } if (segmentsToOptimize.Contains(info)) { merge.optimize = true; merge.maxNumSegmentsOptimize = optimizeMaxNumSegments; } } EnsureContiguousMerge(merge); pendingMerges.AddLast(merge); if (infoStream != null) Message("add merge to pendingMerges: " + merge.SegString(directory) + " [total " + pendingMerges.Count + " pending]"); merge.mergeGen = mergeGen; merge.isExternal = isExternal; // OK it does not conflict; now record that this merge // is running (while synchronized) to avoid race // condition where two conflicting merges from different // threads, start for (int i = 0; i < count; i++) { SegmentInfo si = merge.segments.Info(i); mergingSegments.Add(si); } // Merge is now registered merge.registerDone = true; return true; } } /// Does initial setup for a merge, which is fast but holds /// the synchronized lock on IndexWriter instance. 
/// internal void MergeInit(MergePolicy.OneMerge merge) { lock (this) { bool success = false; try { _MergeInit(merge); success = true; } finally { if (!success) { MergeFinish(merge); } } } } private void _MergeInit(MergePolicy.OneMerge merge) { lock (this) { System.Diagnostics.Debug.Assert(TestPoint("startMergeInit")); System.Diagnostics.Debug.Assert(merge.registerDone); System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0); if (hitOOM) { throw new System.SystemException("this writer hit an OutOfMemoryError; cannot merge"); } if (merge.info != null) // mergeInit already done return ; if (merge.IsAborted()) return ; ApplyDeletes(); SegmentInfos sourceSegments = merge.segments; int end = sourceSegments.Count; // Check whether this merge will allow us to skip // merging the doc stores (stored field & vectors). // This is a very substantial optimization (saves tons // of IO). Directory lastDir = directory; System.String lastDocStoreSegment = null; int next = - 1; bool mergeDocStores = false; bool doFlushDocStore = false; System.String currentDocStoreSegment = docWriter.DocStoreSegment; // Test each segment to be merged: check if we need to // flush/merge doc stores for (int i = 0; i < end; i++) { SegmentInfo si = sourceSegments.Info(i); // If it has deletions we must merge the doc stores if (si.HasDeletions()) mergeDocStores = true; // If it has its own (private) doc stores we must // merge the doc stores if (- 1 == si.DocStoreOffset) mergeDocStores = true; // If it has a different doc store segment than // previous segments, we must merge the doc stores System.String docStoreSegment = si.DocStoreSegment; if (docStoreSegment == null) mergeDocStores = true; else if (lastDocStoreSegment == null) lastDocStoreSegment = docStoreSegment; else if (!lastDocStoreSegment.Equals(docStoreSegment)) mergeDocStores = true; // Segments' docScoreOffsets must be in-order, // contiguous. For the default merge policy now // this will always be the case but for an arbitrary // merge policy this may not be the case if (- 1 == next) next = si.DocStoreOffset + si.docCount; else if (next != si.DocStoreOffset) mergeDocStores = true; else next = si.DocStoreOffset + si.docCount; // If the segment comes from a different directory // we must merge if (lastDir != si.dir) mergeDocStores = true; // If the segment is referencing the current "live" // doc store outputs then we must merge if (si.DocStoreOffset != - 1 && currentDocStoreSegment != null && si.DocStoreSegment.Equals(currentDocStoreSegment)) { doFlushDocStore = true; } } // if a mergedSegmentWarmer is installed, we must merge // the doc stores because we will open a full // SegmentReader on the merged segment: if (!mergeDocStores && mergedSegmentWarmer != null && currentDocStoreSegment != null && lastDocStoreSegment != null && lastDocStoreSegment.Equals(currentDocStoreSegment)) { mergeDocStores = true; } int docStoreOffset; System.String docStoreSegment2; bool docStoreIsCompoundFile; if (mergeDocStores) { docStoreOffset = - 1; docStoreSegment2 = null; docStoreIsCompoundFile = false; } else { SegmentInfo si = sourceSegments.Info(0); docStoreOffset = si.DocStoreOffset; docStoreSegment2 = si.DocStoreSegment; docStoreIsCompoundFile = si.DocStoreIsCompoundFile; } if (mergeDocStores && doFlushDocStore) { // SegmentMerger intends to merge the doc stores // (stored fields, vectors), and at least one of the // segments to be merged refers to the currently // live doc stores. 
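// To recap the checks above: the doc stores can be carried over unchanged
// only when every source segment has no deletions, references a shared doc
// store (docStoreOffset != -1) with the same doc store segment name and
// contiguous offsets, and lives in this directory (and, roughly, no
// merged-segment warmer will need a full reader); any violation set
// mergeDocStores. doFlushDocStore was set when a source segment still
// references the writer's currently open ("live") doc store files, which is
// why we flush below before SegmentMerger starts reading them.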
// TODO: if we know we are about to merge away these // newly flushed doc store files then we should not // make a compound file out of them... if (infoStream != null) Message("now flush at merge"); DoFlush(true, false); } merge.mergeDocStores = mergeDocStores; // Bind a new segment name here so even with // ConcurrentMergePolicy we keep deterministic segment // names. merge.info = new SegmentInfo(NewSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment2, docStoreIsCompoundFile, false); IDictionary<string, string> details = new Dictionary<string, string>(); details["optimize"] = merge.optimize + ""; details["mergeFactor"] = end + ""; details["mergeDocStores"] = mergeDocStores + ""; SetDiagnostics(merge.info, "merge", details); // Also enroll the merged segment into mergingSegments; // this prevents it from getting selected for a merge // after our merge is done but while we are building the // CFS: mergingSegments.Add(merge.info); } } private void SetDiagnostics(SegmentInfo info, System.String source) { SetDiagnostics(info, source, null); } private void SetDiagnostics(SegmentInfo info, System.String source, IDictionary<string, string> details) { IDictionary<string, string> diagnostics = new Dictionary<string, string>(); diagnostics["source"] = source; diagnostics["lucene.version"] = Constants.LUCENE_VERSION; diagnostics["os"] = Constants.OS_NAME + ""; diagnostics["os.arch"] = Constants.OS_ARCH + ""; diagnostics["os.version"] = Constants.OS_VERSION + ""; diagnostics["java.version"] = Constants.JAVA_VERSION + ""; diagnostics["java.vendor"] = Constants.JAVA_VENDOR + ""; if (details != null) { //System.Collections.ArrayList keys = new System.Collections.ArrayList(details.Keys); //System.Collections.ArrayList values = new System.Collections.ArrayList(details.Values); foreach (string key in details.Keys) { diagnostics[key] = details[key]; } } info.Diagnostics = diagnostics; } /// Does finishing work for a merge, which is fast but holds /// the synchronized lock on IndexWriter instance. /// internal void MergeFinish(MergePolicy.OneMerge merge) { lock (this) { // Optimize, addIndexes or finishMerges may be waiting // on merges to finish. 
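// Those waiters sit in DoWait(), which calls Monitor.Wait(this, 1000ms) in a
// loop and re-checks its condition each time it wakes; the PulseAll below is
// what wakes them early. A minimal sketch of that pattern, with a
// hypothetical 'writer' lock object and 'condition' flag (illustrative only):
//
//   lock (writer)                                  // waiter
//   {
//       while (!condition)
//           System.Threading.Monitor.Wait(writer, TimeSpan.FromSeconds(1));
//   }
//
//   lock (writer)                                  // notifier, after setting 'condition'
//   {
//       System.Threading.Monitor.PulseAll(writer);
//   }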
System.Threading.Monitor.PulseAll(this); // It's possible we are called twice, eg if there was an // exception inside mergeInit if (merge.registerDone) { SegmentInfos sourceSegments = merge.segments; int end = sourceSegments.Count; for (int i = 0; i < end; i++) mergingSegments.Remove(sourceSegments.Info(i)); if(merge.info != null) mergingSegments.Remove(merge.info); merge.registerDone = false; } runningMerges.Remove(merge); } } private void SetMergeDocStoreIsCompoundFile(MergePolicy.OneMerge merge) { lock (this) { string mergeDocStoreSegment = merge.info.DocStoreSegment; if (mergeDocStoreSegment != null && !merge.info.DocStoreIsCompoundFile) { int size = segmentInfos.Count; for (int i = 0; i < size; i++) { SegmentInfo info = segmentInfos.Info(i); string docStoreSegment = info.DocStoreSegment; if (docStoreSegment != null && docStoreSegment.Equals(mergeDocStoreSegment) && info.DocStoreIsCompoundFile) { merge.info.DocStoreIsCompoundFile = true; break; } } } } } private void CloseMergeReaders(MergePolicy.OneMerge merge, bool suppressExceptions) { lock (this) { int numSegments = merge.segments.Count; if (suppressExceptions) { // Suppress any new exceptions so we throw the // original cause for (int i = 0; i < numSegments; i++) { if (merge.readers[i] != null) { try { readerPool.Release(merge.readers[i], false); } catch (Exception) { } merge.readers[i] = null; } if (merge.readersClone[i] != null) { try { merge.readersClone[i].Close(); } catch (Exception) { } // This was a private clone and we had the // only reference System.Diagnostics.Debug.Assert(merge.readersClone[i].RefCount == 0); //: "refCount should be 0 but is " + merge.readersClone[i].getRefCount(); merge.readersClone[i] = null; } } } else { for (int i = 0; i < numSegments; i++) { if (merge.readers[i] != null) { readerPool.Release(merge.readers[i], true); merge.readers[i] = null; } if (merge.readersClone[i] != null) { merge.readersClone[i].Close(); // This was a private clone and we had the only reference System.Diagnostics.Debug.Assert(merge.readersClone[i].RefCount == 0); merge.readersClone[i] = null; } } } } } /// Does the actual (time-consuming) work of the merge, /// but without holding synchronized lock on IndexWriter /// instance /// private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); System.String mergedName = merge.info.name; SegmentMerger merger = null; int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; int numSegments = sourceSegments.Count; if (infoStream != null) Message("merging " + merge.SegString(directory)); merger = new SegmentMerger(this, mergedName, merge); merge.readers = new SegmentReader[numSegments]; merge.readersClone = new SegmentReader[numSegments]; bool mergeDocStores = false; String currentDocStoreSegment; lock(this) { currentDocStoreSegment = docWriter.DocStoreSegment; } bool currentDSSMerged = false; // This is try/finally to make sure merger's readers are // closed: bool success = false; try { int totDocCount = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo info = sourceSegments.Info(i); // Hold onto the "live" reader; we will use this to // commit merged deletes SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, -1); // We clone the segment readers because other // deletes may come in while we're merging so we // need readers that will not change SegmentReader clone = merge.readersClone[i] = (SegmentReader)reader.Clone(true); merger.Add(clone); if (clone.HasDeletions) { mergeDocStores = 
true; } if (info.DocStoreOffset != -1 && currentDocStoreSegment != null) { currentDSSMerged |= currentDocStoreSegment.Equals(info.DocStoreSegment); } totDocCount += clone.NumDocs(); } if (infoStream != null) { Message("merge: total " + totDocCount + " docs"); } merge.CheckAborted(directory); // If deletions have arrived and it has now become // necessary to merge doc stores, go and open them: if (mergeDocStores && !merge.mergeDocStores) { merge.mergeDocStores = true; lock (this) { if (currentDSSMerged) { if (infoStream != null) { Message("now flush at mergeMiddle"); } DoFlush(true, false); } } for (int i = 0; i < numSegments; i++) { merge.readersClone[i].OpenDocStores(); } // Clear DSS merge.info.SetDocStore(-1, null, false); } // This is where all the work happens: mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores); System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount); if (merge.useCompoundFile) { success = false; string compoundFileName = IndexFileNames.SegmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION); try { if (infoStream != null) { Message("create compound file " + compoundFileName); } merger.CreateCompoundFile(compoundFileName); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.IsAborted()) { // This can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: } else { HandleMergeException(ioe, merge); } } } catch (Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream != null) { Message("hit exception creating compound file during merge"); } lock (this) { deleter.DeleteFile(compoundFileName); deleter.DeleteNewFiles(merger.GetMergedFiles()); } } } success = false; lock (this) { // delete new non cfs files directly: they were never // registered with IFD deleter.DeleteNewFiles(merger.GetMergedFiles()); if (merge.IsAborted()) { if (infoStream != null) { Message("abort merge after building CFS"); } deleter.DeleteFile(compoundFileName); return 0; } } merge.info.SetUseCompoundFile(true); } int termsIndexDivisor; bool loadDocStores; // if the merged segment warmer was not installed when // this merge was started, causing us to not force // the docStores to close, we can't warm it now bool canWarm = merge.info.DocStoreSegment == null || currentDocStoreSegment == null || !merge.info.DocStoreSegment.Equals(currentDocStoreSegment); if (poolReaders && mergedSegmentWarmer != null && canWarm) { // Load terms index & doc stores so the segment // warmer can run searches, load documents/term // vectors termsIndexDivisor = readerTermsIndexDivisor; loadDocStores = true; } else { termsIndexDivisor = -1; loadDocStores = false; } // TODO: in the non-realtime case, we may want to only // keep deletes (it's costly to open entire reader // when we just need deletes) SegmentReader mergedReader = readerPool.Get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor); try { if (poolReaders && mergedSegmentWarmer != null) { mergedSegmentWarmer.Warm(mergedReader); } if (!CommitMerge(merge, merger, mergedDocCount, mergedReader)) { // commitMerge will return false if this merge was aborted return 0; } } finally { lock (this) { readerPool.Release(mergedReader); } } success = true; } finally { // Readers are already closed in commitMerge if we didn't hit // an exc: if (!success) { CloseMergeReaders(merge, true); } } return mergedDocCount; } internal virtual void AddMergeException(MergePolicy.OneMerge merge) { lock (this) 
{ System.Diagnostics.Debug.Assert(merge.GetException() != null); if (!mergeExceptions.Contains(merge) && mergeGen == merge.mergeGen) mergeExceptions.Add(merge); } } // Apply buffered deletes to all segments. private bool ApplyDeletes() { lock (this) { System.Diagnostics.Debug.Assert(TestPoint("startApplyDeletes")); flushDeletesCount++; bool success = false; bool changed; try { changed = docWriter.ApplyDeletes(segmentInfos); success = true; } finally { if (!success && infoStream != null) { Message("hit exception flushing deletes"); } } if (changed) Checkpoint(); return changed; } } // For test purposes. internal int GetBufferedDeleteTermsSize() { lock (this) { return docWriter.GetBufferedDeleteTerms().Count; } } // For test purposes. internal int GetNumBufferedDeleteTerms() { lock (this) { return docWriter.GetNumBufferedDeleteTerms(); } } // utility routines for tests public /*internal*/ virtual SegmentInfo NewestSegment() { return segmentInfos.Count > 0 ? segmentInfos.Info(segmentInfos.Count - 1) : null; } public virtual System.String SegString() { lock (this) { return SegString(segmentInfos); } } private System.String SegString(SegmentInfos infos) { lock (this) { System.Text.StringBuilder buffer = new System.Text.StringBuilder(); int count = infos.Count; for (int i = 0; i < count; i++) { if (i > 0) { buffer.Append(' '); } SegmentInfo info = infos.Info(i); buffer.Append(info.SegString(directory)); if (info.dir != directory) buffer.Append("**"); } return buffer.ToString(); } } // Files that have been sync'd already private HashSet<string> synced = new HashSet<string>(); // Files that are now being sync'd private HashSet<string> syncing = new HashSet<string>(); private bool StartSync(System.String fileName, ICollection<string> pending) { lock (synced) { if (!synced.Contains(fileName)) { if (!syncing.Contains(fileName)) { syncing.Add(fileName); return true; } else { pending.Add(fileName); return false; } } else return false; } } private void FinishSync(System.String fileName, bool success) { lock (synced) { System.Diagnostics.Debug.Assert(syncing.Contains(fileName)); syncing.Remove(fileName); if (success) synced.Add(fileName); System.Threading.Monitor.PulseAll(synced); } } /// Blocks until all files in syncing are sync'd private bool WaitForAllSynced(ICollection<string> syncing) { lock (synced) { IEnumerator<string> it = syncing.GetEnumerator(); while (it.MoveNext()) { System.String fileName = it.Current; while (!synced.Contains(fileName)) { if (!syncing.Contains(fileName)) // There was an error because a file that was // previously syncing failed to appear in synced return false; else System.Threading.Monitor.Wait(synced); } } return true; } } private void DoWait() { lock (this) { // NOTE: the callers of this method should in theory // be able to simply do wait(), but, as a defense // against thread timing hazards where notifyAll() // fails to be called, we wait for at most 1 second // and then return so the caller can check if wait // conditions are satisfied: System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(1000)); } } /// Walk through all files referenced by the current /// segmentInfos and ask the Directory to sync each file, /// if it wasn't already. If that succeeds, then we /// prepare a new segments_N file but do not fully commit /// it. 
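/// A minimal sketch of the public two-phase commit calls that ultimately
/// drive this method (illustrative only; 'writer' is any open IndexWriter):
///
///   writer.PrepareCommit();    // flushes, syncs files, writes a pending segments_N
///   // ... any external work that must succeed before the change becomes visible ...
///   writer.Commit();           // finishes the prepared commit
///   // on failure instead: writer.Rollback();  // drops the pending commit and closes the writer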
/// private void StartCommit(long sizeInBytes, IDictionary<string, string> commitUserData) { System.Diagnostics.Debug.Assert(TestPoint("startStartCommit")); // TODO: as of LUCENE-2095, we can simplify this method, // since only 1 thread can be in here at once if (hitOOM) { throw new System.SystemException("this writer hit an OutOfMemoryError; cannot commit"); } try { if (infoStream != null) Message("startCommit(): start sizeInBytes=" + sizeInBytes); SegmentInfos toSync = null; long myChangeCount; lock (this) { // Wait for any running addIndexes to complete // first, then block any from running until we've // copied the segmentInfos we intend to sync: BlockAddIndexes(false); // On commit the segmentInfos must never // reference a segment in another directory: System.Diagnostics.Debug.Assert(!HasExternalSegments()); try { System.Diagnostics.Debug.Assert(lastCommitChangeCount <= changeCount); myChangeCount = changeCount; if (changeCount == lastCommitChangeCount) { if (infoStream != null) Message(" skip startCommit(): no changes pending"); return ; } // First, we clone & incref the segmentInfos we intend // to sync, then, without locking, we sync() each file // referenced by toSync, in the background. Multiple // threads can be doing this at once, if say a large // merge and a small merge finish at the same time: if (infoStream != null) Message("startCommit index=" + SegString(segmentInfos) + " changeCount=" + changeCount); readerPool.Commit(); // It's possible another flush (that did not close // the open doc stores) snuck in after the flush we // just did, so we remove any tail segments // referencing the open doc store from the // SegmentInfos we are about to sync (the main // SegmentInfos will keep them): toSync = (SegmentInfos) segmentInfos.Clone(); string dss = docWriter.DocStoreSegment; if (dss != null) { while (true) { String dss2 = toSync.Info(toSync.Count - 1).DocStoreSegment; if (dss2 == null || !dss2.Equals(dss)) { break; } toSync.RemoveAt(toSync.Count - 1); changeCount++; } } if (commitUserData != null) toSync.UserData = commitUserData; deleter.IncRef(toSync, false); ICollection<string> files = toSync.Files(directory, false); foreach(string fileName in files) { System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file " + fileName + " does not exist"); // If this trips it means we are missing a call to // .checkpoint somewhere, because by the time we // are called, deleter should know about every // file referenced by the current head // segmentInfos: System.Diagnostics.Debug.Assert(deleter.Exists(fileName)); } } finally { ResumeAddIndexes(); } } System.Diagnostics.Debug.Assert(TestPoint("midStartCommit")); bool setPending = false; try { // Loop until all files toSync references are sync'd: while (true) { ICollection<string> pending = new List<string>(); IEnumerator<string> it = toSync.Files(directory, false).GetEnumerator(); while (it.MoveNext()) { string fileName = it.Current; if (StartSync(fileName, pending)) { bool success = false; try { // Because we incRef'd this commit point, above, // the file had better exist: System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file '" + fileName + "' does not exist dir=" + directory); if (infoStream != null) Message("now sync " + fileName); directory.Sync(fileName); success = true; } finally { FinishSync(fileName, success); } } } // All files that I require are either synced or being // synced by other threads. If they are being synced, // we must at this point block until they are done. 
// If this returns false, that means an error in // another thread resulted in failing to actually // sync one of our files, so we repeat: if (WaitForAllSynced(pending)) break; } System.Diagnostics.Debug.Assert(TestPoint("midStartCommit2")); lock (this) { // If someone saved a newer version of segments file // since I first started syncing my version, I can // safely skip saving myself since I've been // superseded: while (true) { if (myChangeCount <= lastCommitChangeCount) { if (infoStream != null) { Message("sync superseded by newer infos"); } break; } else if (pendingCommit == null) { // My turn to commit if (segmentInfos.Generation > toSync.Generation) toSync.UpdateGeneration(segmentInfos); bool success = false; try { // Exception here means nothing is prepared // (this method unwinds everything it did on // an exception) try { toSync.PrepareCommit(directory); } finally { // Have our master segmentInfos record the // generations we just prepared. We do this // on error or success so we don't // double-write a segments_N file. segmentInfos.UpdateGeneration(toSync); } System.Diagnostics.Debug.Assert(pendingCommit == null); setPending = true; pendingCommit = toSync; pendingCommitChangeCount = (uint) myChangeCount; success = true; } finally { if (!success && infoStream != null) Message("hit exception committing segments file"); } break; } else { // Must wait for other commit to complete DoWait(); } } } if (infoStream != null) Message("done all syncs"); System.Diagnostics.Debug.Assert(TestPoint("midStartCommitSuccess")); } finally { lock (this) { if (!setPending) deleter.DecRef(toSync); } } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "startCommit"); } System.Diagnostics.Debug.Assert(TestPoint("finishStartCommit")); } /// Returns true iff the index in the named directory is /// currently locked. /// /// the directory to check for a lock /// /// IOException if there is a low-level IO error public static bool IsLocked(Directory directory) { return directory.MakeLock(WRITE_LOCK_NAME).IsLocked(); } /// Forcibly unlocks the index in the named directory. ///

/// Caution: this should only be used by failure recovery code, /// when it is known that no other process nor thread is in fact /// currently accessing this index. ///
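/// A minimal recovery sketch (illustrative only; 'dir' and 'analyzer' are
/// assumed to already exist, and no other process may be using the index):
///
///   if (IndexWriter.IsLocked(dir))
///       IndexWriter.Unlock(dir);   // clear a stale write lock left by a crash
///   IndexWriter writer = new IndexWriter(dir, analyzer, false,
///                                        IndexWriter.MaxFieldLength.UNLIMITED);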

public static void Unlock(Directory directory) { directory.MakeLock(IndexWriter.WRITE_LOCK_NAME).Release(); } /// Specifies the maximum field length (in number of tokens/terms) in IndexWriter constructors. /// SetMaxFieldLength(int) overrides the value set by /// the constructor. /// public sealed class MaxFieldLength { private int limit; private System.String name; /// Private type-safe-enum-pattern constructor. /// /// /// instance name /// /// maximum field length /// internal MaxFieldLength(System.String name, int limit) { this.name = name; this.limit = limit; } /// Public constructor to allow users to specify the maximum field size limit. /// /// /// The maximum field length /// public MaxFieldLength(int limit):this("User-specified", limit) { } public int Limit { get { return limit; } } public override System.String ToString() { return name + ":" + limit; } /// Sets the maximum field length to Int32.MaxValue (effectively unlimited). public static readonly MaxFieldLength UNLIMITED = new MaxFieldLength("UNLIMITED", System.Int32.MaxValue); /// Sets the maximum field length to DEFAULT_MAX_FIELD_LENGTH (10,000 terms per field). /// public static readonly MaxFieldLength LIMITED; static MaxFieldLength() { LIMITED = new MaxFieldLength("LIMITED", Lucene.Net.Index.IndexWriter.DEFAULT_MAX_FIELD_LENGTH); } } /// If GetReader() has been called (i.e., this writer /// is in near real-time mode), then after a merge /// completes, this class can be invoked to warm the /// reader on the newly merged segment, before the merge /// commits. This is not required for near real-time /// search, but will reduce search latency on opening a /// new near real-time reader after a merge completes. /// ///

NOTE: This API is experimental and might /// change in incompatible ways in the next release.

/// ///

NOTE: warm is called before any deletes have /// been carried over to the merged segment. ///
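/// A minimal warmer sketch (illustrative only; the names below are
/// hypothetical and 'writer' is any near real-time IndexWriter):
///
///   class SimpleWarmer : IndexWriter.IndexReaderWarmer
///   {
///       public override void Warm(IndexReader reader)
///       {
///           // touch the newly merged segment so a later GetReader() call
///           // pays less warm-up cost, e.g. run one representative query:
///           new Lucene.Net.Search.IndexSearcher(reader)
///               .Search(new Lucene.Net.Search.TermQuery(new Term("id", "0")), 1);
///       }
///   }
///
///   writer.MergedSegmentWarmer = new SimpleWarmer();
///   IndexReader rtReader = writer.GetReader();   // near real-time reader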

public abstract class IndexReaderWarmer { public abstract void Warm(IndexReader reader); } private IndexReaderWarmer mergedSegmentWarmer; /// Gets or sets the merged segment warmer. See ///. /// public virtual IndexReaderWarmer MergedSegmentWarmer { set { mergedSegmentWarmer = value; } get { return mergedSegmentWarmer; } } private void HandleOOM(System.OutOfMemoryException oom, System.String location) { if (infoStream != null) { Message("hit OutOfMemoryError inside " + location); } hitOOM = true; throw oom; } // Used only by assert for testing. Current points: // startDoFlush // startCommitMerge // startStartCommit // midStartCommit // midStartCommit2 // midStartCommitSuccess // finishStartCommit // startCommitMergeDeletes // startMergeInit // startApplyDeletes // DocumentsWriter.ThreadState.init start public /*internal*/ virtual bool TestPoint(System.String name) { return true; } internal virtual bool NrtIsCurrent(SegmentInfos infos) { lock (this) { if (!infos.Equals(segmentInfos)) { // if any structural changes (new segments), we are // stale return false; } else if (infos.Generation != segmentInfos.Generation) { // if any commit took place since we were opened, we // are stale return false; } else { return !docWriter.AnyChanges; } } } internal virtual bool IsClosed() { lock (this) { return closed; } } static IndexWriter() { MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH; } } }