/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using Analyzer = Lucene.Net.Analysis.Analyzer;
using Document = Lucene.Net.Documents.Document;
using Similarity = Lucene.Net.Search.Similarity;
using Query = Lucene.Net.Search.Query;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using Lock = Lucene.Net.Store.Lock;
using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException;
using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
using BitVector = Lucene.Net.Util.BitVector;
using Constants = Lucene.Net.Util.Constants;
using System.Collections.Generic;

namespace Lucene.Net.Index
{
/// An IndexWriter creates and maintains an index.
///

/// The create argument to the constructor determines whether a new
/// index is created, or whether an existing index is opened. Note
/// that you can open an index with create=true even while readers
/// are using the index. The old readers will continue to search the
/// "point in time" snapshot they had opened, and won't see the newly
/// created index until they re-open. There are also constructors
/// with no create argument which will create a new index if there is
/// not already an index at the provided path and otherwise open the
/// existing index.
///
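/// For example, a sketch of appending to an existing index rather
/// than overwriting it (directory and analyzer are assumed to be in
/// scope):
///
///     IndexWriter writer = new IndexWriter(directory, analyzer,
///         false, IndexWriter.MaxFieldLength.LIMITED);
///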

/// In either case, documents are added with addDocument and removed
/// with deleteDocuments(Term) or deleteDocuments(Query). A document
/// can be updated with updateDocument (which just deletes and then
/// adds the entire document). When finished adding, deleting and
/// updating documents, close should be called.
///
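/// A minimal usage sketch (the field names and field options shown
/// here are illustrative, not prescribed by this class):
///
///     IndexWriter writer = new IndexWriter(directory, analyzer,
///         true, IndexWriter.MaxFieldLength.LIMITED);
///     Document doc = new Document();
///     doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
///     doc.Add(new Field("body", "hello world", Field.Store.NO, Field.Index.ANALYZED));
///     writer.AddDocument(doc);
///     writer.UpdateDocument(new Term("id", "42"), doc); // delete-then-add
///     writer.DeleteDocuments(new Term("id", "7"));
///     writer.Close();
///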

/// These changes are buffered in memory and periodically flushed to
/// the {@link Directory} (during the above method calls). A flush is
/// triggered when there are enough buffered deletes (see {@link
/// #setMaxBufferedDeleteTerms}) or enough added documents since the
/// last flush, whichever is sooner. For the added documents,
/// flushing is triggered either by RAM usage of the documents (see
/// {@link #setRAMBufferSizeMB}) or the number of added documents.
/// The default is to flush when RAM usage hits 16 MB. For best
/// indexing speed you should flush by RAM usage with a large RAM
/// buffer. Note that flushing just moves the internal buffered state
/// in IndexWriter into the index, but these changes are not visible
/// to IndexReader until either {@link #Commit()} or {@link #close}
/// is called. A flush may also trigger one or more segment merges
/// which by default run with a background thread so as not to block
/// the addDocument calls (see below for changing the {@link
/// MergeScheduler}).
///

/// The optional autoCommit argument to the constructors controls
/// visibility of the changes to {@link IndexReader} instances
/// reading the same index. When this is false, changes are not
/// visible until {@link #Close()} or {@link #Commit()} is called.
/// Note that changes will still be flushed to the {@link
/// org.apache.lucene.store.Directory} as new files, but are not
/// committed (no new segments_N file is written referencing the new
/// files, nor are the files sync'd to stable storage) until {@link
/// #Close()} or {@link #Commit()} is called. If something goes
/// terribly wrong (for example the JVM crashes), then the index will
/// reflect none of the changes made since the last commit, or the
/// starting state if commit was not called. You can also call {@link
/// #rollback}, which closes the writer without committing any
/// changes, and removes any index files that had been flushed but
/// are now unreferenced. This mode is useful for preventing readers
/// from refreshing at a bad time (for example after you've done all
/// your deletes but before you've done your adds). It can also be
/// used to implement simple single-writer transactional semantics
/// ("all or none"). You can do a two-phase commit by calling {@link
/// #PrepareCommit()} followed by {@link #Commit()}. This is
/// necessary when Lucene is working with an external resource (for
/// example, a database) and both must either commit or rollback the
/// transaction.
///
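/// For example, a sketch of a two-phase commit against a
/// hypothetical external resource 'db' that can itself commit or
/// roll back:
///
///     try
///     {
///         writer.PrepareCommit();
///         db.Commit();       // hypothetical external resource
///         writer.Commit();
///     }
///     catch (System.Exception)
///     {
///         writer.Rollback(); // discards flushed-but-uncommitted files
///         db.Rollback();
///         throw;
///     }
///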

/// When autoCommit is true then the writer will periodically commit
/// on its own. [Deprecated: Note that in 3.0, IndexWriter will no
/// longer accept autoCommit=true (it will be hardwired to false).
/// You can always call {@link #Commit()} yourself when needed].
/// There is no guarantee when exactly an auto commit will occur (it
/// used to be after every flush, but it is now after every completed
/// merge, as of 2.4). If you want to force a commit, call {@link
/// #Commit()}, or, close the writer. Once a commit has finished,
/// newly opened {@link IndexReader} instances will see the changes
/// to the index as of that commit. When running in this mode, be
/// careful not to refresh your readers while optimize or segment
/// merges are taking place as this can tie up substantial disk
/// space.
///

/// Regardless of autoCommit, an {@link IndexReader} or {@link
/// org.apache.lucene.search.IndexSearcher} will only see the index
/// as of the "point in time" that it was opened. Any changes
/// committed to the index after the reader was opened are not
/// visible until the reader is re-opened.
///

/// If an index will not have more documents added for a while and
/// optimal search performance is desired, then either the full
/// optimize method or partial {@link #Optimize(int)} method should
/// be called before the index is closed.
///
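/// For example, before retiring a writer on a finished index:
///
///     writer.Optimize();        // full optimize
///     // or writer.Optimize(5); // partial: reduce to at most 5 segments
///     writer.Close();
///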

/// Opening an IndexWriter creates a lock file for the directory in
/// use. Trying to open another IndexWriter on the same directory
/// will lead to a {@link LockObtainFailedException}. The {@link
/// LockObtainFailedException} is also thrown if an IndexReader on
/// the same directory is used to delete documents from the index.
///

/// Expert: IndexWriter allows an optional {@link
/// IndexDeletionPolicy} implementation to be specified. You can use
/// this to control when prior commits are deleted from the index.
/// The default policy is {@link KeepOnlyLastCommitDeletionPolicy}
/// which removes all prior commits as soon as a new commit is done
/// (this matches behavior before 2.2). Creating your own policy can
/// allow you to explicitly keep previous "point in time" commits
/// alive in the index for some time, to allow readers to refresh to
/// the new commit without having the old commit deleted out from
/// under them. This is necessary on filesystems like NFS that do not
/// support "delete on last close" semantics, which Lucene's "point
/// in time" search normally relies on.
///
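/// For example, a sketch of supplying a deletion policy explicitly
/// (this one is the default, shown only to illustrate the call):
///
///     IndexWriter writer = new IndexWriter(directory, analyzer,
///         new KeepOnlyLastCommitDeletionPolicy(),
///         IndexWriter.MaxFieldLength.UNLIMITED);
///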

/// Expert: IndexWriter allows you to separately change the {@link
/// MergePolicy} and the {@link MergeScheduler}. The {@link
/// MergePolicy} is invoked whenever there are changes to the
/// segments in the index. Its role is to select which merges to do,
/// if any, and return a {@link MergePolicy.MergeSpecification}
/// describing the merges. It also selects merges to do for
/// Optimize(). (The default is {@link LogByteSizeMergePolicy}.)
/// Then, the {@link MergeScheduler} is invoked with the requested
/// merges and it decides when and how to run the merges. The default
/// is {@link ConcurrentMergeScheduler}.
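///
/// For example, a sketch of installing merge components explicitly
/// (these happen to be the defaults):
///
///     writer.SetMergePolicy(new LogByteSizeMergePolicy());
///     writer.SetMergeScheduler(new ConcurrentMergeScheduler());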

///
/* * Clarification: Check Points (and commits) * Being able to set autoCommit=false allows IndexWriter to flush and * write new index files to the directory without writing a new segments_N * file which references these new files. It also means that the state of * the in memory SegmentInfos object is different than the most recent * segments_N file written to the directory. * * Each time the SegmentInfos is changed, and matches the (possibly * modified) directory files, we have a new "check point". * If the modified/new SegmentInfos is written to disk - as a new * (generation of) segments_N file - this check point is also an * IndexCommit. * * With autoCommit=true, every checkPoint is also a CommitPoint. * With autoCommit=false, some checkPoints may not be commits. * * A new checkpoint always replaces the previous checkpoint and * becomes the new "front" of the index. This allows the IndexFileDeleter * to delete files that are referenced only by stale checkpoints. * (files that were created since the last commit, but are no longer * referenced by the "front" of the index). For this, IndexFileDeleter * keeps track of the last non commit checkpoint. */ public class IndexWriter { private void InitBlock() { similarity = Similarity.GetDefault(); } /// Default value for the write lock timeout (1,000). /// /// public static long WRITE_LOCK_TIMEOUT = 1000; private long writeLockTimeout = WRITE_LOCK_TIMEOUT; /// Name of the write lock in the index. public const string WRITE_LOCK_NAME = "write.lock"; /// /// /// /// public static readonly int DEFAULT_MERGE_FACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR; /// Value to denote a flush trigger is disabled public const int DISABLE_AUTO_FLUSH = -1; /// Disabled by default (because IndexWriter flushes by RAM usage /// by default). Change using {@link #SetMaxBufferedDocs(int)}. /// public static readonly int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH; /// Default value is 16 MB (which means flush when buffered /// docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}. /// public const double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0; /// Disabled by default (because IndexWriter flushes by RAM usage /// by default). Change using {@link #SetMaxBufferedDeleteTerms(int)}. /// public static readonly int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH; /// /// /// /// public static readonly int DEFAULT_MAX_MERGE_DOCS; /// Default value is 10,000. Change using {@link #SetMaxFieldLength(int)}. public const int DEFAULT_MAX_FIELD_LENGTH = 10000; /// Default value is 128. Change using {@link #SetTermIndexInterval(int)}. public const int DEFAULT_TERM_INDEX_INTERVAL = 128; /// Absolute hard maximum length for a term. If a term /// arrives from the analyzer longer than this length, it /// is skipped and a message is printed to infoStream, if /// set (see {@link #setInfoStream}). /// public static readonly int MAX_TERM_LENGTH; /// /// Default for {@link #getMaxSyncPauseSeconds}. On /// Windows this defaults to 10.0 seconds; elsewhere it's /// 0. /// public static readonly double DEFAULT_MAX_SYNC_PAUSE_SECONDS; // The normal read buffer size defaults to 1024, but // increasing this during merging seems to yield // performance gains. However we don't want to increase // it too much because there are quite a few // BufferedIndexInputs created during merging. See // LUCENE-888 for details. 
private const int MERGE_READ_BUFFER_SIZE = 4096; // Used for printing messages private static object MESSAGE_ID_LOCK = new object(); private static int MESSAGE_ID = 0; private int messageID = -1; volatile private bool hitOOM; private Directory directory; // where this index resides private Analyzer analyzer; // how to analyze text private Similarity similarity; // how to normalize // {{dougsale-2.4.0}}: // per the VS compiler: "a volatile field cannot be of type 'long'" // use uint, same positive cardinality as Java long private volatile uint changeCount; // increments every time a change is completed private long lastCommitChangeCount; // last changeCount that was committed private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails private Dictionary rollbackSegments; internal volatile SegmentInfos pendingCommit; // set when a commit is pending (after PrepareCommit() & before Commit()) // {{dougsale-2.4.0}}: // per the VS compiler: "a volatile field cannot be of type 'long'" // use uint, same positive cardinality as Java long internal volatile uint pendingCommitChangeCount; private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails private bool localAutoCommit; // saved autoCommit during local transaction private int localFlushedDocCount; // saved docWriter.GetFlushedDocCount during local transaction private bool autoCommit = true; // false if we should commit only on close private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private DocumentsWriter docWriter; private IndexFileDeleter deleter; private Dictionary segmentsToOptimize = new Dictionary(); // used by optimize to note those needing optimization private Lock writeLock; private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; private bool closeDir; private bool closed; private bool closing; // Holds all SegmentInfo instances currently involved in // merges private Dictionary mergingSegments = new Dictionary(); private MergePolicy mergePolicy = new LogByteSizeMergePolicy(); private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler(); private List pendingMerges = new List(); private Dictionary runningMerges = new Dictionary(); private System.Collections.IList mergeExceptions = new System.Collections.ArrayList(); private long mergeGen; private bool stopMerges; private int flushCount; private int flushDeletesCount; private double maxSyncPauseSeconds = DEFAULT_MAX_SYNC_PAUSE_SECONDS; // Used to only allow one AddIndexes to proceed at once // TODO: use ReadWriteLock once we are on 5.0 private int readCount; // count of how many threads are holding read lock //private SupportClass.ThreadClass writeThread; // non-null if any thread holds write lock private System.Threading.Thread writeThread; // non-null if any thread holds write lock internal void AcquireWrite() { lock (this) { while (writeThread != null || readCount > 0) DoWait(); // We could have been closed while we were waiting: EnsureOpen(); //writeThread = SupportClass.ThreadClass.Current(); writeThread = System.Threading.Thread.CurrentThread; } } internal void ReleaseWrite() { lock (this) { //System.Diagnostics.Debug.Assert(SupportClass.ThreadClass.Current() == writeThread); System.Diagnostics.Debug.Assert(System.Threading.Thread.CurrentThread == writeThread); writeThread = null; System.Threading.Monitor.PulseAll(this); } } internal void AcquireRead() { lock (this) { //SupportClass.ThreadClass current = SupportClass.ThreadClass.Current(); System.Threading.Thread 
current = System.Threading.Thread.CurrentThread; while (writeThread != null && writeThread != current) DoWait(); readCount++; } } internal void ReleaseRead() { lock (this) { readCount--; System.Diagnostics.Debug.Assert(readCount >= 0); if (0 == readCount) System.Threading.Monitor.PulseAll(this); } } /// Used internally to throw an {@link /// AlreadyClosedException} if this IndexWriter has been /// closed. /// /// AlreadyClosedException if this IndexWriter is protected internal void EnsureOpen(bool includePendingClose) { lock (this) { if (closed || (includePendingClose && closing)) { throw new AlreadyClosedException("this IndexWriter is closed"); } } } protected void EnsureOpen() { lock (this) { EnsureOpen(true); } } /// Prints a message to the infoStream (if non-null), /// prefixed with the identifying information for this /// writer and the thread that's calling it. /// public virtual void Message(string message) { if (infoStream != null) infoStream.WriteLine("IW " + messageID + " [" + SupportClass.ThreadClass.Current().Name + "]: " + message); } private void SetMessageID(System.IO.TextWriter infoStream) { lock (this) { if (infoStream != null && messageID == -1) { lock (MESSAGE_ID_LOCK) { messageID = MESSAGE_ID++; } } this.infoStream = infoStream; } } /// Casts current mergePolicy to LogMergePolicy, and throws /// an exception if the mergePolicy is not a LogMergePolicy. /// private LogMergePolicy GetLogMergePolicy() { if (mergePolicy is LogMergePolicy) return (LogMergePolicy)mergePolicy; else throw new System.ArgumentException("this method can only be called when the merge policy is the default LogMergePolicy"); } ///

/// Get the current setting of whether newly flushed segments will
/// use the compound file format. Note that this just returns the
/// value previously set with setUseCompoundFile(bool), or the
/// default value (true). You cannot use this to query the status of
/// previously flushed segments.
///

/// Note that this method is a convenience method: it just calls
/// mergePolicy.GetUseCompoundFile as long as mergePolicy is an
/// instance of {@link LogMergePolicy}. Otherwise a
/// System.ArgumentException is thrown.
///
public virtual bool GetUseCompoundFile() { return GetLogMergePolicy().GetUseCompoundFile(); }

///

/// Setting to turn on usage of a compound file. When on, multiple
/// files for each segment are merged into a single file when a new
/// segment is flushed.
///

/// Note that this method is a convenience method: it just calls
/// mergePolicy.SetUseCompoundFile as long as mergePolicy is an
/// instance of {@link LogMergePolicy}. Otherwise a
/// System.ArgumentException is thrown.

///
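/// For example (requires the default LogMergePolicy):
///
///     writer.SetUseCompoundFile(true);
///     bool usingCfs = writer.GetUseCompoundFile();
///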
public virtual void SetUseCompoundFile(bool value_Renamed) { GetLogMergePolicy().SetUseCompoundFile(value_Renamed); GetLogMergePolicy().SetUseCompoundDocStore(value_Renamed); } /// Expert: Set the Similarity implementation used by this IndexWriter. /// /// /// /// public virtual void SetSimilarity(Similarity similarity) { EnsureOpen(); this.similarity = similarity; docWriter.SetSimilarity(similarity); } /// Expert: Return the Similarity implementation used by this IndexWriter. /// ///

/// This defaults to the current value of {@link Similarity#GetDefault()}.
///

public virtual Similarity GetSimilarity() { EnsureOpen(); return this.similarity; } /// Expert: Set the interval between indexed terms. Large values cause less /// memory to be used by IndexReader, but slow random-access to terms. Small /// values cause more memory to be used by an IndexReader, and speed /// random-access to terms. /// /// This parameter determines the amount of computation required per query /// term, regardless of the number of documents that contain that term. In /// particular, it is the maximum number of other terms that must be /// scanned before a term is located and its frequency and position information /// may be processed. In a large index with user-entered query terms, query /// processing time is likely to be dominated not by term lookup but rather /// by the processing of frequency and positional data. In a small index /// or when many uncommon query terms are generated (e.g., by wildcard /// queries) term lookup may become a dominant cost. /// /// In particular, numUniqueTerms/interval terms are read into /// memory by an IndexReader, and, on average, interval/2 terms /// must be scanned for each random term access. /// /// /// /// public virtual void SetTermIndexInterval(int interval) { EnsureOpen(); this.termIndexInterval = interval; } /// Expert: Return the interval between indexed terms. /// /// /// /// public virtual int GetTermIndexInterval() { // we pass false because this method is called by SegmentMerger while we are in the process of closing EnsureOpen(false); return termIndexInterval; } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. If create /// is true, then a new, empty index will be created in /// path, replacing the index already there, if any. /// /// NOTE: autoCommit (see above) is set to false with this constructor. /// /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and create is false or if there is any other low-level IO error /// /// the path to the index directory /// the analyzer to use /// true to create the index or overwrite the existing one; false to append to the existing index /// Maximum field Length: LIMITED, UNLIMITED, or user-specified public IndexWriter(string path, Analyzer a, bool create, MaxFieldLength mfl) { InitBlock(); Init(FSDirectory.GetDirectory(path), a, create, true, null, false, mfl.GetLimit()); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. If create /// is true, then a new, empty index will be created in /// path, replacing the index already there, if any. /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and create is false or if there is any other low-level IO error /// /// the path to the index directory /// the analyzer to use /// true to create the index or overwrite the existing one; false to append to the existing index [System.Obsolete("This constructor will be removed in the 3.0 release. 
Use IndexWriter(string, Analyzer, bool, MaxFieldLength) instead, and call Commit() when needed")] public IndexWriter(string path, Analyzer a, bool create) { InitBlock(); Init(FSDirectory.GetDirectory(path), a, create, true, null, true, DEFAULT_MAX_FIELD_LENGTH); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. If create /// is true, then a new, empty index will be created in /// path, replacing the index already there, if any. /// /// NOTE: autoCommit (see above) is set to false with this constructor. /// /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and create is false or if there is any other low-level IO error /// /// the path to the index directory /// the analyzer to use /// true to create the index or overwrite the existing one; false to append to the existing index /// Maximum field Length: LIMITED, UNLIMITED, or user-specified public IndexWriter(System.IO.FileInfo path, Analyzer a, bool create, MaxFieldLength mfl) { InitBlock(); Init(FSDirectory.GetDirectory(path), a, create, true, null, false, mfl.GetLimit()); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. If create /// is true, then a new, empty index will be created in /// path, replacing the index already there, if any. /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and create is false or if there is any other low-level IO error /// /// the path to the index directory /// the analyzer to use /// true to create the index or overwrite the existing one; false to append to the existing index [System.Obsolete("This constructor will be removed in the 3.0 release. Use IndexWriter(FileInfo, Analyzer, bool, MaxFieldLength) instead, and call Commit() when needed")] public IndexWriter(System.IO.FileInfo path, Analyzer a, bool create) { InitBlock(); Init(FSDirectory.GetDirectory(path), a, create, true, null, true, DEFAULT_MAX_FIELD_LENGTH); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. If create /// is true, then a new, empty index will be created in /// d, replacing the index already there, if any. /// /// NOTE: autoCommit (see above) is set to false with this constructor. /// /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and create is false or if there is any other low-level IO error /// /// the index directory /// the analyzer to use /// true to create the index or overwrite the existing one; false to append to the existing index /// Maximum field Length: LIMITED, UNLIMITED, or user-specified public IndexWriter(Directory d, Analyzer a, bool create, MaxFieldLength mfl) { InitBlock(); Init(d, a, create, false, null, false, mfl.GetLimit()); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. If create /// is true, then a new, empty index will be created in /// d, replacing the index already there, if any. 
/// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and create is false or if there is any other low-level IO error /// /// the index directory /// the analyzer to use /// true to create the index or overwrite the existing one; false to append to the existing index [System.Obsolete("This constructor will be removed in the 3.0 release. Use IndexWriter(Directory, Analyzer, bool, MaxFieldLength) instead, and call Commit() when needed")] public IndexWriter(Directory d, Analyzer a, bool create) { InitBlock(); Init(d, a, create, false, null, true, DEFAULT_MAX_FIELD_LENGTH); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. /// /// NOTE: autoCommit (see above) is set to false with this constructor. /// /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to or if there is any other low-level IO error /// /// the path to the index directory /// the analyzer to use /// Maximum field Length: LIMITED, UNLIMITED, or user-specified public IndexWriter(string path, Analyzer a, MaxFieldLength mfl) { InitBlock(); Init(FSDirectory.GetDirectory(path), a, true, null, false, mfl.GetLimit()); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to or if there is any other low-level IO error /// /// the path to the index directory /// the analyzer to use [System.Obsolete("This constructor will be removed in the 3.0 release. Use IndexWriter(string, Analyzer, MaxFieldLength) instead, and call Commit() when needed")] public IndexWriter(string path, Analyzer a) { InitBlock(); Init(FSDirectory.GetDirectory(path), a, true, null, true, DEFAULT_MAX_FIELD_LENGTH); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. /// /// NOTE: autoCommit (see above) is set to false with this constructor. /// /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to or if there is any other low-level IO error /// /// the path to the index directory /// the analyzer to use /// Maximum field Length: LIMITED, UNLIMITED, or user-specified public IndexWriter(System.IO.FileInfo path, Analyzer a, MaxFieldLength mfl) { InitBlock(); Init(FSDirectory.GetDirectory(path), a, true, null, false, mfl.GetLimit()); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to or if there is any other low-level IO error /// /// the path to the index directory /// the analyzer to use [System.Obsolete("This constructor will be removed in the 3.0 release. 
Use IndexWriter(FileInfo, Analyzer, MaxFieldLength) instead, and call Commit() when needed")] public IndexWriter(System.IO.FileInfo path, Analyzer a) { InitBlock(); Init(FSDirectory.GetDirectory(path), a, true, null, true, DEFAULT_MAX_FIELD_LENGTH); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. /// /// NOTE: autoCommit (see above) is set to false with this constructor. /// /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to or if there is any other low-level IO error /// /// the index directory /// the analyzer to use /// Maximum field Length: LIMITED, UNLIMITED, or user-specified public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl) { InitBlock(); Init(d, a, false, null, false, mfl.GetLimit()); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to or if there is any other low-level IO error /// /// the index directory /// the analyzer to use [System.Obsolete("This constructor will be removed in the 3.0 release. Use IndexWriter(Directory, Analyzer, MaxFieldLength) instead, and call Commit() when needed")] public IndexWriter(Directory d, Analyzer a) { InitBlock(); Init(d, a, false, null, true, DEFAULT_MAX_FIELD_LENGTH); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to or if there is any other low-level IO error /// /// the index directory /// see autoCommit above /// the analyzer to use [System.Obsolete("This constructor will be removed in the 3.0 release. Use IndexWriter(Directory, Analyzer, MaxFieldLength) instead, and call Commit() when needed")] public IndexWriter(Directory d, bool autoCommit, Analyzer a) { InitBlock(); Init(d, a, false, null, autoCommit, DEFAULT_MAX_FIELD_LENGTH); } /// /// Constructs an IndexWriter for the index in path. /// Text will be analyzed with a. If create /// is true, then a new, empty index will be created in /// d, replacing the index already there, if any. /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and create is false or if there is any other low-level IO error /// /// the path to the index directory /// see autoCommit above /// the analyzer to use /// true to create the index or overwrite the existing one; false to append to the existing index [System.Obsolete("This constructor will be removed in the 3.0 release. 
Use IndexWriter(Directory, Analyzer, bool, MaxFieldLength) instead, and call Commit() when needed")] public IndexWriter(Directory d, bool autoCommit, Analyzer a, bool create) { InitBlock(); Init(d, a, create, false, null, autoCommit, DEFAULT_MAX_FIELD_LENGTH); } /// /// Expert: constructs an IndexWriter with a custom /// IndexDeletionPolicy, for the index in d, /// first creating it if it does not already exist. Text /// will be analyzed with a. /// /// /// NOTE: autoCommit (see above) is set to false with this constructor. /// /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to or if there is any other low-level IO error /// /// the index directory /// the analyzer to use /// see deletionPolicy above /// Maximum field Length: LIMITED, UNLIMITED, or user-specified public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl) { InitBlock(); Init(d, a, false, deletionPolicy, false, mfl.GetLimit()); } /// /// Expert: constructs an IndexWriter with a custom /// IndexDeletionPolicy, for the index in d, /// first creating it if it does not already exist. Text /// will be analyzed with a. /// /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to or if there is any other low-level IO error /// /// the index directory /// the analyzer to use /// see deletionPolicy above /// Maximum field Length: LIMITED, UNLIMITED, or user-specified [System.Obsolete("This constructor will be removed in the 3.0 release. Use IndexWriter(Directory, Analyzer, IndexDeletionPolicy, MaxFieldLength) instead, and call Commit() when needed")] public IndexWriter(Directory d, bool autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy) { InitBlock(); Init(d, a, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH); } /// /// Expert: constructs an IndexWriter with a custom /// IndexDeletionPolicy, for the index in d, /// first creating it if it does not already exist. Text /// will be analyzed with a. If create /// is true, then a new, empty index will be created in /// d, replacing the index already there, if any. /// /// /// NOTE: autoCommit (see above) is set to false with this constructor. /// /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and create is false or if there is any other low-level IO error /// /// the index directory /// the analyzer to use /// true to create the index or overwrite the existing one; false to append to the existing index /// see deletionPolicy above /// Maximum field Length: LIMITED, UNLIMITED, or user-specified public IndexWriter(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl) { InitBlock(); Init(d, a, create, false, deletionPolicy, false, mfl.GetLimit()); } /// /// Expert: constructs an IndexWriter with a custom /// IndexDeletionPolicy, for the index in d, /// first creating it if it does not already exist. Text /// will be analyzed with a. 
If create /// is true, then a new, empty index will be created in /// d, replacing the index already there, if any. /// /// Throws CorruptIndexException if the index is corrupt /// Throws LockObtainFailedException if another writer has this index open (write.lock could not be obtained) /// Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and create is false or if there is any other low-level IO error /// /// the index directory /// see autoCommit above /// the analyzer to use /// true to create the index or overwrite the existing one; false to append to the existing index /// see deletionPolicy above /// Maximum field Length: LIMITED, UNLIMITED, or user-specified [System.Obsolete("This constructor will be removed in the 3.0 release. Use IndexWriter(Directory, Analyzer, bool, IndexDeletionPolicy, MaxFieldLength) instead, and call Commit() when needed")] public IndexWriter(Directory d, bool autoCommit, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy) { InitBlock(); Init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH); } private void Init(Directory d, Analyzer a, bool closeDir, IndexDeletionPolicy deletionPolicy, bool autoCommit, int maxFieldLength) { if (IndexReader.IndexExists(d)) { Init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength); } else { Init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength); } } private void Init(Directory d, Analyzer a, bool create, bool closeDir, IndexDeletionPolicy deletionPolicy, bool autoCommit, int maxFieldLength) { this.closeDir = closeDir; directory = d; analyzer = a; SetMessageID(defaultInfoStream); this.maxFieldLength = maxFieldLength; if (create) { // Clear the write lock in case it's leftover: directory.ClearLock(WRITE_LOCK_NAME); } Lock writeLock = directory.MakeLock(WRITE_LOCK_NAME); if (!writeLock.Obtain(writeLockTimeout)) // obtain write lock { throw new LockObtainFailedException("Index locked for write: " + writeLock); } this.writeLock = writeLock; // save it try { if (create) { // Try to read first. This is to allow create // against an index that's currently open for // searching. In this case we write the next // segments_N file with no segments: try { segmentInfos.Read(directory); segmentInfos.Clear(); } catch (System.IO.IOException) { // Likely this means it's a fresh directory } segmentInfos.Commit(directory); } else { segmentInfos.Read(directory); // We assume that this segments_N was previously // properly sync'd: for (int i = 0; i < segmentInfos.Count; i++) { SegmentInfo info = segmentInfos.Info(i); List files = info.Files(); for (int j = 0; j < files.Count; j++) synced[files[j]] = files[j]; } } this.autoCommit = autoCommit; SetRollbackSegmentInfos(segmentInfos); docWriter = new DocumentsWriter(directory, this); docWriter.SetInfoStream(infoStream); docWriter.SetMaxFieldLength(maxFieldLength); // Default deleter (for backwards compatibility) is // KeepOnlyLastCommitDeleter: deleter = new IndexFileDeleter(directory, deletionPolicy == null ? 
new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, segmentInfos, infoStream, docWriter); PushMaxBufferedDocs(); if (infoStream != null) { Message("init: create=" + create); MessageState(); } } catch (System.IO.IOException e) { this.writeLock.Release(); this.writeLock = null; throw e; } } private void SetRollbackSegmentInfos(SegmentInfos infos) { lock (this) { rollbackSegmentInfos = (SegmentInfos)infos.Clone(); System.Diagnostics.Debug.Assert(!HasExternalSegments(rollbackSegmentInfos)); rollbackSegments = new Dictionary(); int size = rollbackSegmentInfos.Count; for (int i = 0; i < size; i++) rollbackSegments[rollbackSegmentInfos.Info(i)] = i; } } /// Expert: set the merge policy used by this writer. public virtual void SetMergePolicy(MergePolicy mp) { EnsureOpen(); if (mp == null) throw new System.NullReferenceException("MergePolicy must be non-null"); if (mergePolicy != mp) mergePolicy.Close(); mergePolicy = mp; PushMaxBufferedDocs(); if (infoStream != null) { Message("setMergePolicy " + mp); } } /// Expert: returns the current MergePolicy in use by this writer. /// /// public virtual MergePolicy GetMergePolicy() { EnsureOpen(); return mergePolicy; } /// Expert: set the merge scheduler used by this writer. public virtual void SetMergeScheduler(MergeScheduler mergeScheduler) { lock (this) { EnsureOpen(); if (mergeScheduler == null) throw new System.NullReferenceException("MergeScheduler must be non-null"); if (this.mergeScheduler != mergeScheduler) { FinishMerges(true); this.mergeScheduler.Close(); } this.mergeScheduler = mergeScheduler; if (infoStream != null) Message("setMergeScheduler " + mergeScheduler); } } /// Expert: returns the current MergePolicy in use by this /// writer. /// /// /// public virtual MergeScheduler GetMergeScheduler() { EnsureOpen(); return mergeScheduler; } ///

/// Determines the largest segment (measured by document count) that
/// may be merged with other segments. Small values (e.g., less than
/// 10,000) are best for interactive indexing, as this limits the
/// length of pauses while indexing to a few seconds. Larger values
/// are best for batched indexing and speedier searches.
///

/// The default value is {@link Integer#MAX_VALUE}.
///

/// Note that this method is a convenience method: it just calls
/// mergePolicy.SetMaxMergeDocs as long as mergePolicy is an instance
/// of {@link LogMergePolicy}. Otherwise a System.ArgumentException
/// is thrown.
///

/// The default merge policy ({@link LogByteSizeMergePolicy}) also
/// allows you to set this limit by net size (in MB) of the segment,
/// using {@link LogByteSizeMergePolicy#setMaxMergeMB}.

///
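/// For example, a sketch for interactive indexing (the values are
/// illustrative):
///
///     writer.SetMaxMergeDocs(10000);
///     // or, with the default LogByteSizeMergePolicy, cap by size:
///     // ((LogByteSizeMergePolicy)writer.GetMergePolicy()).SetMaxMergeMB(256.0);
///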
public virtual void SetMaxMergeDocs(int maxMergeDocs) { GetLogMergePolicy().SetMaxMergeDocs(maxMergeDocs); } ///

/// Returns the largest segment (measured by document count) that may
/// be merged with other segments.
///

/// Note that this method is a convenience method: it just calls
/// mergePolicy.GetMaxMergeDocs as long as mergePolicy is an instance
/// of {@link LogMergePolicy}. Otherwise a System.ArgumentException
/// is thrown.
///
public virtual int GetMaxMergeDocs() { return GetLogMergePolicy().GetMaxMergeDocs(); }

/// The maximum number of terms that will be indexed for a single field in a
/// document. This limits the amount of memory required for indexing, so that
/// collections with very large files will not crash the indexing process by
/// running out of memory. This setting refers to the number of running terms,
/// not to the number of different terms.

/// Note: this silently truncates large documents, excluding from the
/// index all terms that occur further in the document. If you know your source
/// documents are large, be sure to set this value high enough to accommodate
/// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
/// is your memory, but you should anticipate a System.OutOfMemoryException.

/// By default, no more than DEFAULT_MAX_FIELD_LENGTH terms will be indexed for a field. ///

public virtual void SetMaxFieldLength(int maxFieldLength) { EnsureOpen(); this.maxFieldLength = maxFieldLength; docWriter.SetMaxFieldLength(maxFieldLength); if (infoStream != null) Message("setMaxFieldLength " + maxFieldLength); }

/// Returns the maximum number of terms that will be
/// indexed for a single field in a document.
public virtual int GetMaxFieldLength() { EnsureOpen(); return maxFieldLength; }

/// Determines the minimal number of documents required
/// before the buffered in-memory documents are flushed as
/// a new Segment. Large values generally give faster
/// indexing.
///

/// When this is set, the writer will flush every maxBufferedDocs
/// added documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
/// triggering a flush due to number of buffered documents. Note that
/// if flushing by RAM usage is also enabled, then the flush will be
/// triggered by whichever comes first.
///

/// Disabled by default (writer flushes by RAM usage).
///
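/// For example, to flush by document count instead of by RAM usage
/// (1000 is illustrative):
///
///     writer.SetMaxBufferedDocs(1000);
///     writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
///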
/// System.ArgumentException if maxBufferedDocs is /// enabled but smaller than 2, or it disables maxBufferedDocs /// when ramBufferSize is already disabled /// /// /// public virtual void SetMaxBufferedDocs(int maxBufferedDocs) { EnsureOpen(); if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) throw new System.ArgumentException("maxBufferedDocs must at least be 2 when enabled"); if (maxBufferedDocs == DISABLE_AUTO_FLUSH && GetRAMBufferSizeMB() == DISABLE_AUTO_FLUSH) throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled"); docWriter.SetMaxBufferedDocs(maxBufferedDocs); PushMaxBufferedDocs(); if (infoStream != null) Message("setMaxBufferedDocs " + maxBufferedDocs); } /// If we are flushing by doc count (not by RAM usage), and /// using LogDocMergePolicy then push maxBufferedDocs down /// as its minMergeDocs, to keep backwards compatibility. /// private void PushMaxBufferedDocs() { if (docWriter.GetMaxBufferedDocs() != DISABLE_AUTO_FLUSH) { MergePolicy mp = mergePolicy; if (mp is LogDocMergePolicy) { LogDocMergePolicy lmp = (LogDocMergePolicy)mp; int maxBufferedDocs = docWriter.GetMaxBufferedDocs(); if (lmp.GetMinMergeDocs() != maxBufferedDocs) { if (infoStream != null) Message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy"); lmp.SetMinMergeDocs(maxBufferedDocs); } } } } /// Returns the number of buffered added documents that will /// trigger a flush if enabled. /// /// /// public virtual int GetMaxBufferedDocs() { EnsureOpen(); return docWriter.GetMaxBufferedDocs(); } /// Determines the amount of RAM that may be used for /// buffering added documents before they are flushed as a /// new Segment. Generally for faster indexing performance /// it's best to flush by RAM usage instead of document /// count and use as large a RAM buffer as you can. /// ///

/// When this is set, the writer will flush whenever buffered
/// documents use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH}
/// to prevent triggering a flush due to RAM usage. Note that if
/// flushing by document count is also enabled, then the flush will
/// be triggered by whichever comes first.
///

/// The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
///
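/// For example, a common bulk-indexing configuration (48 MB is
/// illustrative):
///
///     writer.SetRAMBufferSizeMB(48.0);
///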
/// System.ArgumentException if ramBufferSize is /// enabled but non-positive, or it disables ramBufferSize /// when maxBufferedDocs is already disabled /// public virtual void SetRAMBufferSizeMB(double mb) { if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0) throw new System.ArgumentException("ramBufferSize should be > 0.0 MB when enabled"); if (mb == DISABLE_AUTO_FLUSH && GetMaxBufferedDocs() == DISABLE_AUTO_FLUSH) throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled"); docWriter.SetRAMBufferSizeMB(mb); if (infoStream != null) Message("setRAMBufferSizeMB " + mb); } /// Returns the value set by {@link #setRAMBufferSizeMB} if enabled. public virtual double GetRAMBufferSizeMB() { return docWriter.GetRAMBufferSizeMB(); } ///

/// Determines the minimal number of delete terms required before the buffered
/// in-memory delete terms are applied and flushed. If there are documents
/// buffered in memory at the time, they are merged and a new segment is
/// created.

///

/// Disabled by default (writer flushes by RAM usage).
///
/// System.ArgumentException if maxBufferedDeleteTerms /// is enabled but smaller than 1 /// /// /// public virtual void SetMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) { EnsureOpen(); if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1) throw new System.ArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled"); docWriter.SetMaxBufferedDeleteTerms(maxBufferedDeleteTerms); if (infoStream != null) Message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms); } /// Returns the number of buffered deleted terms that will /// trigger a flush if enabled. /// /// /// public virtual int GetMaxBufferedDeleteTerms() { EnsureOpen(); return docWriter.GetMaxBufferedDeleteTerms(); } /// Determines how often segment indices are merged by addDocument(). With /// smaller values, less RAM is used while indexing, and searches on /// unoptimized indices are faster, but indexing speed is slower. With larger /// values, more RAM is used during indexing, and while searches on unoptimized /// indices are slower, indexing is faster. Thus larger values (> 10) are best /// for batch index creation, and smaller values (< 10) for indices that are /// interactively maintained. /// ///

/// Note that this method is a convenience method: it just calls
/// mergePolicy.SetMergeFactor as long as mergePolicy is an instance
/// of {@link LogMergePolicy}. Otherwise a System.ArgumentException
/// is thrown.
///

/// This must never be less than 2. The default value is 10.
///
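/// For example (requires the default LogMergePolicy; the values are
/// illustrative):
///
///     writer.SetMergeFactor(30);   // batch index creation
///     // writer.SetMergeFactor(2); // interactively maintained index
///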

public virtual void SetMergeFactor(int mergeFactor) { GetLogMergePolicy().SetMergeFactor(mergeFactor); } ///

/// Returns the number of segments that are merged at once and also
/// controls the total number of segments allowed to accumulate in
/// the index.
///

/// Note that this method is a convenience method: it just calls
/// mergePolicy.GetMergeFactor as long as mergePolicy is an instance
/// of {@link LogMergePolicy}. Otherwise a System.ArgumentException
/// is thrown.
///
/// /// public virtual int GetMergeFactor() { return GetLogMergePolicy().GetMergeFactor(); } /// /// Expert: returns max delay inserted before syncing a /// commit point. On Windows, at least, pausing before /// syncing can increase net indexing throughput. The /// delay is variable based on size of the segment's files, /// and is only inserted when using ConcurrentMergeScheduler for merges. /// [System.Obsolete("This will be removed in 3.0, when autoCommit=true is removed from IndexWriter.")] public double GetMaxSyncPauseSeconds() { return maxSyncPauseSeconds; } /// Expert: sets the max delay before syncing a commit point. /// /// See GetMaxSyncPauseSeconds() [System.Obsolete("This will be removed in 3.0, when autoCommit=true is removed from IndexWriter.")] public void SetMaxSyncPauseSeconds(double seconds) { maxSyncPauseSeconds = seconds; } /// If non-null, this will be the default infoStream used /// by a newly instantiated IndexWriter. /// /// /// public static void SetDefaultInfoStream(System.IO.TextWriter infoStream) { IndexWriter.defaultInfoStream = infoStream; } /// Returns the current default infoStream for newly /// instantiated IndexWriters. /// /// /// public static System.IO.TextWriter GetDefaultInfoStream() { return IndexWriter.defaultInfoStream; } /// If non-null, information about merges, deletes and a /// message when maxFieldLength is reached will be printed /// to this. /// public virtual void SetInfoStream(System.IO.TextWriter infoStream) { EnsureOpen(); SetMessageID(infoStream); docWriter.SetInfoStream(infoStream); deleter.SetInfoStream(infoStream); if (infoStream != null) MessageState(); } private void MessageState() { Message( "setInfoStream: dir=" + directory + " autoCommit=" + autoCommit + " mergePolicy=" + mergePolicy + " mergeScheduler=" + mergeScheduler + " ramBufferSizeMB=" + docWriter.GetRAMBufferSizeMB() + " maxBufferedDocs=" + docWriter.GetMaxBufferedDocs() + " maxBuffereDeleteTerms=" + docWriter.GetMaxBufferedDeleteTerms() + " maxFieldLength=" + maxFieldLength + " index=" + SegString() ); } /// Returns the current infoStream in use by this writer. /// /// public virtual System.IO.TextWriter GetInfoStream() { EnsureOpen(); return infoStream; } /// /// /// /// public virtual void SetWriteLockTimeout(long writeLockTimeout) { EnsureOpen(); this.writeLockTimeout = writeLockTimeout; } /// Returns allowed timeout when acquiring the write lock. /// /// public virtual long GetWriteLockTimeout() { EnsureOpen(); return writeLockTimeout; } /// Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in /// milliseconds). /// public static void SetDefaultWriteLockTimeout(long writeLockTimeout) { IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout; } /// Returns default write lock timeout for newly /// instantiated IndexWriters. /// /// /// public static long GetDefaultWriteLockTimeout() { return IndexWriter.WRITE_LOCK_TIMEOUT; } /// /// Commits all changes to an index and closes all /// associated files. Note that this may be a costly /// operation, so try to re-use a single writer instead of /// closing and opening a new one. See Commit() for /// caveats about write caching done by some IO devices. /// ///

/// If an Exception is hit during close, e.g. due to disk full or
/// some other reason, then both the on-disk index and the internal
/// state of the IndexWriter instance will be consistent. However,
/// the close will not be complete even though part of it (flushing
/// buffered documents) may have succeeded, so the write lock will
/// still be held.
///

/// If you can correct the underlying cause (e.g. free up some disk
/// space) then you can call Close() again. Failing that, if you want
/// to force the write lock to be released (dangerous, because you
/// may then lose buffered docs in the IndexWriter instance) then you
/// can do something like this:
///
        /// try
        /// {
        ///     writer.Close();
        /// }
        /// finally
        /// {
        ///     if (IndexWriter.IsLocked(directory))
        ///     {
        ///         IndexWriter.Unlock(directory);
        ///     }
        /// }
        /// 
/// after which, you must be certain not to use the writer
/// instance anymore.

///
/// CorruptIndexException if the index is corrupt /// System.IO.IOException if there is a low-level IO error public virtual void Close() { Close(true); } /// Closes the index with or without waiting for currently /// running merges to finish. This is only meaningful when /// using a MergeScheduler that runs merges in background /// threads. /// /// if true, this call will block /// until all merges complete; else, it will ask all /// running merges to abort, wait until those merges have /// finished (which should be at most a few seconds), and /// then return. /// public virtual void Close(bool waitForMerges) { // If any methods have hit System.OutOfMemoryException, then abort // on close, in case theinternal state of IndexWriter // or DocumentsWriter is corrupt if (hitOOM) { Rollback(); return; } // ensure that only one thread actually gets to do the closing if (ShouldClose()) CloseInternal(waitForMerges); } // returns true if this thread should attempt to close, or // false if IndexWriter is now closed; else, waits until another // thread finishes closing private bool ShouldClose() { lock (this) { while (true) { if (!closed) { if (!closing) { closing = true; return true; } else { // another thread is presently trying to close; // wait until it finishes (successfully or not) DoWait(); } } else return false; } } } private void CloseInternal(bool waitForMerges) { docWriter.PauseAllThreads(); try { if (infoStream != null) Message("now flush at close"); docWriter.Close(); // Only allow a new merge to be triggered if we are // going to wait for merges: Flush(waitForMerges, true, true); if (waitForMerges) // Give merge scheduler last chance to run, in case // any pending merges are waiting mergeScheduler.Merge(this); mergePolicy.Close(); FinishMerges(waitForMerges); mergeScheduler.Close(); if (infoStream != null) Message("now call final commit"); Commit(0); if (infoStream != null) Message("at close: " + SegString()); lock (this) { docWriter = null; deleter.Close(); } if (closeDir) directory.Close(); if (writeLock != null) { writeLock.Release(); // release write lock writeLock = null; } lock (this) { closed = true; } } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } finally { lock (this) { closing = false; System.Threading.Monitor.PulseAll(this); if (!closed) { if (docWriter != null) docWriter.ResumeAllThreads(); closing = false; if (infoStream != null) Message("hit exception while closing"); } } } } /// Tells the docWriter to close its currently open shared /// doc stores (stored fields & vectors files). /// Return value specifices whether new doc store files are compound or not. /// private bool FlushDocStores() { lock (this) { bool useCompoundDocStore = false; string docStoreSegment; bool success = false; try { docStoreSegment = docWriter.CloseDocStore(); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception closing doc store segment"); } } useCompoundDocStore = mergePolicy.UseCompoundDocStore(segmentInfos); if (useCompoundDocStore && docStoreSegment != null && docWriter.ClosedFiles().Count != 0) { // Now build compound doc store file success = false; int numSegments = segmentInfos.Count; string compoundFileName = docStoreSegment + "." 
+ IndexFileNames.COMPOUND_FILE_STORE_EXTENSION; try { CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName); IEnumerator it = docWriter.ClosedFiles().GetEnumerator(); while (it.MoveNext()) cfsWriter.AddFile(it.Current); // Perform the merge cfsWriter.Close(); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception building compound file doc store for segment " + docStoreSegment); deleter.DeleteFile(compoundFileName); } } for (int i = 0; i < numSegments; i++) { SegmentInfo si = segmentInfos.Info(i); if (si.GetDocStoreOffset() != -1 && si.GetDocStoreSegment().Equals(docStoreSegment)) si.SetDocStoreIsCompoundFile(true); } Checkpoint(); // in case the files we just merged int a CFS were not previously checkpointed: deleter.DeleteNewFiles(docWriter.ClosedFiles()); } return useCompoundDocStore; } } /// Release the write lock, if needed. ~IndexWriter() { try { if (writeLock != null) { writeLock.Release(); // release write lock writeLock = null; } } finally { } } /// Returns the Directory used by this index. public virtual Directory GetDirectory() { // pass false because the flush during closing calls GetDirectory EnsureOpen(false); return directory; } /// Returns the analyzer used by this index. public virtual Analyzer GetAnalyzer() { EnsureOpen(); return analyzer; } /// Returns the number of documents currently in this index, not counting deletions. [System.Obsolete("Please use MaxDoc() (same as this method) or NumDocs() (takes deletions into account)")] public virtual int DocCount() { lock (this) { EnsureOpen(); return MaxDoc(); } } /// /// Returns total number of docs in this index, including docs not yet flushed (still in the RAM buffer), /// without regard for deletions (see NumDocs()). /// /// public int MaxDoc() { lock (this) { int count; if (docWriter != null) count = docWriter.GetNumDocsInRAM(); else count = 0; for (int i = 0; i < segmentInfos.Count; i++) count += segmentInfos.Info(i).docCount; return count; } } /// /// Returns total number of docs in this index, including docs not yet flushed (still in the RAM buffer), /// with regard for deletions. NOTE: Buffered deletions are not excluded. If these need to be excluded, /// call Commit() first. /// /// public int NumDocs() { lock (this) { int count; if (docWriter != null) count = docWriter.GetNumDocsInRAM(); else count = 0; for (int i = 0; i < segmentInfos.Count; i++) { SegmentInfo info = segmentInfos.Info(i); count += info.docCount - info.GetDelCount(); } return count; } } public bool HasDeletions() { lock (this) { EnsureOpen(); if (docWriter.HasDeletes()) return true; for (int i = 0; i < segmentInfos.Count; i++) if (segmentInfos.Info(i).HasDeletions()) return true; return false; } } /// The maximum number of terms that will be indexed for a single field in a /// document. This limits the amount of memory required for indexing, so that /// collections with very large files will not crash the indexing process by /// running out of memory.

/// Note that this effectively truncates large documents, excluding from the /// index terms that occur further in the document. If you know your source /// documents are large, be sure to set this value high enough to accommodate /// the expected size. If you set it to int.MaxValue, then the only limit /// is your memory, but you should anticipate a System.OutOfMemoryException.

/// By default, no more than 10,000 terms will be indexed for a field. ///
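///
/// For example (a sketch; the writer variable is assumed to be an open
/// IndexWriter), the limit can be raised via the SetMaxFieldLength setter
/// referenced below:
/// <code>
/// // Index very large documents in full; expect correspondingly
/// // higher RAM usage, as noted above.
/// writer.SetMaxFieldLength(int.MaxValue);
/// </code>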

/// private int maxFieldLength; /// Adds a document to this index. If the document contains more than /// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are /// discarded. /// ///

Note that if an Exception is hit (for example disk full) /// then the index will be consistent, but this document /// may not have been added. Furthermore, it's possible /// the index will have one segment in non-compound format /// even when using compound files (when a merge has /// partially succeeded).

/// ///

This method periodically flushes pending documents /// to the Directory (see Flush(), above), /// and also periodically triggers segment merges in the index /// according to the MergePolicy in use. /// ///

Merges temporarily consume space in the directory. The amount of space /// required is up to 1X the size of all segments being merged, when no /// readers/searchers are open against the index, and up to 2X the size of /// all segments being merged when readers/searchers are open against the /// index (see Optimize() for details). The sequence of primitive merge /// operations performed is governed by the merge policy. /// ///

Note that each term in the document can be no longer /// than 16383 characters, otherwise an /// ArgumentException will be thrown.

/// /// /// Note that it's possible to create an invalid Unicode string in .NET if a /// UTF16 surrogate pair is malformed. In this case, the invalid characters /// are silently replaced with the Unicode replacement character U+FFFD. /// ///
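///
/// A minimal usage sketch (the writer, field names and values are assumed
/// for illustration; Field.Store/Field.Index follow the usual Lucene.Net
/// field API):
/// <code>
/// Document doc = new Document();
/// doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
/// doc.Add(new Field("body", "some text to index", Field.Store.NO, Field.Index.ANALYZED));
/// writer.AddDocument(doc);
/// </code>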
/// CorruptIndexException if the index is corrupt /// System.IO.IOException if there is a low-level IO error public virtual void AddDocument(Document doc) { AddDocument(doc, analyzer); } /// Adds a document to this index, using the provided analyzer instead of the /// value of {@link #GetAnalyzer()}. If the document contains more than /// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are /// discarded. /// ///

See {@link #AddDocument(Document)} for details on /// index and IndexWriter state after an Exception, and /// flushing/merging temporary free space requirements.

/// ///
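/// For example (a sketch; the analyzer choice is an assumption for
/// illustration), a single document can be analyzed differently from the
/// writer's default:
/// <code>
/// // This one document is tokenized on whitespace only, regardless of
/// // the analyzer the writer was opened with.
/// writer.AddDocument(doc, new Lucene.Net.Analysis.WhitespaceAnalyzer());
/// </code>
///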
/// CorruptIndexException if the index is corrupt /// System.IO.IOException if there is a low-level IO error public virtual void AddDocument(Document doc, Analyzer analyzer) { EnsureOpen(); bool doFlush = false; bool success = false; try { try { doFlush = docWriter.AddDocument(doc, analyzer); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception adding document"); lock (this) { // If docWriter has some aborted files that were // never incref'd, then we clean them up here if (docWriter != null) { ICollection files = docWriter.AbortedFiles(); if (files != null) deleter.DeleteNewFiles(files); } } } } if (doFlush) Flush(true, false, false); } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } } /// Deletes the document(s) containing term. /// the term to identify the documents to be deleted /// /// CorruptIndexException if the index is corrupt /// System.IO.IOException if there is a low-level IO error public virtual void DeleteDocuments(Term term) { EnsureOpen(); try { bool doFlush = docWriter.BufferDeleteTerm(term); if (doFlush) Flush(true, false, false); } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } } /// Deletes the document(s) containing any of the /// terms. All deletes are flushed at the same time. /// /// array of terms to identify the documents /// to be deleted /// /// CorruptIndexException if the index is corrupt /// System.IO.IOException if there is a low-level IO error public virtual void DeleteDocuments(Term[] terms) { EnsureOpen(); try { bool doFlush = docWriter.BufferDeleteTerms(terms); if (doFlush) Flush(true, false, false); } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } } /// /// Deletes the document(s) matching the provided query. /// @param query the query to identify the documents to be deleted /// @throws CorruptIndexException if the index is corrupt /// @throws System.IO.IOException if there is a low-level IO error /// public void DeleteDocuments(Query query) { EnsureOpen(); bool doFlush = docWriter.BufferDeleteQuery(query); if (doFlush) Flush(true, false, false); } /// /// Deletes the document(s) matching any of the provided queries. /// All deletes are flushed at the same time. /// @param queries array of queries to identify the documents /// to be deleted /// @throws CorruptIndexException if the index is corrupt /// @throws System.IO.IOException if there is a low-level IO error /// public void DeleteDocuments(Query[] queries) { EnsureOpen(); bool doFlush = docWriter.BufferDeleteQueries(queries); if (doFlush) Flush(true, false, false); } /// Updates a document by first deleting the document(s) /// containing term and then adding the new /// document. The delete and then add are atomic as seen /// by a reader on the same index (flush may happen only after /// the add). /// /// the term to identify the document(s) to be /// deleted /// /// the document to be added /// /// CorruptIndexException if the index is corrupt /// System.IO.IOException if there is a low-level IO error public virtual void UpdateDocument(Term term, Document doc) { EnsureOpen(); UpdateDocument(term, doc, GetAnalyzer()); } /// Updates a document by first deleting the document(s) /// containing term and then adding the new /// document. The delete and then add are atomic as seen /// by a reader on the same index (flush may happen only after /// the add). 
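///
/// A minimal sketch (field names assumed for illustration): giving every
/// document a unique "id" field makes the Term a convenient handle for
/// updates:
/// <code>
/// Document newVersion = new Document();
/// newVersion.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
/// newVersion.Add(new Field("body", "updated text", Field.Store.NO, Field.Index.ANALYZED));
/// writer.UpdateDocument(new Term("id", "42"), newVersion);
/// </code>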
/// /// the term to identify the document(s) to be /// deleted /// /// the document to be added /// /// the analyzer to use when analyzing the document /// /// CorruptIndexException if the index is corrupt /// System.IO.IOException if there is a low-level IO error public virtual void UpdateDocument(Term term, Document doc, Analyzer analyzer) { EnsureOpen(); try { bool doFlush = false; bool success = false; try { doFlush = docWriter.UpdateDocument(term, doc, analyzer); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception updating document"); lock (this) { // If docWriter has some aborted files that were // never incref'd, then we clean them up here ICollection files = docWriter.AbortedFiles(); if (files != null) deleter.DeleteNewFiles(files); } } } if (doFlush) Flush(true, false, false); } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } } // for test purposes public /*internal*/ int GetSegmentCount() { lock (this) { return segmentInfos.Count; } } // for test purposes public /*internal*/ int GetNumBufferedDocuments() { lock (this) { return docWriter.GetNumDocsInRAM(); } } // for test purposes public /*internal*/ int GetDocCount(int i) { lock (this) { if (i >= 0 && i < segmentInfos.Count) { return segmentInfos.Info(i).docCount; } else { return -1; } } } // for test purposes public int GetFlushCount() { lock (this) { return flushCount; } } // for test purposes public int GetFlushDeletesCount() { lock (this) { return flushDeletesCount; } } internal string NewSegmentName() { // Cannot synchronize on IndexWriter because that causes // deadlock lock (segmentInfos) { // Important to increment changeCount so that the // segmentInfos is written on close. Otherwise we // could close, re-open and re-return the same segment // name that was previously returned which can cause // problems at least with ConcurrentMergeScheduler. changeCount++; return "_" + SupportClass.Number.ToString(segmentInfos.counter++); } } /// If non-null, information about merges will be printed to this. private System.IO.TextWriter infoStream = null; private static System.IO.TextWriter defaultInfoStream = null; /// Requests an "optimize" operation on an index, priming the index /// for the fastest available search. Traditionally this has meant /// merging all segments into a single segment as is done in the /// default merge policy, but individual merge policies may implement /// optimize in different ways. /// /// /// /// ///

It is recommended that this method be called upon completion of indexing. In /// environments with frequent updates, optimize is best done during low volume times, if at all. /// ///

///

See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.

/// ///

Note that this can require substantial temporary free /// space in the Directory (see LUCENE-764 /// for details):

/// ///
///
/// - If no readers/searchers are open against the index,
///   then free space required is up to 1X the total size of
///   the starting index. For example, if the starting
///   index is 10 GB, then you must have up to 10 GB of free
///   space before calling optimize.
///
/// - If readers/searchers are using the index, then free
///   space required is up to 2X the size of the starting
///   index. This is because in addition to the 1X used by
///   optimize, the original 1X of the starting index is
///   still consuming space in the Directory as the readers
///   are holding the segments files open. Even on Unix,
///   where it will appear as if the files are gone ("ls"
///   won't list them), they still consume storage due to
///   "delete on last close" semantics.
///
///   Furthermore, if some but not all readers re-open
///   while the optimize is underway, this will cause > 2X
///   temporary space to be consumed as those new readers
///   will then hold open the partially optimized segments at
///   that time. It is best not to re-open readers while
///   optimize is running.
///

The actual temporary usage could be much less than /// these figures (it depends on many factors).

/// ///

In general, once the optimize completes, the total size of the /// index will be less than the size of the starting index. /// It could be quite a bit smaller (if there were many /// pending deletes) or just slightly smaller.

/// ///

If an Exception is hit during Optimize(), for example /// due to disk full, the index will not be corrupt and no /// documents will have been lost. However, it may have /// been partially optimized (some segments were merged but /// not all), and it's possible that one of the segments in /// the index will be in non-compound format even when /// using compound file format. This will occur when the /// Exception is hit during conversion of the segment into /// compound format.

/// ///

This call will optimize those segments present in /// the index when the call started. If other threads are /// still adding documents and flushing segments, those /// newly created segments will not be optimized unless you /// call optimize again.

/// ///
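/// A minimal sketch (the writer is assumed to be open):
/// <code>
/// // Merge down to a single segment and block until done:
/// writer.Optimize();
///
/// // Or leave up to 5 segments and return without waiting for
/// // background merges to complete:
/// writer.Optimize(5, false);
/// </code>
///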
/// CorruptIndexException if the index is corrupt /// System.IO.IOException if there is a low-level IO error public virtual void Optimize() { Optimize(true); } /// Optimize the index down to <= maxNumSegments. If /// maxNumSegments==1 then this is the same as {@link /// #Optimize()}. /// /// maximum number of segments left /// in the index after optimization finishes /// public virtual void Optimize(int maxNumSegments) { Optimize(maxNumSegments, true); } /// Just like {@link #Optimize()}, except you can specify /// whether the call should block until the optimize /// completes. This is only meaningful with a /// {@link MergeScheduler} that is able to run merges in /// background threads. /// public virtual void Optimize(bool doWait) { Optimize(1, doWait); } /// Just like {@link #Optimize(int)}, except you can /// specify whether the call should block until the /// optimize completes. This is only meaningful with a /// {@link MergeScheduler} that is able to run merges in /// background threads. /// public virtual void Optimize(int maxNumSegments, bool doWait) { EnsureOpen(); if (maxNumSegments < 1) throw new System.ArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments); if (infoStream != null) Message("optimize: index now " + SegString()); Flush(true, false, true); lock (this) { ResetMergeExceptions(); segmentsToOptimize = new Dictionary<SegmentInfo, SegmentInfo>(); int numSegments = segmentInfos.Count; for (int i = 0; i < numSegments; i++) if (!segmentsToOptimize.ContainsKey(segmentInfos.Info(i))) segmentsToOptimize[segmentInfos.Info(i)] = segmentInfos.Info(i); // Now mark all pending & running merges as optimize // merge: IEnumerator<MergePolicy.OneMerge> it = pendingMerges.GetEnumerator(); while (it.MoveNext()) { MergePolicy.OneMerge merge = it.Current; merge.optimize = true; merge.maxNumSegmentsOptimize = maxNumSegments; } it = runningMerges.Keys.GetEnumerator(); while (it.MoveNext()) { MergePolicy.OneMerge merge = it.Current; merge.optimize = true; merge.maxNumSegmentsOptimize = maxNumSegments; } } MaybeMerge(maxNumSegments, true); if (doWait) { lock (this) { while (true) { if (mergeExceptions.Count > 0) { // Forward any exceptions in background merge // threads to the current thread: int size = mergeExceptions.Count; for (int i = 0; i < size; i++) { MergePolicy.OneMerge merge = (MergePolicy.OneMerge)mergeExceptions[i]; if (merge.optimize) { System.IO.IOException err; System.Exception t = merge.GetException(); if (t != null) err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory), t); else err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory)); throw err; } } } if (OptimizeMergesPending()) DoWait(); else break; } } // if close is called while we are still running, throw an exception so the calling // thread will know the optimize did not complete EnsureOpen(); } // NOTE: in the ConcurrentMergeScheduler case, when // doWait is false, we can return immediately while // background threads accomplish the optimization } /// Returns true if any merges in pendingMerges or /// runningMerges are optimization merges. /// private bool OptimizeMergesPending() { lock (this) { IEnumerator<MergePolicy.OneMerge> it = pendingMerges.GetEnumerator(); while (it.MoveNext()) { if (it.Current.optimize) return true; } it = runningMerges.Keys.GetEnumerator(); while (it.MoveNext()) { if (it.Current.optimize) return true; } return false; } } /// /// Just like {@link #expungeDeletes()}, except you can /// specify whether the call should block until the /// operation completes.
This is only meaningful with a /// {@link MergeScheduler} that is able to run merges in /// background threads. /// public void ExpungeDeletes(bool doWait) { EnsureOpen(); if (infoStream != null) Message("expungeDeletes: index now " + SegString()); MergePolicy.MergeSpecification spec; lock (this) { spec = mergePolicy.FindMergesToExpungeDeletes(segmentInfos, this); if (spec != null) { int numMerges = spec.merges.Count; for (int i = 0; i < numMerges; i++) RegisterMerge((MergePolicy.OneMerge)spec.merges[i]); } } mergeScheduler.Merge(this); if (spec != null && doWait) { int numMerges = spec.merges.Count; lock (this) { bool running = true; while (running) { // Check each merge that MergePolicy asked us to // do, to see if any of them are still running and // if any of them have hit an exception. running = false; for (int i = 0; i < numMerges; i++) { MergePolicy.OneMerge merge = (MergePolicy.OneMerge)spec.merges[i]; if (pendingMerges.Contains(merge) || runningMerges.ContainsKey(merge)) running = true; System.Exception t = merge.GetException(); if (t != null) { System.IO.IOException ioe = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory), t); throw ioe; } } // If any of our merges are still running, wait: if (running) DoWait(); } } } // NOTE: in the ConcurrentMergeScheduler case, when // doWait is false, we can return immediately while // background threads accomplish the optimization } /// /// Expunges all deletes from the index. When an index /// has many document deletions (or updates to existing /// documents), it's best to either call optimize or /// expungeDeletes to remove all unused data in the index /// associated with the deleted documents. To see how /// many deletions you have pending in your index, call /// {@link IndexReader#numDeletedDocs} /// This saves disk space and memory usage while /// searching. expungeDeletes should be somewhat faster /// than optimize since it does not insist on reducing the /// index to a single segment (though, this depends on the /// {@link MergePolicy}; see {@link /// MergePolicy#findMergesToExpungeDeletes}.). Note that /// this call does not first commit any buffered /// documents, so you must do so yourself if necessary. /// See also {@link #expungeDeletes(bool)} /// public void ExpungeDeletes() { ExpungeDeletes(true); } /// Expert: asks the mergePolicy whether any merges are /// necessary now and if so, runs the requested merges and /// then iterate (test again if merges are needed) until no /// more merges are returned by the mergePolicy. /// /// Explicit calls to MaybeMerge() are usually not /// necessary. The most common case is when merge policy /// parameters have changed. 
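///
/// For example (a sketch; SetMergeFactor is the usual convenience setter on
/// IndexWriter and is an assumption here): after changing merge policy
/// parameters, ask the writer to re-evaluate pending merges:
/// <code>
/// writer.SetMergeFactor(5); // merge policy parameters changed
/// writer.MaybeMerge();      // check whether merges are now needed
/// </code>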
/// public void MaybeMerge() { MaybeMerge(false); } private void MaybeMerge(bool optimize) { MaybeMerge(1, optimize); } private void MaybeMerge(int maxNumSegmentsOptimize, bool optimize) { UpdatePendingMerges(maxNumSegmentsOptimize, optimize); mergeScheduler.Merge(this); } private void UpdatePendingMerges(int maxNumSegmentsOptimize, bool optimize) { lock (this) { System.Diagnostics.Debug.Assert(!optimize || maxNumSegmentsOptimize > 0); if (stopMerges) return; MergePolicy.MergeSpecification spec; if (optimize) { spec = mergePolicy.FindMergesForOptimize(segmentInfos, this, maxNumSegmentsOptimize, segmentsToOptimize); if (spec != null) { int numMerges = spec.merges.Count; for (int i = 0; i < numMerges; i++) { MergePolicy.OneMerge merge = ((MergePolicy.OneMerge)spec.merges[i]); merge.optimize = true; merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize; } } } else spec = mergePolicy.FindMerges(segmentInfos, this); if (spec != null) { int numMerges = spec.merges.Count; for (int i = 0; i < numMerges; i++) RegisterMerge((MergePolicy.OneMerge)spec.merges[i]); } } } /// Expert: the {@link MergeScheduler} calls this method /// to retrieve the next merge requested by the /// MergePolicy /// public /*internal*/ virtual MergePolicy.OneMerge GetNextMerge() { lock (this) { if (pendingMerges.Count == 0) return null; else { // Advance the merge from pending to running object tempobject; tempobject = pendingMerges[0]; pendingMerges.RemoveAt(0); MergePolicy.OneMerge merge = (MergePolicy.OneMerge)tempobject; runningMerges[merge] = merge; return merge; } } } /// /// Like getNextMerge() except only returns a merge if it's external. /// private MergePolicy.OneMerge GetNextExternalMerge() { lock (this) { if (pendingMerges.Count == 0) return null; else { IEnumerator it = pendingMerges.GetEnumerator(); while (it.MoveNext()) { MergePolicy.OneMerge merge = it.Current; if (merge.isExternal) { // Advance the merge from pending to running pendingMerges.Remove(merge); runningMerges[merge] = merge; return merge; } } // All existing merges do not involve external segments return null; } } } /// /// Begin a transaction. During a transaction, any segment /// merges that happen (or ram segments flushed) will not /// write a new segments file and will not remove any files /// that were present at the start of the transaction. You /// must make a matched (try/finally) call to /// CommitTransaction() or RollbackTransaction() to finish /// the transaction. /// /// Note that buffered documents and delete terms are not handled /// within the transactions, so they must be flushed before the /// transaction is started. 
/// private void StartTransaction(bool haveWriteLock) { lock (this) { bool success = false; try { if (infoStream != null) Message("now start transaction"); System.Diagnostics.Debug.Assert(docWriter.GetNumBufferedDeleteTerms() == 0, "calling StartTransaction with buffered delete terms not supported: numBufferedDeleteTerms=" + docWriter.GetNumBufferedDeleteTerms()); System.Diagnostics.Debug.Assert(docWriter.GetNumDocsInRAM() == 0, "calling StartTransaction with buffered documents not supported: nuDocsInRAM=" + docWriter.GetNumDocsInRAM()); EnsureOpen(); // if a transaction is trying to roll back (because AddIndexes() hit an exception) // then wait here until that is done lock (this) { while (stopMerges) DoWait(); } success = true; } finally { // release the write lock, if our caller held it, on hitting an exception if (!success && haveWriteLock) ReleaseWrite(); } if (!haveWriteLock) AcquireWrite(); success = false; try { localRollbackSegmentInfos = (SegmentInfos)segmentInfos.Clone(); System.Diagnostics.Debug.Assert(!HasExternalSegments(segmentInfos)); localAutoCommit = autoCommit; localFlushedDocCount = docWriter.GetFlushedDocCount(); if (localAutoCommit) { if (infoStream != null) Message("flush at StartTransaction"); Flush(true, false, false); // Turn off auto-commit during our local transaction: autoCommit = false; } else { // We must "protect" our files at this point from // deletion in case we need to rollback: deleter.IncRef(segmentInfos, false); } success = true; } finally { if (!success) FinishAddIndexes(); } } } /* * Rolls back the transaction and restores state to where * we were at the start. */ private void RollbackTransaction() { lock (this) { if (infoStream != null) Message("now rollback transaction"); // First restore autoCommit in case we hit an exception below: autoCommit = localAutoCommit; docWriter.SetFlushedDocCount(localFlushedDocCount); // must finish merges before rolling back segmentInfos // so merges don't hit exceptions on trying to commit // themselves, don't get files deleted out from under // them, etc FinishMerges(false); // Keep the same segmentInfos instance but replace all // of its SegmentInfo instances. This is so the next // attempt to commit using this instance of IndexWriter // will always write to a new generation ("write once"). segmentInfos.Clear(); segmentInfos.AddRange(localRollbackSegmentInfos); localRollbackSegmentInfos = null; // this must come after we rollback segmentInfos, so // that if a Commit() kicks off it does not see the // segmentInfos with external segments FinishAddIndexes(); // Ask deleter to locate unreferenced files we had // created & remove them: deleter.Checkpoint(segmentInfos, false); if (!autoCommit) // Remove the incRef we did in StartTransaction: deleter.DecRef(segmentInfos); // also ask deleter to remove any newly created files // that were never incref'd; this "garbage" is created // when a merge kicks off but aborts part way through // before it had a chance to incRef the files it had // partially created deleter.Refresh(); System.Threading.Monitor.PulseAll(this); System.Diagnostics.Debug.Assert(!HasExternalSegments()); } } /* * Commits the transaction. 
This will write the new * segments file and remove any pending deletions we have * accumulated during the transaction */ private void CommitTransaction() { lock (this) { if (infoStream != null) Message("now commit transaction"); // First restore autoCommit in case we hit an exception below: autoCommit = localAutoCommit; // give deleter a chance to remove files now: Checkpoint(); if (autoCommit) { bool success = false; try { Commit(0); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception committing transaction"); RollbackTransaction(); } } } else { // remove the incRef we did in StartTransaction() deleter.DecRef(localRollbackSegmentInfos); } localRollbackSegmentInfos = null; System.Diagnostics.Debug.Assert(!HasExternalSegments()); FinishAddIndexes(); } } [System.Obsolete("Please use Rollback() instead")] public void Abort() { Rollback(); } /// Close the IndexWriter without committing /// any of the changes that have occurred since it was /// opened. This removes any temporary files that had been /// created, after which the state of the index will be the /// same as it was when this writer was first opened. This /// can only be called when this IndexWriter was opened /// with autoCommit=false. This also clears a previous /// call to PrepareCommit(). /// /// System.Exception if this is called when /// the writer was opened with autoCommit=true. /// /// System.IO.IOException if there is a low-level IO error public virtual void Rollback() { EnsureOpen(); if (autoCommit) throw new System.SystemException("Rollback() can only be called when IndexWriter was opened with autoCommit=false"); if (ShouldClose()) RollbackInternal(); } private void RollbackInternal() { bool success = false; docWriter.PauseAllThreads(); try { FinishMerges(false); // Must pre-close these two, in case they // increment changeCount, so that we can then set it to // false before calling closeInternal mergePolicy.Close(); mergeScheduler.Close(); lock (this) { if (pendingCommit != null) { pendingCommit.RollbackCommit(directory); deleter.DecRef(pendingCommit); pendingCommit = null; System.Threading.Monitor.PulseAll(this); } // Keep the same segmentInfos instance but replace all // of its SegmentInfo instances. This is so the next // attempt to commit using this instance of IndexWriter // will always write to a new generation ("write // once"). 
segmentInfos.Clear(); segmentInfos.AddRange(rollbackSegmentInfos); System.Diagnostics.Debug.Assert(!HasExternalSegments()); docWriter.Abort(); System.Diagnostics.Debug.Assert(TestPoint("rollback before checkpoint")); // Ask deleter to locate unreferenced files & remove // them: deleter.Checkpoint(segmentInfos, false); deleter.Refresh(); } lastCommitChangeCount = changeCount; success = true; } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } finally { lock (this) { if (!success) { docWriter.ResumeAllThreads(); closing = false; System.Threading.Monitor.PulseAll(this); if (infoStream != null) Message("hit exception during rollback"); } } } CloseInternal(false); } private void FinishMerges(bool waitForMerges) { lock (this) { if (!waitForMerges) { stopMerges = true; // Abort all pending & running merges: IEnumerator it = pendingMerges.GetEnumerator(); while (it.MoveNext()) { MergePolicy.OneMerge merge = it.Current; if (infoStream != null) Message("now abort pending merge " + merge.SegString(directory)); merge.Abort(); MergeFinish(merge); } pendingMerges.Clear(); it = runningMerges.Keys.GetEnumerator(); while (it.MoveNext()) { MergePolicy.OneMerge merge = it.Current; if (infoStream != null) Message("now abort running merge " + merge.SegString(directory)); merge.Abort(); } // ensure any running AddIndexes() finishes. It's fine // if a new one attempts to start because its merges // will quickly see the stopMerges == true and abort AcquireRead(); ReleaseRead(); // These merges periodically check whether they have // been aborted, and stop if so. We wait here to make // sure they all stop. It should not take very long // because the merge threads periodically check if // they are aborted. while (runningMerges.Count > 0) { if (infoStream != null) Message("now wait for " + runningMerges.Count + " running merge to abort"); DoWait(); } stopMerges = false; System.Threading.Monitor.PulseAll(this); System.Diagnostics.Debug.Assert(0 == mergingSegments.Count); if (infoStream != null) Message("all running merges have aborted"); } else { // ensure any running AddIndexes Finishes. It's fine // if a new one attempts to start because from our // caller above the call will see that we are in the // process of closing, and will throw an AlreadyClosedException AcquireRead(); ReleaseRead(); while (pendingMerges.Count > 0 || runningMerges.Count > 0) { DoWait(); } System.Diagnostics.Debug.Assert(0 == mergingSegments.Count); } } } /* * Called whenever the SegmentInfos has been updated and * the index files referenced exist (correctly) in the * index directory. */ private void Checkpoint() { lock (this) { changeCount++; deleter.Checkpoint(segmentInfos, false); } } private void FinishAddIndexes() { ReleaseWrite(); } private void BlockAddIndexes(bool includePendingClose) { AcquireRead(); bool success = false; try { // Make sure we are still open since we could have // waited quite a while for last AddIndexes to finish EnsureOpen(includePendingClose); success = true; } finally { if (!success) ReleaseRead(); } } private void ResumeAddIndexes() { ReleaseRead(); } /** Merges all segments from an array of indexes into this index. * @deprecated Use {@link #addIndexesNoOptimize} instead, * then separately call {@link #optimize} afterwards if * you need to. 
* @throws CorruptIndexException if the index is corrupt * @throws System.IO.IOException if there is a low-level IO error */ public void AddIndexes(Directory[] dirs) { EnsureOpen(); NoDupDirs(dirs); // Do not allow add docs or deletes while we are running: docWriter.PauseAllThreads(); try { if (infoStream != null) Message("flush at AddIndexes"); Flush(true, false, true); bool success = false; StartTransaction(false); try { int docCount = 0; lock (this) { EnsureOpen(); for (int i = 0; i < dirs.Length; i++) { SegmentInfos sis = new SegmentInfos(); // read infos from dir sis.Read(dirs[i]); for (int j = 0; j < sis.Count; j++) { SegmentInfo info = sis.Info(j); docCount += info.docCount; System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info)); segmentInfos.Add(info); // add each info } } } // Notify DocumentsWriter that the flushed count just increased docWriter.UpdateFlushedDocCount(docCount); Optimize(); success = true; } finally { if (success) { CommitTransaction(); } else { RollbackTransaction(); } } } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } finally { docWriter.ResumeAllThreads(); } } private void ResetMergeExceptions() { lock (this) { mergeExceptions = new System.Collections.ArrayList(); mergeGen++; } } private void NoDupDirs(Directory[] dirs) { Dictionary<Directory, Directory> dups = new Dictionary<Directory, Directory>(); for (int i = 0; i < dirs.Length; i++) { if (dups.ContainsKey(dirs[i])) throw new System.ArgumentException("Directory " + dirs[i] + " appears more than once"); if (dirs[i] == directory) throw new System.ArgumentException("Cannot add directory to itself"); dups[dirs[i]] = dirs[i]; } } /** * Merges all segments from an array of indexes into this * index. * *

This may be used to parallelize batch indexing. A large document * collection can be broken into sub-collections. Each sub-collection can be * indexed in parallel, on a different thread, process or machine. The * complete index can then be created by merging sub-collection indexes * with this method. * *

NOTE: the index in each Directory must not be * changed (opened by a writer) while this method is * running. This method does not acquire a write lock in * each input Directory, so it is up to the caller to * enforce this. * *

NOTE: while this is running, any attempts to * add or delete documents (with another thread) will be * paused until this method completes. * *

This method is transactional in how Exceptions are * handled: it does not commit a new segments_N file until * all indexes are added. This means if an Exception * occurs (for example disk full), then either no indexes * will have been added or they all will have been.

* *

Note that this requires temporary free space in the * Directory up to 2X the sum of all input indexes * (including the starting index). If readers/searchers * are open against the starting index, then temporary * free space required will be higher by the size of the * starting index (see {@link #Optimize()} for details). *

* *

Once this completes, the final size of the index * will be less than the sum of all input index sizes * (including the starting index). It could be quite a * bit smaller (if there were many pending deletes) or * just slightly smaller.

* *
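* A minimal sketch of the parallel-indexing pattern described above (the
* directory variables are assumed for illustration):
* <pre>
* // dir1 and dir2 each hold a sub-index built on its own thread or machine:
* writer.AddIndexesNoOptimize(new Directory[] { dir1, dir2 });
* </pre>
*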

* This requires this index not be among those to be added. * * @throws CorruptIndexException if the index is corrupt * @throws System.IO.IOException if there is a low-level IO error */ public void AddIndexesNoOptimize(Directory[] dirs) { EnsureOpen(); NoDupDirs(dirs); // Do not allow add docs or deletes while we are running: docWriter.PauseAllThreads(); try { if (infoStream != null) Message("flush at addIndexesNoOptimize"); Flush(true, false, true); bool success = false; StartTransaction(false); try { int docCount = 0; lock (this) { EnsureOpen(); for (int i = 0; i < dirs.Length; i++) { if (directory == dirs[i]) { // cannot add this index: segments may be deleted in merge before added throw new System.ArgumentException("Cannot add this index to itself"); } SegmentInfos sis = new SegmentInfos(); // read infos from dir sis.Read(dirs[i]); for (int j = 0; j < sis.Count; j++) { SegmentInfo info = sis.Info(j); System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info), "dup info dir=" + info.dir + " name=" + info.name); docCount += info.docCount; segmentInfos.Add(info); // add each info } } } // Notify DocumentsWriter that the flushed count just increased docWriter.UpdateFlushedDocCount(docCount); MaybeMerge(); EnsureOpen(); // If after merging there remain segments in the index // that are in a different directory, just copy these // over into our index. This is necessary (before // finishing the transaction) to avoid leaving the // index in an unusable (inconsistent) state. ResolveExternalSegments(); EnsureOpen(); success = true; } finally { if (success) { CommitTransaction(); } else { RollbackTransaction(); } } } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } finally { docWriter.ResumeAllThreads(); } } private bool HasExternalSegments() { return HasExternalSegments(segmentInfos); } private bool HasExternalSegments(SegmentInfos infos) { int numSegments = infos.Count; for (int i = 0; i < numSegments; i++) if (infos.Info(i).dir != directory) return true; return false; } /* If any of our segments are using a directory != ours * then we have to either copy them over one by one, merge * them (if merge policy has chosen to) or wait until * currently running merges (in the background) complete. * We don't return until the SegmentInfos has no more * external segments. Currently this is only used by * AddIndexesNoOptimize(). */ private void ResolveExternalSegments() { bool any = false; bool done = false; while (!done) { SegmentInfo info = null; MergePolicy.OneMerge merge = null; lock (this) { if (stopMerges) throw new MergePolicy.MergeAbortedException("rollback() was called or AddIndexes* hit an unhandled exception"); int numSegments = segmentInfos.Count; done = true; for (int i = 0; i < numSegments; i++) { info = segmentInfos.Info(i); if (info.dir != directory) { done = false; MergePolicy.OneMerge newMerge = new MergePolicy.OneMerge(segmentInfos.Range(i, 1 + i), info.GetUseCompoundFile()); // Returns true if no running merge conflicts // with this one (and, records this merge as // pending), ie, this segment is not currently // being merged: if (RegisterMerge(newMerge)) { merge = newMerge; // If this segment is not currently being // merged, then advance it to running & run // the merge ourself (below): pendingMerges.Remove(merge); runningMerges[merge] = merge; break; } } } if (!done && merge == null) // We are not yet done (external segments still // exist in segmentInfos), yet, all such segments // are currently "covered" by a pending or running // merge. 
We now try to grab any pending merge // that involves external segments: merge = GetNextExternalMerge(); if (!done && merge == null) // We are not yet done, and, all external segments // fall under merges that the merge scheduler is // currently running. So, we now wait and check // back to see if the merge has completed. DoWait(); } if (merge != null) { any = true; Merge(merge); } } if (any) // Sometimes, on copying an external segment over, // more merges may become necessary: mergeScheduler.Merge(this); } /** Merges the provided indexes into this index. *

After this completes, the index is optimized.

*

The provided IndexReaders are not closed.

*

NOTE: the index in each Directory must not be * changed (opened by a writer) while this method is * running. This method does not acquire a write lock in * each input Directory, so it is up to the caller to * enforce this. * *

NOTE: while this is running, any attempts to * add or delete documents (with another thread) will be * paused until this method completes. * *

See {@link #AddIndexesNoOptimize(Directory[])} for * details on transactional semantics, temporary free * space required in the Directory, and non-CFS segments * on an Exception.
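*
* A minimal sketch (otherDir is assumed for illustration; note that the
* provided readers are not closed by this call):
* <pre>
* IndexReader[] readers = new IndexReader[] { IndexReader.Open(otherDir) };
* writer.AddIndexes(readers);
* readers[0].Close(); // the caller still owns the readers
* </pre>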

* @throws CorruptIndexException if the index is corrupt * @throws System.IO.IOException if there is a low-level IO error */ public void AddIndexes(IndexReader[] readers) { EnsureOpen(); // Do not allow add docs or deletes while we are running: docWriter.PauseAllThreads(); // We must pre-acquire the write lock here (and not in // StartTransaction below) so that no other AddIndexes // is allowed to start up after we have flushed & // optimized but before we then start our transaction. // This is because the merging below requires that only // one segment is present in the index: AcquireWrite(); try { bool success = false; SegmentInfo info = null; string mergedName = null; SegmentMerger merger = null; try { Flush(true, false, true); Optimize(); // start with zero or 1 seg success = true; } finally { // Take care to release the write lock if we hit an // exception before starting the transaction if (!success) ReleaseWrite(); } success = false; // true means we already have write lock; if this call // hits an exception it will release the write lock: StartTransaction(true); try { mergedName = NewSegmentName(); merger = new SegmentMerger(this, mergedName, null); IndexReader sReader = null; lock (this) { if (segmentInfos.Count == 1) { // add existing index, if any sReader = SegmentReader.Get(true, segmentInfos.Info(0)); } } try { if (sReader != null) merger.Add(sReader); for (int i = 0; i < readers.Length; i++) // add new indexes merger.Add(readers[i]); int docCount = merger.Merge(); // merge 'em if (sReader != null) { sReader.Close(); sReader = null; } lock (this) { segmentInfos.Clear(); // pop old infos & add new info = new SegmentInfo(mergedName, docCount, directory, false, true, -1, null, false, merger.HasProx()); segmentInfos.Add(info); } // Notify DocumentsWriter that the flushed count just increased docWriter.UpdateFlushedDocCount(docCount); success = true; } finally { if (sReader != null) { sReader.Close(); } } } finally { if (!success) { if (infoStream != null) Message("hit exception in AddIndexes during merge"); RollbackTransaction(); } else { CommitTransaction(); } } if (mergePolicy is LogMergePolicy && GetUseCompoundFile()) { List files = null; lock (this) { // Must incRef our files so that if another thread // is running merge/optimize, it doesn't delete our // segment's files before we have a change to // finish making the compound file. if (segmentInfos.Contains(info)) { files = info.Files(); deleter.IncRef(files); } } if (files != null) { success = false; StartTransaction(false); try { merger.CreateCompoundFile(mergedName + ".cfs"); lock (this) { info.SetUseCompoundFile(true); } success = true; } finally { deleter.DecRef(files); if (!success) { if (infoStream != null) Message("hit exception building compound file in AddIndexes during merge"); RollbackTransaction(); } else { CommitTransaction(); } } } } } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } finally { docWriter.ResumeAllThreads(); } } // This is called after pending added and deleted // documents have been flushed to the Directory but before // the change is committed (new segments_N file written). protected internal virtual void DoAfterFlush() { } /** * Flush all in-memory buffered updates (adds and deletes) * to the Directory. *

Note: while this will force buffered docs to be * pushed into the index, it will not make these docs * visible to a reader. Use {@link #Commit()} instead * @throws CorruptIndexException if the index is corrupt * @throws System.IO.IOException if there is a low-level IO error * @deprecated please call {@link #Commit()}) instead */ public void Flush() { Flush(true, false, true); } /**

Expert: prepare for commit. This does the first * phase of 2-phase commit. You can only call this when * autoCommit is false. This method does all steps * necessary to commit changes since this writer was * opened: flushes pending added and deleted docs, syncs * the index files, writes most of next segments_N file. * After calling this you must call either {@link * #Commit()} to finish the commit, or {@link * #rollback()} to revert the commit and undo all changes * done since the writer was opened.
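*
* A two-phase commit sketch matching the steps above (the external
* resource and its Prepare/Commit methods are assumptions for
* illustration):
* <pre>
* try
* {
*     writer.PrepareCommit(); // phase 1: flush, sync, write pending segments_N
*     externalResource.Prepare();
*     writer.Commit();        // phase 2: make the new segments_N visible
*     externalResource.Commit();
* }
* catch (System.Exception)
* {
*     writer.Rollback(); // revert everything since the writer was opened
*     throw;
* }
* </pre>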

* * You can also just call {@link #Commit()} directly * without prepareCommit first in which case that method * will internally call prepareCommit. */ public void PrepareCommit() { EnsureOpen(); PrepareCommit(false); } private void PrepareCommit(bool internal_Renamed) { if (hitOOM) throw new System.Exception("this writer hit an System.OutOfMemoryException; cannot commit"); if (autoCommit && !internal_Renamed) throw new System.Exception("this method can only be used when autoCommit is false"); if (!autoCommit && pendingCommit != null) throw new System.Exception("prepareCommit was already called with no corresponding call to commit"); Message("prepareCommit: flush"); Flush(true, true, true); StartCommit(0); } private void Commit(long sizeInBytes) { StartCommit(sizeInBytes); FinishCommit(); } private bool committing; private void WaitForCommit() { lock (this) { // Only allow a single thread to do the commit, at a time: while (committing) DoWait(); committing = true; } } private void DoneCommit() { lock (this) { committing = false; System.Threading.Monitor.PulseAll(this); } } /** *

Commits all pending updates (added & deleted * documents) to the index, and syncs all referenced index * files, such that a reader will see the changes and the * index updates will survive an OS or machine crash or * power loss. Note that this does not wait for any * running background merges to finish. This may be a * costly operation, so you should test the cost in your * application and do it only when really necessary.

* *

Note that this operation calls Directory.sync on * the index files. That call should not return until the * file contents & metadata are on stable storage. For * FSDirectory, this calls the OS's fsync. But, beware: * some hardware devices may in fact cache writes even * during fsync, and return before the bits are actually * on stable storage, to give the appearance of faster * performance. If you have such a device, and it does * not have a battery backup (for example) then on power * loss it may still lose data. Lucene cannot guarantee * consistency on such devices.
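*
* A minimal sketch (doc and writer assumed to exist): batch up changes,
* then make them durable and visible in one step:
* <pre>
* writer.AddDocument(doc);
* writer.DeleteDocuments(new Term("id", "41"));
* writer.Commit(); // a newly opened IndexReader will now see both changes
* </pre>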

* * @see #prepareCommit */ public void Commit() { EnsureOpen(); // Only let one thread do the prepare/finish at a time WaitForCommit(); try { Message("commit: start"); if (autoCommit || pendingCommit == null) { Message("commit: now prepare"); PrepareCommit(true); } else Message("commit: already prepared"); FinishCommit(); } finally { DoneCommit(); } } private void FinishCommit() { lock (this) { if (pendingCommit != null) { try { Message("commit: pendingCommit != null"); pendingCommit.FinishCommit(directory); lastCommitChangeCount = pendingCommitChangeCount; segmentInfos.UpdateGeneration(pendingCommit); SetRollbackSegmentInfos(pendingCommit); deleter.Checkpoint(pendingCommit, true); } finally { deleter.DecRef(pendingCommit); pendingCommit = null; System.Threading.Monitor.PulseAll(this); } } else Message("commit: pendingCommit == null; skip"); Message("commit: done"); } } /** * Flush all in-memory buffered udpates (adds and deletes) * to the Directory. * @param triggerMerge if true, we may merge segments (if * deletes or docs were flushed) if necessary * @param flushDocStores if false we are allowed to keep * doc stores open to share with the next segment * @param flushDeletes whether pending deletes should also * be flushed */ public /* for nunit tests, was: protected */ void Flush(bool triggerMerge, bool flushDocStores, bool flushDeletes) { // We can be called during close, when closing==true, so we must pass false to EnsureOpen: EnsureOpen(false); if (DoFlush(flushDocStores, flushDeletes) && triggerMerge) MaybeMerge(); } // TODO: this method should not have to be entirely // synchronized, ie, merges should be allowed to commit // even while a flush is happening private bool DoFlush(bool flushDocStores, bool flushDeletes) { lock (this) { EnsureOpen(false); System.Diagnostics.Debug.Assert(TestPoint("startDoFlush")); flushCount++; // Make sure no threads are actively adding a document flushDeletes |= docWriter.DeletesFull(); // When autoCommit=true we must always flush deletes // when flushing a segment; otherwise deletes may become // visible before their corresponding added document // from an updateDocument call flushDeletes |= autoCommit; // Returns true if docWriter is currently aborting, in // which case we skip flushing this segment if (docWriter.PauseAllThreads()) { docWriter.ResumeAllThreads(); return false; } try { SegmentInfo newSegment = null; int numDocs = docWriter.GetNumDocsInRAM(); // Always flush docs if there are any bool flushDocs = numDocs > 0; // With autoCommit=true we always must flush the doc // stores when we flush flushDocStores |= autoCommit; string docStoreSegment = docWriter.GetDocStoreSegment(); if (docStoreSegment == null) flushDocStores = false; int docStoreOffset = docWriter.GetDocStoreOffset(); // docStoreOffset should only be non-zero when // autoCommit == false System.Diagnostics.Debug.Assert(!autoCommit || 0 == docStoreOffset); bool docStoreIsCompoundFile = false; if (infoStream != null) { Message(" flush: segment=" + docWriter.GetSegment() + " docStoreSegment=" + docWriter.GetDocStoreSegment() + " docStoreOffset=" + docStoreOffset + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms()); Message(" index before flush " + SegString()); } // Check if the doc stores must be separately flushed // because other segments, besides the one we are about // to flush, reference it if (flushDocStores && (!flushDocs || 
!docWriter.GetSegment().Equals(docWriter.GetDocStoreSegment()))) { // We must separately flush the doc store if (infoStream != null) Message(" flush shared docStore segment " + docStoreSegment); docStoreIsCompoundFile = FlushDocStores(); flushDocStores = false; } string segment = docWriter.GetSegment(); // If we are flushing docs, segment must not be null: System.Diagnostics.Debug.Assert(segment != null || !flushDocs); if (flushDocs) { bool success = false; int flushedDocCount; try { flushedDocCount = docWriter.Flush(flushDocStores); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception flushing segment " + segment); deleter.Refresh(segment); } } if (0 == docStoreOffset && flushDocStores) { // This means we are flushing private doc stores // with this segment, so it will not be shared // with other segments System.Diagnostics.Debug.Assert(docStoreSegment != null); System.Diagnostics.Debug.Assert(docStoreSegment.Equals(segment)); docStoreOffset = -1; docStoreIsCompoundFile = false; docStoreSegment = null; } // Create new SegmentInfo, but do not add to our // segmentInfos until deletes are flushed // successfully. newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter.HasProx()); } docWriter.PushDeletes(); if (flushDocs) segmentInfos.Add(newSegment); if (flushDeletes) { flushDeletesCount++; ApplyDeletes(); } DoAfterFlush(); if (flushDocs) Checkpoint(); if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment)) { // Now build compound file bool success = false; try { docWriter.CreateCompoundFile(segment); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception creating compound file for newly flushed segment " + segment); deleter.DeleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); } } newSegment.SetUseCompoundFile(true); Checkpoint(); } return flushDocs; } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } finally { docWriter.ClearFlushPending(); docWriter.ResumeAllThreads(); } } } /** Expert: Return the total size of all index files currently cached in memory. * Useful for size management with flushRamDocs() */ public long RamSizeInBytes() { EnsureOpen(); return docWriter.GetRAMUsed(); } /** Expert: Return the number of documents currently * buffered in RAM. */ public int NumRamDocs() { lock (this) { EnsureOpen(); return docWriter.GetNumDocsInRAM(); } } private int EnsureContiguousMerge(MergePolicy.OneMerge merge) { int first = segmentInfos.IndexOf(merge.segments.Info(0)); if (first == -1) throw new MergePolicy.MergeException("could not find segment " + merge.segments.Info(0).name + " in current segments", directory); int numSegments = segmentInfos.Count; int numSegmentsToMerge = merge.segments.Count; for (int i = 0; i < numSegmentsToMerge; i++) { SegmentInfo info = merge.segments.Info(i); if (first + i >= numSegments || !segmentInfos.Info(first + i).Equals(info)) { if (segmentInfos.IndexOf(info) == -1) throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the index", directory); else throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.SegString(directory) + " vs " + SegString() + "), which IndexWriter (currently) cannot handle", directory); } } return first; } /** Carefully merges deletes for the segments we just * merged. 
This is tricky because, although merging will * clear all deletes (compacts the documents), new * deletes may have been flushed to the segments since * the merge was started. This method "carries over" * such new deletes onto the newly merged segment, and * saves the resulting deletes file (incrementing the * delete generation for merge.info). If no deletes were * flushed, no new deletes file is saved. */ private void CommitMergedDeletes(MergePolicy.OneMerge merge) { lock (this) { System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes")); SegmentInfos sourceSegmentsClone = merge.segmentsClone; SegmentInfos sourceSegments = merge.segments; if (infoStream != null) Message("commitMergeDeletes " + merge.SegString(directory)); // Carefully merge deletes that occurred after we // started merging: BitVector deletes = null; int docUpto = 0; int delCount = 0; int numSegmentsToMerge = sourceSegments.Count; for (int i = 0; i < numSegmentsToMerge; i++) { SegmentInfo previousInfo = sourceSegmentsClone.Info(i); SegmentInfo currentInfo = sourceSegments.Info(i); System.Diagnostics.Debug.Assert(currentInfo.docCount == previousInfo.docCount); int docCount = currentInfo.docCount; if (previousInfo.HasDeletions()) { // There were deletes on this segment when the merge // started. The merge has collapsed away those // deletes, but, if new deletes were flushed since // the merge started, we must now carefully keep any // newly flushed deletes but mapping them to the new // docIDs. System.Diagnostics.Debug.Assert(currentInfo.HasDeletions()); // Load deletes present @ start of merge, for this segment: BitVector previousDeletes = new BitVector(previousInfo.dir, previousInfo.GetDelFileName()); if (!currentInfo.GetDelFileName().Equals(previousInfo.GetDelFileName())) { // This means this segment has had new deletes // committed since we started the merge, so we // must merge them: if (deletes == null) deletes = new BitVector(merge.info.docCount); BitVector currentDeletes = new BitVector(currentInfo.dir, currentInfo.GetDelFileName()); for (int j = 0; j < docCount; j++) { if (previousDeletes.Get(j)) System.Diagnostics.Debug.Assert(currentDeletes.Get(j)); else { if (currentDeletes.Get(j)) { deletes.Set(docUpto); delCount++; } docUpto++; } } } else docUpto += docCount - previousDeletes.Count(); } else if (currentInfo.HasDeletions()) { // This segment had no deletes before but now it // does: if (deletes == null) deletes = new BitVector(merge.info.docCount); BitVector currentDeletes = new BitVector(directory, currentInfo.GetDelFileName()); for (int j = 0; j < docCount; j++) { if (currentDeletes.Get(j)) { deletes.Set(docUpto); delCount++; } docUpto++; } } else // No deletes before or after docUpto += currentInfo.docCount; } if (deletes != null) { merge.info.AdvanceDelGen(); Message("commit merge deletes to " + merge.info.GetDelFileName()); deletes.Write(directory, merge.info.GetDelFileName()); merge.info.SetDelCount(delCount); System.Diagnostics.Debug.Assert(delCount == deletes.Count()); } } } /* FIXME if we want to support non-contiguous segment merges */ private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount) { lock (this) { System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge")); if (hitOOM) return false; if (infoStream != null) Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString()); System.Diagnostics.Debug.Assert(merge.registerDone); // If merge was explicitly aborted, or, if rollback() or // RollbackTransaction() had been called since 
our merge // started (which results in an unqualified // deleter.Refresh() call that will remove any index // file that current segments does not reference), we // abort this merge if (merge.IsAborted()) { if (infoStream != null) Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted"); deleter.Refresh(merge.info.name); return false; } int start = EnsureContiguousMerge(merge); CommitMergedDeletes(merge); docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount); // Simple optimization: if the doc store we are using // has been closed and is in now compound format (but // wasn't when we started), then we will switch to the // compound format as well: string mergeDocStoreSegment = merge.info.GetDocStoreSegment(); if (mergeDocStoreSegment != null && !merge.info.GetDocStoreIsCompoundFile()) { int size = segmentInfos.Count; for (int i = 0; i < size; i++) { SegmentInfo info = segmentInfos.Info(i); string docStoreSegment = info.GetDocStoreSegment(); if (docStoreSegment != null && docStoreSegment.Equals(mergeDocStoreSegment) && info.GetDocStoreIsCompoundFile()) { merge.info.SetDocStoreIsCompoundFile(true); break; } } } merge.info.SetHasProx(merger.HasProx()); //segmentInfos.RemoveRange(start, start + merge.segments.Count); segmentInfos.RemoveRange(start, merge.segments.Count); System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info)); segmentInfos.Insert(start, merge.info); // Must checkpoint before decrefing so any newly // referenced files in the new merge.info are incref'd // first: Checkpoint(); DecrefMergeSegments(merge); if (merge.optimize) segmentsToOptimize[merge.info] = merge.info; return true; } } private void DecrefMergeSegments(MergePolicy.OneMerge merge) { SegmentInfos sourceSegmentsClone = merge.segmentsClone; int numSegmentsToMerge = sourceSegmentsClone.Count; System.Diagnostics.Debug.Assert(merge.increfDone); merge.increfDone = false; for (int i = 0; i < numSegmentsToMerge; i++) { SegmentInfo previousInfo = sourceSegmentsClone.Info(i); // Decref all files for this SegmentInfo (this // matches the incref in mergeInit): if (previousInfo.dir == directory) deleter.DecRef(previousInfo.Files()); } } private void HandleMergeException(System.Exception t, MergePolicy.OneMerge merge) { // Set the exception on the merge, so if // Optimize() is waiting on us it sees the root // cause exception: merge.SetException(t); AddMergeException(merge); if (t is MergePolicy.MergeAbortedException) { // We can ignore this exception (it happens when // close(false) or rollback is called), unless the // merge involves segments from external directories, // in which case we must throw it so, for example, the // rollbackTransaction code in AddIndexes* is // executed. if (merge.isExternal) throw (MergePolicy.MergeAbortedException)t; } else if (t is System.IO.IOException) throw (System.IO.IOException)t; else if (t is System.SystemException) throw (System.SystemException)t; else if (t is System.Exception) throw (System.Exception)t; else // Should not get here throw new System.SystemException(null, t); } /** * Merges the indicated segments, replacing them in the stack with a * single segment. 
/** Merges the indicated segments, replacing them in the stack with a single segment. */ public /* for nunit test, was: internal */ void Merge(MergePolicy.OneMerge merge) { bool success = false; try { try { try { MergeInit(merge); if (infoStream != null) Message("now merge\n  merge=" + merge.SegString(directory) + "\n  merge=" + merge + "\n  index=" + SegString()); MergeMiddle(merge); success = true; } catch (System.Exception t) { HandleMergeException(t, merge); } } finally { lock (this) { try { MergeFinish(merge); if (!success) { if (infoStream != null) Message("hit exception during merge"); if (merge.info != null && !segmentInfos.Contains(merge.info)) deleter.Refresh(merge.info.name); } // This merge (and, generally, any change to the // segments) may now enable new merges, so we call // merge policy & update pending merges. if (success && !merge.IsAborted() && !closed && !closing) UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize); } finally { runningMerges.Remove(merge); } } } } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } }
/** Checks whether this merge involves any segments * already participating in a merge. If not, this merge * is "registered", meaning we record that its segments * are now participating in a merge, and true is * returned. Else (the merge conflicts) false is * returned. */ internal bool RegisterMerge(MergePolicy.OneMerge merge) { lock (this) { if (merge.registerDone) return true; if (stopMerges) { merge.Abort(); throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.SegString(directory)); } int count = merge.segments.Count; bool isExternal = false; for (int i = 0; i < count; i++) { SegmentInfo info = merge.segments.Info(i); if (mergingSegments.ContainsKey(info)) return false; if (segmentInfos.IndexOf(info) == -1) return false; if (info.dir != directory) isExternal = true; } EnsureContiguousMerge(merge); pendingMerges.Add(merge); if (infoStream != null) Message("add merge to pendingMerges: " + merge.SegString(directory) + " [total " + pendingMerges.Count + " pending]"); merge.mergeGen = mergeGen; merge.isExternal = isExternal; // OK it does not conflict; now record that this merge // is running (while synchronized) to avoid the race // condition where two conflicting merges from different // threads could both start for (int i = 0; i < count; i++) mergingSegments[merge.segments.Info(i)] = merge.segments.Info(i); // Merge is now registered merge.registerDone = true; return true; } }
/** Does initial setup for a merge, which is fast but holds * the synchronized lock on IndexWriter instance. */ internal void MergeInit(MergePolicy.OneMerge merge) { lock (this) { bool success = false; try { _MergeInit(merge); success = true; } finally { if (!success) { MergeFinish(merge); runningMerges.Remove(merge); } } } }
private void _MergeInit(MergePolicy.OneMerge merge) { lock (this) { System.Diagnostics.Debug.Assert(TestPoint("startMergeInit")); System.Diagnostics.Debug.Assert(merge.registerDone); System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0); if (merge.info != null) // mergeInit already done return; if (merge.IsAborted()) return; bool changed = ApplyDeletes(); // If autoCommit == true then all deletes should have // been flushed when we flushed the last segment System.Diagnostics.Debug.Assert(!changed || !autoCommit); SegmentInfos sourceSegments = merge.segments; int end = sourceSegments.Count; // Check whether this merge will allow us to skip // merging the doc stores (stored fields & vectors).
// This is a very substantial optimization (saves tons // of IO) that can only be applied with // autoCommit=false. Directory lastDir = directory; string lastDocStoreSegment = null; int next = -1; bool mergeDocStores = false; bool doFlushDocStore = false; string currentDocStoreSegment = docWriter.GetDocStoreSegment(); // Test each segment to be merged: check if we need to // flush/merge doc stores for (int i = 0; i < end; i++) { SegmentInfo si = sourceSegments.Info(i); // If it has deletions we must merge the doc stores if (si.HasDeletions()) mergeDocStores = true; // If it has its own (private) doc stores we must // merge the doc stores if (-1 == si.GetDocStoreOffset()) mergeDocStores = true; // If it has a different doc store segment than // previous segments, we must merge the doc stores string siDocStoreSegment = si.GetDocStoreSegment(); if (siDocStoreSegment == null) mergeDocStores = true; else if (lastDocStoreSegment == null) lastDocStoreSegment = siDocStoreSegment; else if (!lastDocStoreSegment.Equals(siDocStoreSegment)) mergeDocStores = true; // Segments' docStoreOffsets must be in-order, // contiguous. For the default merge policy now // this will always be the case but for an arbitrary // merge policy this may not be the case if (-1 == next) next = si.GetDocStoreOffset() + si.docCount; else if (next != si.GetDocStoreOffset()) mergeDocStores = true; else next = si.GetDocStoreOffset() + si.docCount; // If the segment comes from a different directory // we must merge if (lastDir != si.dir) mergeDocStores = true; // If the segment is referencing the current "live" // doc store outputs then we must merge if (si.GetDocStoreOffset() != -1 && currentDocStoreSegment != null && si.GetDocStoreSegment().Equals(currentDocStoreSegment)) { doFlushDocStore = true; } } int docStoreOffset; string docStoreSegment; bool docStoreIsCompoundFile; if (mergeDocStores) { docStoreOffset = -1; docStoreSegment = null; docStoreIsCompoundFile = false; } else { SegmentInfo si = sourceSegments.Info(0); docStoreOffset = si.GetDocStoreOffset(); docStoreSegment = si.GetDocStoreSegment(); docStoreIsCompoundFile = si.GetDocStoreIsCompoundFile(); } if (mergeDocStores && doFlushDocStore) { // SegmentMerger intends to merge the doc stores // (stored fields, vectors), and at least one of the // segments to be merged refers to the currently // live doc stores. // TODO: if we know we are about to merge away these // newly flushed doc store files then we should not // make compound file out of them... if (infoStream != null) Message("now flush at merge"); DoFlush(true, false); } // We must take a full copy at this point so that we can // properly merge deletes in CommitMerge() merge.segmentsClone = (SegmentInfos)merge.segments.Clone(); for (int i = 0; i < end; i++) { SegmentInfo si = merge.segmentsClone.Info(i); // IncRef all files for this segment info to make sure // they are not removed while we are trying to merge. if (si.dir == directory) deleter.IncRef(si.Files()); } merge.increfDone = true; merge.mergeDocStores = mergeDocStores;
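// A worked example of the doc-store contiguity check above, with
// hypothetical values: suppose three segments share doc store "_0" with
// (docStoreOffset, docCount) of (0, 100), (100, 250) and (350, 80). After
// the first segment, next = 0 + 100 = 100, which matches the second
// segment's offset, so next becomes 100 + 250 = 350; that matches the third
// segment's offset too, so the shared stores are contiguous and, absent
// deletions, a directory mismatch, or a reference to the live doc store,
// mergeDocStores can stay false and the merge skips rewriting stored
// fields and vectors.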
// Bind a new segment name here so even with // ConcurrentMergePolicy we keep deterministic segment // names. merge.info = new SegmentInfo(NewSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, false); // Also enroll the merged segment into mergingSegments; // this prevents it from getting selected for a merge // after our merge is done but while we are building the // CFS: mergingSegments[merge.info] = merge.info; } }
/** This is called after merging a segment and before * building its CFS. Return true if the files should be * sync'd. If you return false, then the source segment * files that were merged cannot be deleted until the CFS * file is built & sync'd. So, returning false consumes * more transient disk space, but saves the cost of * syncing files that will shortly be deleted anyway. * @deprecated -- this will be removed in 3.0 when * autoCommit is hardwired to false */ [System.Obsolete("this will be removed in 3.0 when autoCommit is hardwired to false")] private bool DoCommitBeforeMergeCFS(MergePolicy.OneMerge merge) { lock (this) { long freeableBytes = 0; int size = merge.segments.Count; for (int i = 0; i < size; i++) { SegmentInfo info = merge.segments.Info(i); // It's only important to sync if the most recent // commit actually references this segment, because if // it doesn't, even without syncing we will free up // the disk space: if (rollbackSegments.ContainsKey(info)) { int loc = rollbackSegments[info]; SegmentInfo oldInfo = rollbackSegmentInfos.Info(loc); if (oldInfo.GetUseCompoundFile() != info.GetUseCompoundFile()) freeableBytes += info.SizeInBytes(); } } // If we would free up more than 1/3rd of the index by // committing now, then do so: long totalBytes = 0; int numSegments = segmentInfos.Count; for (int i = 0; i < numSegments; i++) totalBytes += segmentInfos.Info(i).SizeInBytes(); return 3 * freeableBytes > totalBytes; } }
/** Does finishing for a merge, which is fast but holds * the synchronized lock on IndexWriter instance. */ internal void MergeFinish(MergePolicy.OneMerge merge) { lock (this) { // Optimize, AddIndexes or finishMerges may be waiting // on merges to finish.
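// (A sketch of that handshake: a waiter such as Optimize() loops like
//
//   lock (this)
//   {
//       while (/* merges still pending or running */)
//           DoWait();   // Monitor.Wait(this, 1000); defined further below
//   }
//
// and the PulseAll here wakes every such waiter so it can re-check its
// condition.)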
System.Threading.Monitor.PulseAll(this); if (merge.increfDone) DecrefMergeSegments(merge); System.Diagnostics.Debug.Assert(merge.registerDone); SegmentInfos sourceSegments = merge.segments; int end = sourceSegments.Count; for (int i = 0; i < end; i++) mergingSegments.Remove(sourceSegments.Info(i)); if (merge.info != null) mergingSegments.Remove(merge.info); merge.registerDone = false; } }
/** Does the actual (time-consuming) work of the merge, * but without holding the synchronized lock on IndexWriter * instance */ private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); string mergedName = merge.info.name; SegmentMerger merger = null; int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; SegmentInfos sourceSegmentsClone = merge.segmentsClone; int numSegments = sourceSegments.Count; if (infoStream != null) Message("merging " + merge.SegString(directory)); merger = new SegmentMerger(this, mergedName, merge); bool success = false; // This is try/finally to make sure merger's readers are // closed: try { int totDocCount = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo si = sourceSegmentsClone.Info(i); IndexReader reader = SegmentReader.Get(true, si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet) merger.Add(reader); totDocCount += reader.NumDocs(); } if (infoStream != null) { Message("merge: total " + totDocCount + " docs"); } merge.CheckAborted(directory); // This is where all the work happens: mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores); System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount); success = true; } finally { // close readers before we attempt to delete // now-obsolete segments if (merger != null) { merger.CloseReaders(); } } if (!CommitMerge(merge, merger, mergedDocCount)) // commitMerge will return false if this merge was aborted return 0; if (merge.useCompoundFile) { // Maybe force a sync here to allow reclaiming of the // disk space used by the segments we just merged: if (autoCommit && DoCommitBeforeMergeCFS(merge)) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } success = false; string compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION; try { merger.CreateCompoundFile(compoundFileName); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.IsAborted()) { // This can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: success = true; } else HandleMergeException(ioe, merge); } } catch (System.Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream != null) Message("hit exception creating compound file during merge"); lock (this) { deleter.DeleteFile(compoundFileName); } } } if (merge.IsAborted()) { if (infoStream != null) Message("abort merge after building CFS"); deleter.DeleteFile(compoundFileName); return 0; } lock (this) { if (segmentInfos.IndexOf(merge.info) == -1 || merge.IsAborted()) { // Our segment (committed in non-compound // format) got merged away while we were // building the compound format. deleter.DeleteFile(compoundFileName); } else { merge.info.SetUseCompoundFile(true); Checkpoint(); } } }
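// (When autoCommit is true, the Commit(size) call below first pauses in
// SyncPause before syncing. A quick worked example of that heuristic,
// assuming a hypothetical 50 MB merged segment and maxSyncPauseSeconds of 10:
//
//   long sizeInBytes = 50L * 1024 * 1024;
//   long pauseTime = 1000 * sizeInBytes / 10 / 1024 / 1024;   // = 5000 ms
//
// i.e. roughly one second per 10 MB, capped at maxSyncPauseSeconds * 1000,
// and slept in 100 ms slices so a close or aborted merge can cut it short.)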
// Force a sync after committing the merge. Once this // sync completes then all index files referenced by the // current segmentInfos are on stable storage so if the // OS/machine crashes, or power cord is yanked, the // index will be intact. Note that this is just one // (somewhat arbitrary) policy; we could try other // policies like only sync if it's been > X minutes or // more than Y bytes have been written, etc. if (autoCommit) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } return mergedDocCount; }
internal void AddMergeException(MergePolicy.OneMerge merge) { lock (this) { System.Diagnostics.Debug.Assert(merge.GetException() != null); if (!mergeExceptions.Contains(merge) && mergeGen == merge.mergeGen) mergeExceptions.Add(merge); } }
// Apply buffered deletes to all segments. private bool ApplyDeletes() { lock (this) { System.Diagnostics.Debug.Assert(TestPoint("startApplyDeletes")); SegmentInfos rollback = (SegmentInfos)segmentInfos.Clone(); bool success = false; bool changed; try { changed = docWriter.ApplyDeletes(segmentInfos); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception flushing deletes"); // Carefully remove any partially written .del // files int size = rollback.Count; for (int i = 0; i < size; i++) { string newDelFileName = segmentInfos.Info(i).GetDelFileName(); string delFileName = rollback.Info(i).GetDelFileName(); if (newDelFileName != null && !newDelFileName.Equals(delFileName)) deleter.DeleteFile(newDelFileName); } // Fully replace the segmentInfos since flushed // deletes could have changed any of the // SegmentInfo instances: segmentInfos.Clear(); for (int i = 0; i < rollback.Count; i++) segmentInfos.Add(rollback.Info(i)); } } if (changed) Checkpoint(); return changed; } }
// For test purposes. public int GetBufferedDeleteTermsSize() { lock (this) { return docWriter.GetBufferedDeleteTerms().Count; } }
// For test purposes. public int GetNumBufferedDeleteTerms() { lock (this) { return docWriter.GetNumBufferedDeleteTerms(); } }
// utility routines for tests public SegmentInfo NewestSegment() { return segmentInfos.Info(segmentInfos.Count - 1); }
public string SegString() { lock (this) { return SegString(segmentInfos); } }
private string SegString(SegmentInfos infos) { lock (this) { System.Text.StringBuilder buffer = new System.Text.StringBuilder(); int count = infos.Count; for (int i = 0; i < count; i++) { if (i > 0) { buffer.Append(' '); } SegmentInfo info = infos.Info(i); buffer.Append(info.SegString(directory)); if (info.dir != directory) buffer.Append("**"); } return buffer.ToString(); } }
// Files that have been sync'd already private Dictionary<string, string> synced = new Dictionary<string, string>();
// Files that are now being sync'd private Dictionary<string, string> syncing = new Dictionary<string, string>();
private bool StartSync(string fileName, ICollection<string> pending) { lock (synced) { if (!synced.ContainsKey(fileName)) { if (!syncing.ContainsKey(fileName)) { syncing[fileName] = fileName; return true; } else { pending.Add(fileName); return false; } } else return false; } }
private void FinishSync(string fileName, bool success) { lock (synced) { System.Diagnostics.Debug.Assert(syncing.ContainsKey(fileName)); syncing.Remove(fileName); if (success) synced[fileName] = fileName; System.Threading.Monitor.PulseAll(synced); } }
/** Blocks until all files in syncing are sync'd */ private bool WaitForAllSynced(ICollection<string> syncing) { lock (synced) { foreach (string fileName in syncing) { while (!synced.ContainsKey(fileName)) { if (!syncing.Contains(fileName)) // There was an error because a file that was // previously syncing failed to appear in synced return false; else try { System.Threading.Monitor.Wait(synced); } catch (System.Threading.ThreadInterruptedException) { continue; } } } return true; } }
/** Pauses before syncing. On Windows, at least, it's * best (performance-wise) to pause in order to let OS * flush writes to disk on its own, before forcing a * sync. */ [System.Obsolete("this will be removed in 3.0 when autoCommit is hardwired to false")] private void SyncPause(long sizeInBytes) { if (mergeScheduler is ConcurrentMergeScheduler && maxSyncPauseSeconds > 0) { // Rough heuristic: for every 10 MB, we pause for 1 // second, up until the max long pauseTime = (long)(1000 * sizeInBytes / 10 / 1024 / 1024); long maxPauseTime = (long)(maxSyncPauseSeconds * 1000); if (pauseTime > maxPauseTime) pauseTime = maxPauseTime; int sleepCount = (int)(pauseTime / 100); for (int i = 0; i < sleepCount; i++) { lock (this) { if (stopMerges || closing) break; } try { System.Threading.Thread.Sleep(100); } catch (System.Threading.ThreadInterruptedException) { System.Threading.Thread.CurrentThread.Interrupt(); } } } }
private void DoWait() { lock (this) { try { // NOTE: the callers of this method should in theory // be able to do simply wait(), but, as a defense // against thread timing hazards where notifyAll() // fails to be called, we wait for at most 1 second // and then return so the caller can check if wait // conditions are satisfied: System.Threading.Monitor.Wait(this, 1000); } catch (System.Threading.ThreadInterruptedException) { System.Threading.Thread.CurrentThread.Interrupt(); } } }
/** Walk through all files referenced by the current * segmentInfos and ask the Directory to sync each file, * if it wasn't already. If that succeeds, then we * prepare a new segments_N file but do not fully commit * it. */ private void StartCommit(long sizeInBytes) { System.Diagnostics.Debug.Assert(TestPoint("startStartCommit")); if (hitOOM) return; try { if (infoStream != null) Message("StartCommit(): start sizeInBytes=" + sizeInBytes); if (sizeInBytes > 0) SyncPause(sizeInBytes); SegmentInfos toSync = null; long myChangeCount; lock (this) { // sizeInBytes > 0 means this is an autoCommit at // the end of a merge. If at this point stopMerges // is true (which means a rollback() or // RollbackTransaction() is waiting for us to // finish), we skip the commit to avoid deadlock if (sizeInBytes > 0 && stopMerges) return; // Wait for any running AddIndexes to complete // first, then block any from running until we've // copied the segmentInfos we intend to sync: BlockAddIndexes(false); System.Diagnostics.Debug.Assert(!HasExternalSegments()); try { System.Diagnostics.Debug.Assert(lastCommitChangeCount <= changeCount); if (changeCount == lastCommitChangeCount) { if (infoStream != null) Message("  skip StartCommit(): no changes pending"); return; }
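// (In outline, what follows is the first half of a two-phase commit; a
// sketch of the whole protocol, assuming nothing supersedes us meanwhile:
//
//   // 1. sync every file the cloned segmentInfos references  (this method)
//   // 2. write, but do not commit, the next segments_N       (toSync.PrepareCommit)
//   // 3. make it live, when Commit() or Close() finishes the commit
//
// If the process dies between steps 2 and 3, the prepared segments_N is
// never referenced as the live commit and is cleaned up when the index is
// next opened.)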
// First, we clone & incref the segmentInfos we intend // to sync, then, without locking, we sync() each file // referenced by toSync, in the background. Multiple // threads can be doing this at once, if say a large // merge and a small merge finish at the same time: if (infoStream != null) Message("startCommit index=" + SegString(segmentInfos) + " changeCount=" + changeCount); toSync = (SegmentInfos)segmentInfos.Clone(); deleter.IncRef(toSync, false); myChangeCount = changeCount; } finally { ResumeAddIndexes(); } } System.Diagnostics.Debug.Assert(TestPoint("midStartCommit")); bool setPending = false; try { // Loop until all files toSync references are sync'd: while (true) { ICollection<string> pending = new List<string>(); for (int i = 0; i < toSync.Count; i++) { SegmentInfo info = toSync.Info(i); List<string> files = info.Files(); for (int j = 0; j < files.Count; j++) { string fileName = files[j]; if (StartSync(fileName, pending)) { bool success = false; try { // Because we incRef'd this commit point, above, // the file had better exist: System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file '" + fileName + "' does not exist dir=" + directory); Message("now sync " + fileName); directory.Sync(fileName); success = true; } finally { FinishSync(fileName, success); } } } } // All files that I require are either synced or being // synced by other threads. If they are being synced, // we must at this point block until they are done. // If this returns false, that means an error in // another thread resulted in failing to actually // sync one of our files, so we repeat: if (WaitForAllSynced(pending)) break; } System.Diagnostics.Debug.Assert(TestPoint("midStartCommit2")); lock (this) { // If someone saved a newer version of segments file // since I first started syncing my version, I can // safely skip saving myself since I've been // superseded: if (myChangeCount > lastCommitChangeCount && (pendingCommit == null || myChangeCount > pendingCommitChangeCount)) { // Wait now for any current pending commit to complete: while (pendingCommit != null) { Message("wait for existing pendingCommit to finish..."); DoWait(); } if (segmentInfos.GetGeneration() > toSync.GetGeneration()) toSync.UpdateGeneration(segmentInfos); bool success = false; try { // Exception here means nothing is prepared // (this method unwinds everything it did on // an exception) try { toSync.PrepareCommit(directory); } finally { // Have our master segmentInfos record the // generations we just prepared. We do this // on error or success so we don't // double-write a segments_N file. segmentInfos.UpdateGeneration(toSync); } System.Diagnostics.Debug.Assert(pendingCommit == null); setPending = true; pendingCommit = toSync; // {{dougsale-2.4.0}}: // see pendingCommitChangeCount declaration pendingCommitChangeCount = (uint)myChangeCount; success = true; } finally { if (!success) Message("hit exception committing segments file"); } } else Message("sync superseded by newer infos"); } Message("done all syncs"); System.Diagnostics.Debug.Assert(TestPoint("midStartCommitSuccess")); } finally { lock (this) { if (!setPending) deleter.DecRef(toSync); } } } catch (System.OutOfMemoryException oom) { hitOOM = true; throw oom; } System.Diagnostics.Debug.Assert(TestPoint("finishStartCommit")); }
/** * Returns true iff the index in the named directory is * currently locked. * @param directory the directory to check for a lock * @throws System.IO.IOException if there is a low-level IO error */ public static bool IsLocked(Directory directory) { return directory.MakeLock(WRITE_LOCK_NAME).IsLocked(); }
/** * Returns true iff the index in the named directory is * currently locked.
* @param directory the directory to check for a lock * @throws System.IO.IOException if there is a low-level IO error */ public static bool IsLocked(string directory) { Directory dir = FSDirectory.GetDirectory(directory); try { return IsLocked(dir); } finally { dir.Close(); } }
/** * Forcibly unlocks the index in the named directory. *
* Caution: this should only be used by failure recovery code, * when it is known that no other process nor thread is in fact * currently accessing this index. */ public static void Unlock(Directory directory) { directory.MakeLock(IndexWriter.WRITE_LOCK_NAME).Release(); }
/** * Specifies maximum field length in {@link IndexWriter} constructors. * {@link #SetMaxFieldLength(int)} overrides the value set by * the constructor. */ public sealed class MaxFieldLength { private int limit; private string name; /** * Private type-safe-enum-pattern constructor. * * @param name instance name * @param limit maximum field length */ private MaxFieldLength(string name, int limit) { this.name = name; this.limit = limit; } /** * Public constructor to allow users to specify the maximum field size limit. * * @param limit The maximum field length */ public MaxFieldLength(int limit) : this("User-specified", limit) { } public int GetLimit() { return limit; } public override string ToString() { return name + ":" + limit; } /** Sets the maximum field length to int.MaxValue. */ public static readonly MaxFieldLength UNLIMITED = new MaxFieldLength("UNLIMITED", int.MaxValue); /** * Sets the maximum field length to * {@link #DEFAULT_MAX_FIELD_LENGTH} * */ public static readonly MaxFieldLength LIMITED = new MaxFieldLength("LIMITED", DEFAULT_MAX_FIELD_LENGTH); }
// Used only by assert for testing. Current points: // startDoFlush // startCommitMerge // startStartCommit // midStartCommit // midStartCommit2 // midStartCommitSuccess // finishStartCommit // startCommitMergeDeletes // startMergeInit // startApplyDeletes // DocumentsWriter.ThreadState.init start protected internal virtual bool TestPoint(string name) { return true; } } }
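// A minimal usage sketch for the lock utilities and MaxFieldLength above.
// Illustrative only; "index" is a hypothetical path and the constructor
// shown is the (Directory, Analyzer, MaxFieldLength) overload:
//
//   Directory dir = FSDirectory.GetDirectory("index");
//   try
//   {
//       if (IndexWriter.IsLocked(dir))
//       {
//           // Only safe when no other writer (process or thread) is live:
//           IndexWriter.Unlock(dir);
//       }
//       IndexWriter writer = new IndexWriter(dir,
//           new Lucene.Net.Analysis.Standard.StandardAnalyzer(),
//           new IndexWriter.MaxFieldLength(10000));
//       writer.Close();
//   }
//   finally
//   {
//       dir.Close();
//   }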

Merges all segments from an array of indexes into this index. // /// // ///

This may be used to parallelize batch indexing. A large document // /// collection can be broken into sub-collections. Each sub-collection can be // /// indexed in parallel, on a different thread, process or machine. The // /// complete index can then be created by merging sub-collection indexes // /// with this method. // /// // ///

NOTE: the index in each Directory must not be // /// changed (opened by a writer) while this method is // /// running. This method does not acquire a write lock in // /// each input Directory, so it is up to the caller to // /// enforce this. // /// // ///

NOTE: while this is running, any attempts to // /// add or delete documents (with another thread) will be // /// paused until this method completes. // /// // ///

After this completes, the index is optimized. // /// // ///

This method is transactional in how Exceptions are // /// handled: it does not commit a new segments_N file until // /// all indexes are added. This means if an Exception // /// occurs (for example disk full), then either no indexes // /// will have been added or they all will have been.

// /// // ///

If an Exception is hit, it's still possible that all // /// indexes were successfully added. This happens when the // /// Exception is hit when trying to build a CFS file. In // /// this case, one segment in the index will be in non-CFS // /// format, even when using compound file format.

// /// // ///

Also note that on an Exception, the index may still // /// have been partially or fully optimized even though none // /// of the input indexes were added.

// /// // ///

Note that this requires temporary free space in the // /// Directory up to 2X the sum of all input indexes // /// (including the starting index). If readers/searchers // /// are open against the starting index, then temporary // /// free space required will be higher by the size of the // /// starting index (see {@link #Optimize()} for details). // ///

// /// // ///

Once this completes, the final size of the index // /// will be less than the sum of all input index sizes // /// (including the starting index). It could be quite a // /// bit smaller (if there were many pending deletes) or // /// just slightly smaller.

// /// // ///

See LUCENE-702 // /// for details.

// ///
// /// CorruptIndexException if the index is corrupt // /// System.IO.IOException if there is a low-level IO error // public virtual void AddIndexes(Directory[] dirs) // { // EnsureOpen(); // // Do not allow add docs or deletes while we are running: // docWriter.PauseAllThreads(); // try // { // if (infoStream != null) // Message("flush at AddIndexes"); // Flush(); // bool success = false; // StartTransaction(); // try // { // lock (this) // { // for (int i = 0; i < dirs.Length; i++) // { // SegmentInfos sis = new SegmentInfos(); // read infos from dir // sis.Read(dirs[i]); // for (int j = 0; j < sis.Count; j++) // { // SegmentInfo info = sis.Info(j); // segmentInfos.Add(sis.Info(j)); // add each info // } // } // } // Optimize(); // success = true; // } // finally // { // if (success) // { // CommitTransaction(); // } // else // { // RollbackTransaction(); // } // } // } // catch (System.OutOfMemoryException oom) // { // hitOOM = true; // throw oom; // } // finally // { // docWriter.ResumeAllThreads(); // } // } // private void ResetMergeExceptions() // { // lock (this) // { // mergeExceptions = new System.Collections.ArrayList(); // mergeGen++; // } // } // /// Merges all segments from an array of indexes into this index. // ///

// /// This is similar to AddIndexes(Directory[]). However, no Optimize() // /// is called either at the beginning or at the end. Instead, merges // /// are carried out as necessary. // /// // ///

NOTE: the index in each Directory must not be // /// changed (opened by a writer) while this method is // /// running. This method does not acquire a write lock in // /// each input Directory, so it is up to the caller to // /// enforce this. // /// // ///

NOTE: while this is running, any attempts to // /// add or delete documents (with another thread) will be // /// paused until this method completes. // /// // ///

// /// This requires this index not be among those to be added, and the // /// upper bound* of those segment doc counts not exceed maxMergeDocs. // /// // ///

See {@link #AddIndexes(Directory[])} for // /// details on transactional semantics, temporary free // /// space required in the Directory, and non-CFS segments // /// on an Exception.

// ///
// /// CorruptIndexException if the index is corrupt // /// System.IO.IOException if there is a low-level IO error // public virtual void AddIndexesNoOptimize(Directory[] dirs) // { // EnsureOpen(); // // Do not allow add socs or deletes while we are running: // docWriter.PauseAllThreads(); // try // { // if (infoStream != null) // Message("flush at addIndexesNoOptimize"); // Flush(); // bool success = false; // StartTransaction(); // try // { // lock (this) // { // for (int i = 0; i < dirs.Length; i++) // { // if (directory == dirs[i]) // { // // cannot add this index: segments may be deleted in merge before added // throw new System.ArgumentException("Cannot add this index to itself"); // } // SegmentInfos sis = new SegmentInfos(); // read infos from dir // sis.Read(dirs[i]); // for (int j = 0; j < sis.Count; j++) // { // SegmentInfo info = sis.Info(j); // segmentInfos.Add(info); // add each info // } // } // } // MaybeMerge(); // // If after merging there remain segments in the index // // that are in a different directory, just copy these // // over into our index. This is necessary (before // // finishing the transaction) to avoid leaving the // // index in an unusable (inconsistent) state. // CopyExternalSegments(); // success = true; // } // finally // { // if (success) // { // CommitTransaction(); // } // else // { // RollbackTransaction(); // } // } // } // catch (System.OutOfMemoryException oom) // { // hitOOM = true; // throw oom; // } // finally // { // docWriter.ResumeAllThreads(); // } // } // /* If any of our segments are using a directory != ours // * then copy them over. Currently this is only used by // * AddIndexesNoOptimize(). */ // private void CopyExternalSegments() // { // bool any = false; // while (true) // { // SegmentInfo info = null; // MergePolicy.OneMerge merge = null; // lock (this) // { // int numSegments = segmentInfos.Count; // for (int i = 0; i < numSegments; i++) // { // info = segmentInfos.Info(i); // if (info.dir != directory) // { // merge = new MergePolicy.OneMerge(segmentInfos.Range(i, 1 + i), info.GetUseCompoundFile()); // break; // } // } // } // if (merge != null) // { // if (RegisterMerge(merge)) // { // pendingMerges.Remove(merge); // runningMerges.Add(merge, merge); // any = true; // Merge(merge); // } // else // { // // This means there is a bug in the // // MergeScheduler. MergeSchedulers in general are // // not allowed to run a merge involving segments // // external to this IndexWriter's directory in the // // background because this would put the index // // into an inconsistent state (where segmentInfos // // has been written with such external segments // // that an IndexReader would fail to load). // throw new MergePolicy.MergeException("segment \"" + info.name + " exists in external directory yet the MergeScheduler executed the merge in a separate thread"); // } // } // else // { // // No more external segments // break; // } // } // if (any) // // Sometimes, on copying an external segment over, // // more merges may become necessary: // mergeScheduler.Merge(this); // } // /// Merges the provided indexes into this index. // ///

After this completes, the index is optimized.

// ///

The provided IndexReaders are not closed.

// /// // ///

NOTE: the index in each Directory must not be // /// changed (opened by a writer) while this method is // /// running. Thiw method does not acquire a write lock in // /// each input Directory, so it is up to the caller to // /// enforce this. // ///

// /// // ///

NOTE: while this is running, any attempts to // /// add or delete documents (with another thread) will be // /// paused until this method completes.

// /// // ///

See {@link #AddIndexes(Directory[])} for // /// details on transactional semantics, temporary free // /// space required in the Directory, and non-CFS segments // /// on an Exception.

// ///
// /// CorruptIndexException if the index is corrupt // /// System.IO.IOException if there is a low-level IO error // public virtual void AddIndexes(IndexReader[] readers) // { // EnsureOpen(); // // Do not allow add docs or deletes while we are running: // docWriter.PauseAllThreads(); // try // { // Optimize(); // start with zero or 1 seg // string mergedName = NewSegmentName(); // SegmentMerger merger = new SegmentMerger(this, mergedName, null); // SegmentInfo info; // IndexReader sReader = null; // try // { // lock (this) // { // if (segmentInfos.Count == 1) // { // // add existing index, if any // sReader = SegmentReader.Get(segmentInfos.Info(0)); // merger.Add(sReader); // } // } // for (int i = 0; i < readers.Length; i++) // // add new indexes // merger.Add(readers[i]); // bool success = false; // StartTransaction(); // try // { // int docCount = merger.Merge(); // merge 'em // if (sReader != null) // { // sReader.Close(); // sReader = null; // } // lock (this) // { // segmentInfos.RemoveRange(0, segmentInfos.Count); // pop old infos & add new // info = new SegmentInfo(mergedName, docCount, directory, false, true, -1, null, false); // segmentInfos.Add(info); // } // success = true; // } // finally // { // if (!success) // { // if (infoStream != null) // Message("hit exception in AddIndexes during merge"); // RollbackTransaction(); // } // else // { // CommitTransaction(); // } // } // } // finally // { // if (sReader != null) // { // sReader.Close(); // } // } // if (mergePolicy is LogMergePolicy && GetUseCompoundFile()) // { // bool success = false; // StartTransaction(); // try // { // merger.CreateCompoundFile(mergedName + ".cfs"); // lock (this) // { // info.SetUseCompoundFile(true); // } // } // finally // { // if (!success) // { // if (infoStream != null) // Message("hit exception building compound file in AddIndexes during merge"); // RollbackTransaction(); // } // else // { // CommitTransaction(); // } // } // } // } // catch (System.OutOfMemoryException oom) // { // hitOOM = true; // throw oom; // } // finally // { // docWriter.ResumeAllThreads(); // } // } // // This is called after pending added and deleted // // documents have been flushed to the Directory but before // // the change is committed (new segments_N file written). // protected virtual void DoAfterFlush() // { // } // /// Flush all in-memory buffered updates (adds and deletes) // /// to the Directory. // ///

Note: if autoCommit=false, flushed data would still // /// not be visible to readers, until {@link #close} is called. // ///

// /// CorruptIndexException if the index is corrupt // /// System.IO.IOException if there is a low-level IO error // public void Flush() // { // Flush(true, false); // } // /// Flush all in-memory buffered udpates (adds and deletes) // /// to the Directory. // /// // /// if true, we may merge segments (if // /// deletes or docs were flushed) if necessary // /// // /// if false we are allowed to keep // /// doc stores open to share with the next segment // /// // public /*protected internal*/ void Flush(bool triggerMerge, bool flushDocStores) // { // EnsureOpen(); // if (DoFlush(flushDocStores) && triggerMerge) // MaybeMerge(); // } // private bool DoFlush(bool flushDocStores) // { // lock (this) // { // // Make sure no threads are actively adding a document // // Returns true if docWriter is currently aborting, in // // which case we skip flushing this segment // if (docWriter.PauseAllThreads()) // { // docWriter.ResumeAllThreads(); // return false; // } // try // { // SegmentInfo newSegment = null; // int numDocs = docWriter.GetNumDocsInRAM(); // // Always flush docs if there are any // bool flushDocs = numDocs > 0; // // With autoCommit=true we always must flush the doc // // stores when we flush // flushDocStores |= autoCommit; // string docStoreSegment = docWriter.GetDocStoreSegment(); // if (docStoreSegment == null) // flushDocStores = false; // // Always flush deletes if there are any delete terms. // // TODO: when autoCommit=false we don't have to flush // // deletes with every flushed segment; we can save // // CPU/IO by buffering longer & flushing deletes only // // when they are full or writer is being closed. We // // have to fix the "applyDeletesSelectively" logic to // // apply to more than just the last flushed segment // bool flushDeletes = docWriter.HasDeletes(); // if (infoStream != null) // { // Message(" flush: segment=" + docWriter.GetSegment() + " docStoreSegment=" + docWriter.GetDocStoreSegment() + " docStoreOffset=" + docWriter.GetDocStoreOffset() + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms()); // Message(" index before flush " + SegString()); // } // int docStoreOffset = docWriter.GetDocStoreOffset(); // // docStoreOffset should only be non-zero when // // autoCommit == false // System.Diagnostics.Debug.Assert(!autoCommit || 0 == docStoreOffset); // bool docStoreIsCompoundFile = false; // // Check if the doc stores must be separately flushed // // because other segments, besides the one we are about // // to flush, reference it // if (flushDocStores && (!flushDocs || !docWriter.GetSegment().Equals(docWriter.GetDocStoreSegment()))) // { // // We must separately flush the doc store // if (infoStream != null) // Message(" flush shared docStore segment " + docStoreSegment); // docStoreIsCompoundFile = FlushDocStores(); // flushDocStores = false; // } // string segment = docWriter.GetSegment(); // // If we are flushing docs, segment must not be null: // System.Diagnostics.Debug.Assert(segment != null || !flushDocs); // if (flushDocs || flushDeletes) // { // SegmentInfos rollback = null; // if (flushDeletes) // rollback = (SegmentInfos)segmentInfos.Clone(); // bool success = false; // try // { // if (flushDocs) // { // if (0 == docStoreOffset && flushDocStores) // { // // This means we are flushing private doc stores // // with this segment, so it will not be shared // // with other segments // 
System.Diagnostics.Debug.Assert(docStoreSegment != null); // System.Diagnostics.Debug.Assert(docStoreSegment.Equals(segment)); // docStoreOffset = -1; // docStoreIsCompoundFile = false; // docStoreSegment = null; // } // int flushedDocCount = docWriter.Flush(flushDocStores); // newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile); // segmentInfos.Add(newSegment); // } // if (flushDeletes) // { // // we should be able to change this so we can // // buffer deletes longer and then flush them to // // multiple flushed segments, when // // autoCommit=false // ApplyDeletes(flushDocs); // } // DoAfterFlush(); // Checkpoint(); // success = true; // } // finally // { // if (!success) // { // if (infoStream != null) // Message("hit exception flushing segment " + segment); // if (flushDeletes) // { // // Carefully check if any partial .del files // // should be removed: // int size = rollback.Count; // for (int i = 0; i < size; i++) // { // string newDelFileName = segmentInfos.Info(i).GetDelFileName(); // string delFileName = rollback.Info(i).GetDelFileName(); // if (newDelFileName != null && !newDelFileName.Equals(delFileName)) // deleter.DeleteFile(newDelFileName); // } // // Fully replace the segmentInfos since flushed // // deletes could have changed any of the // // SegmentInfo instances: // segmentInfos.Clear(); // segmentInfos.AddRange(rollback); // } // else // { // // Remove segment we added, if any: // if (newSegment != null && segmentInfos.Count > 0 && segmentInfos.Info(segmentInfos.Count - 1) == newSegment) // segmentInfos.RemoveAt(segmentInfos.Count - 1); // } // if (flushDocs) // docWriter.Abort(null); // DeletePartialSegmentsFile(); // deleter.Checkpoint(segmentInfos, false); // if (segment != null) // deleter.Refresh(segment); // } // } // deleter.Checkpoint(segmentInfos, autoCommit); // if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment)) // { // success = false; // try // { // docWriter.CreateCompoundFile(segment); // newSegment.SetUseCompoundFile(true); // Checkpoint(); // success = true; // } // finally // { // if (!success) // { // if (infoStream != null) // Message("hit exception creating compound file for newly flushed segment " + segment); // newSegment.SetUseCompoundFile(false); // deleter.DeleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); // DeletePartialSegmentsFile(); // } // } // deleter.Checkpoint(segmentInfos, autoCommit); // } // return true; // } // else // { // return false; // } // } // catch (System.OutOfMemoryException oom) // { // hitOOM = true; // throw oom; // } // finally // { // docWriter.ClearFlushPending(); // docWriter.ResumeAllThreads(); // } // } // } // /// Expert: Return the total size of all index files currently cached in memory. // /// Useful for size management with flushRamDocs() // /// // public long RamSizeInBytes() // { // EnsureOpen(); // return docWriter.GetRAMUsed(); // } // /// Expert: Return the number of documents whose segments are currently cached in memory. 
// /// Useful when calling Flush() // /// // public int NumRamDocs() // { // lock (this) // { // EnsureOpen(); // return docWriter.GetNumDocsInRAM(); // } // } // private int EnsureContiguousMerge(MergePolicy.OneMerge merge) // { // int first = segmentInfos.IndexOf(merge.segments.Info(0)); // if (first == - 1) // throw new MergePolicy.MergeException("could not find segment " + merge.segments.Info(0).name + " in current segments"); // int numSegments = segmentInfos.Count; // int numSegmentsToMerge = merge.segments.Count; // for (int i = 0; i < numSegmentsToMerge; i++) // { // SegmentInfo info = merge.segments.Info(i); // if (first + i >= numSegments || !segmentInfos.Info(first + i).Equals(info)) // { // if (segmentInfos.IndexOf(info) == - 1) // throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the index"); // else // { // throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge + " vs " + SegString() + "), which IndexWriter (currently) cannot handle"); // } // } // } // return first; // } // /* FIXME if we want to support non-contiguous segment merges */ // private bool CommitMerge(MergePolicy.OneMerge merge) // { // lock (this) // { // System.Diagnostics.Debug.Assert(merge.registerDone); // if (hitOOM) // return false; // if (infoStream != null) // Message("CommitMerge: " + merge.SegString(directory)); // // If merge was explicitly aborted, or, if Abort() or // // RollbackTransaction() had been called since our merge // // started (which results in an unqualified // // deleter.Refresh() call that will remove any index // // file that current segments does not reference), we // // abort this merge // if (merge.IsAborted()) // { // if (infoStream != null) // Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted"); // System.Diagnostics.Debug.Assert(merge.increfDone); // DecrefMergeSegments(merge); // deleter.Refresh(merge.info.name); // return false; // } // bool success = false; // int start; // try // { // SegmentInfos sourceSegmentsClone = merge.segmentsClone; // SegmentInfos sourceSegments = merge.segments; // start = EnsureContiguousMerge(merge); // if (infoStream != null) // Message("commitMerge " + merge.SegString(directory)); // // Carefully merge deletes that occurred after we // // started merging: // BitVector deletes = null; // int docUpto = 0; // int numSegmentsToMerge = sourceSegments.Count; // for (int i = 0; i < numSegmentsToMerge; i++) // { // SegmentInfo previousInfo = sourceSegmentsClone.Info(i); // SegmentInfo currentInfo = sourceSegments.Info(i); // System.Diagnostics.Debug.Assert(currentInfo.docCount == previousInfo.docCount); // int docCount = currentInfo.docCount; // if (previousInfo.HasDeletions()) // { // // There were deletes on this segment when the merge // // started. The merge has collapsed away those // // deletes, but, if new deletes were flushed since // // the merge started, we must now carefully keep any // // newly flushed deletes but mapping them to the new // // docIDs. 
// System.Diagnostics.Debug.Assert(currentInfo.HasDeletions()); // // Load deletes present @ start of merge, for this segment: // BitVector previousDeletes = new BitVector(previousInfo.dir, previousInfo.GetDelFileName()); // if (!currentInfo.GetDelFileName().Equals(previousInfo.GetDelFileName())) // { // // This means this segment has had new deletes // // committed since we started the merge, so we // // must merge them: // if (deletes == null) // deletes = new BitVector(merge.info.docCount); // BitVector currentDeletes = new BitVector(currentInfo.dir, currentInfo.GetDelFileName()); // for (int j = 0; j < docCount; j++) // { // if (previousDeletes.Get(j)) // System.Diagnostics.Debug.Assert(currentDeletes.Get(j)); // else // { // if (currentDeletes.Get(j)) // deletes.Set(docUpto); // docUpto++; // } // } // } // else // docUpto += docCount - previousDeletes.Count(); // } // else if (currentInfo.HasDeletions()) // { // // This segment had no deletes before but now it // // does: // if (deletes == null) // deletes = new BitVector(merge.info.docCount); // BitVector currentDeletes = new BitVector(directory, currentInfo.GetDelFileName()); // for (int j = 0; j < docCount; j++) // { // if (currentDeletes.Get(j)) // deletes.Set(docUpto); // docUpto++; // } // } // // No deletes before or after // else // docUpto += currentInfo.docCount; // merge.CheckAborted(directory); // } // if (deletes != null) // { // merge.info.AdvanceDelGen(); // deletes.Write(directory, merge.info.GetDelFileName()); // } // success = true; // } // finally // { // if (!success) // { // if (infoStream != null) // Message("hit exception creating merged deletes file"); // deleter.Refresh(merge.info.name); // } // } // // Simple optimization: if the doc store we are using // // has been closed and is in now compound format (but // // wasn't when we started), then we will switch to the // // compound format as well: // string mergeDocStoreSegment = merge.info.GetDocStoreSegment(); // if (mergeDocStoreSegment != null && !merge.info.GetDocStoreIsCompoundFile()) // { // int size = segmentInfos.Count; // for (int i = 0; i < size; i++) // { // SegmentInfo info = segmentInfos.Info(i); // string docStoreSegment = info.GetDocStoreSegment(); // if (docStoreSegment != null && docStoreSegment.Equals(mergeDocStoreSegment) && info.GetDocStoreIsCompoundFile()) // { // merge.info.SetDocStoreIsCompoundFile(true); // break; // } // } // } // success = false; // SegmentInfos rollback = null; // try // { // rollback = (SegmentInfos) segmentInfos.Clone(); // ((System.Collections.IList) ((System.Collections.ArrayList) segmentInfos).GetRange(start, start + merge.segments.Count - start)).Clear(); // segmentInfos.Insert(start, merge.info); // Checkpoint(); // success = true; // } // finally // { // if (!success && rollback != null) // { // if (infoStream != null) // Message("hit exception when checkpointing after merge"); // segmentInfos.Clear(); // segmentInfos.AddRange(rollback); // DeletePartialSegmentsFile(); // deleter.Refresh(merge.info.name); // } // } // if (merge.optimize) // segmentsToOptimize.Add(merge.info, merge.info); // // Must checkpoint before decrefing so any newly // // referenced files in the new merge.info are incref'd // // first: // deleter.Checkpoint(segmentInfos, autoCommit); // DecrefMergeSegments(merge); // return true; // } // } // private void DecrefMergeSegments(MergePolicy.OneMerge merge) // { // SegmentInfos sourceSegmentsClone = merge.segmentsClone; // int numSegmentsToMerge = sourceSegmentsClone.Count; // 
System.Diagnostics.Debug.Assert(merge.increfDone); // merge.increfDone = false; // for (int i = 0; i < numSegmentsToMerge; i++) // { // SegmentInfo previousInfo = sourceSegmentsClone.Info(i); // // Decref all files for this SegmentInfo (this // // matches the incref in mergeInit): // if (previousInfo.dir == directory) // deleter.DecRef(previousInfo.Files()); // } // } // /// Merges the indicated segments, replacing them in the stack with a // /// single segment. // /// // public /*internal*/ void Merge(MergePolicy.OneMerge merge) // { // System.Diagnostics.Debug.Assert(merge.registerDone); // System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0); // bool success = false; // try // { // try // { // try // { // if (merge.info == null) // MergeInit(merge); // if (infoStream != null) // Message("now merge\n merge=" + merge.SegString(directory) + "\n index=" + SegString()); // MergeMiddle(merge); // success = true; // } // catch (MergePolicy.MergeAbortedException e) // { // merge.SetException(e); // AddMergeException(merge); // // We can ignore this exception, unless the merge // // involves segments from external directories, in // // which case we must throw it so, for example, the // // rollbackTransaction code in AddIndexes* is // // executed. // if (merge.isExternal) // throw e; // } // } // finally // { // lock (this) // { // try // { // MergeFinish(merge); // if (!success) // { // if (infoStream != null) // Message("hit exception during merge"); // AddMergeException(merge); // if (merge.info != null && !segmentInfos.Contains(merge.info)) // deleter.Refresh(merge.info.name); // } // // This merge (and, generally, any change to the // // segments) may now enable new merges, so we call // // merge policy & update pending merges. // if (success && !merge.IsAborted() && !closed && !closing) // UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize); // } // finally // { // runningMerges.Remove(merge); // // Optimize may be waiting on the final optimize // // merge to finish; and finishMerges() may be // // waiting for all merges to finish: // System.Threading.Monitor.PulseAll(this); // } // } // } // } // catch (System.OutOfMemoryException oom) // { // hitOOM = true; // throw oom; // } // } // /// Checks whether this merge involves any segments // /// already participating in a merge. If not, this merge // /// is "registered", meaning we record that its segments // /// are now participating in a merge, and true is // /// returned. Else (the merge conflicts) false is // /// returned. 
// /// // internal bool RegisterMerge(MergePolicy.OneMerge merge) // { // lock (this) // { // if (merge.registerDone) // return true; // int count = merge.segments.Count; // bool isExternal = false; // for (int i = 0; i < count; i++) // { // SegmentInfo info = merge.segments.Info(i); // if (mergingSegments.Contains(info)) // return false; // if (segmentInfos.IndexOf(info) == - 1) // return false; // if (info.dir != directory) // isExternal = true; // } // pendingMerges.Add(merge); // if (infoStream != null) // Message("add merge to pendingMerges: " + merge.SegString(directory) + " [total " + pendingMerges.Count + " pending]"); // merge.mergeGen = mergeGen; // merge.isExternal = isExternal; // // OK it does not conflict; now record that this merge // // is running (while synchronized) to avoid race // // condition where two conflicting merges from different // // threads, start // for (int i = 0; i < count; i++) // if (!mergingSegments.Contains(merge.segments.Info(i))) // mergingSegments.Add(merge.segments.Info(i), merge.segments.Info(i)); // // Merge is now registered // merge.registerDone = true; // return true; // } // } // /// Does initial setup for a merge, which is fast but holds // /// the synchronized lock on IndexWriter instance. // /// // internal void MergeInit(MergePolicy.OneMerge merge) // { // lock (this) // { // bool success = false; // try // { // _MergeInit(merge); // success = true; // } // finally // { // if (!success) // { // MergeFinish(merge); // runningMerges.Remove(merge); // } // } // } // } // internal void _MergeInit(MergePolicy.OneMerge merge) // { // lock (this) // { // System.Diagnostics.Debug.Assert(TestPoint("startMergeInit")); // System.Diagnostics.Debug.Assert(merge.registerDone); // if (merge.info != null) // // mergeInit already done // return; // if (merge.IsAborted()) // return ; // SegmentInfos sourceSegments = merge.segments; // int end = sourceSegments.Count; // EnsureContiguousMerge(merge); // // Check whether this merge will allow us to skip // // merging the doc stores (stored field & vectors). // // This is a very substantial optimization (saves tons // // of IO) that can only be applied with // // autoCommit=false. // Directory lastDir = directory; // string lastDocStoreSegment = null; // int next = - 1; // bool mergeDocStores = false; // bool doFlushDocStore = false; // string currentDocStoreSegment = docWriter.GetDocStoreSegment(); // // Test each segment to be merged: check if we need to // // flush/merge doc stores // for (int i = 0; i < end; i++) // { // SegmentInfo si = sourceSegments.Info(i); // // If it has deletions we must merge the doc stores // if (si.HasDeletions()) // mergeDocStores = true; // // If it has its own (private) doc stores we must // // merge the doc stores // if (- 1 == si.GetDocStoreOffset()) // mergeDocStores = true; // // If it has a different doc store segment than // // previous segments, we must merge the doc stores // string docStoreSegment = si.GetDocStoreSegment(); // if (docStoreSegment == null) // mergeDocStores = true; // else if (lastDocStoreSegment == null) // lastDocStoreSegment = docStoreSegment; // else if (!lastDocStoreSegment.Equals(docStoreSegment)) // mergeDocStores = true; // // Segments' docScoreOffsets must be in-order, // // contiguous. 
For the default merge policy now // // this will always be the case but for an arbitrary // // merge policy this may not be the case // if (- 1 == next) // next = si.GetDocStoreOffset() + si.docCount; // else if (next != si.GetDocStoreOffset()) // mergeDocStores = true; // else // next = si.GetDocStoreOffset() + si.docCount; // // If the segment comes from a different directory // // we must merge // if (lastDir != si.dir) // mergeDocStores = true; // // If the segment is referencing the current "live" // // doc store outputs then we must merge // if (si.GetDocStoreOffset() != - 1 && currentDocStoreSegment != null && si.GetDocStoreSegment().Equals(currentDocStoreSegment)) // doFlushDocStore = true; // } // int docStoreOffset; // string docStoreSegment2; // bool docStoreIsCompoundFile; // if (mergeDocStores) // { // docStoreOffset = - 1; // docStoreSegment2 = null; // docStoreIsCompoundFile = false; // } // else // { // SegmentInfo si = sourceSegments.Info(0); // docStoreOffset = si.GetDocStoreOffset(); // docStoreSegment2 = si.GetDocStoreSegment(); // docStoreIsCompoundFile = si.GetDocStoreIsCompoundFile(); // } // if (mergeDocStores && doFlushDocStore) // { // // SegmentMerger intends to merge the doc stores // // (stored fields, vectors), and at least one of the // // segments to be merged refers to the currently // // live doc stores. // // TODO: if we know we are about to merge away these // // newly flushed doc store files then we should not // // make compound file out of them... // if (infoStream != null) // Message("flush at merge"); // Flush(false, true); // } // // We must take a full copy at this point so that we can // // properly merge deletes in CommitMerge() // merge.segmentsClone = (SegmentInfos) merge.segments.Clone(); // for (int i = 0; i < end; i++) // { // SegmentInfo si = merge.segmentsClone.Info(i); // // IncRef all files for this segment info to make sure // // they are not removed while we are trying to merge. // if (si.dir == directory) // deleter.IncRef(si.Files()); // } // merge.increfDone = true; // merge.mergeDocStores = mergeDocStores; // // Bind a new segment name here so even with // // ConcurrentMergePolicy we keep deterministic segment // // names. // merge.info = new SegmentInfo(NewSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment2, docStoreIsCompoundFile); // // Also enroll the merged segment into mergingSegments; // // this prevents it from getting selected for a merge // // after our merge is done but while we are building the // // CFS: // mergingSegments.Add(merge.info, merge.info); // } // } // /// Does fininishing for a merge, which is fast but holds // /// the synchronized lock on IndexWriter instance. 
// /// Does the actual (time-consuming) work of the merge,
// /// but without holding the synchronized lock on the
// /// IndexWriter instance
// ///
// private int MergeMiddle(MergePolicy.OneMerge merge)
// {
//     merge.CheckAborted(directory);
//
//     string mergedName = merge.info.name;
//     SegmentMerger merger = null;
//     int mergedDocCount = 0;
//
//     SegmentInfos sourceSegments = merge.segments;
//     SegmentInfos sourceSegmentsClone = merge.segmentsClone;
//     int numSegments = sourceSegments.Count;
//
//     if (infoStream != null)
//         Message("merging " + merge.SegString(directory));
//
//     merger = new SegmentMerger(this, mergedName, merge);
//
//     // This is try/finally to make sure merger's readers are
//     // closed:
//     bool success = false;
//     try
//     {
//         int totDocCount = 0;
//         for (int i = 0; i < numSegments; i++)
//         {
//             SegmentInfo si = sourceSegmentsClone.Info(i);
//             IndexReader reader = SegmentReader.Get(si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet)
//             merger.Add(reader);
//             totDocCount += reader.NumDocs();
//         }
//         if (infoStream != null)
//         {
//             Message("merge: total " + totDocCount + " docs");
//         }
//
//         merge.CheckAborted(directory);
//
//         mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);
//
//         System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);
//
//         success = true;
//     }
//     finally
//     {
//         // close readers before we attempt to delete
//         // now-obsolete segments
//         if (merger != null)
//         {
//             merger.CloseReaders();
//         }
//         if (!success)
//         {
//             if (infoStream != null)
//                 Message("hit exception during merge; now refresh deleter on segment " + mergedName);
//             lock (this)
//             {
//                 AddMergeException(merge);
//                 deleter.Refresh(mergedName);
//             }
//         }
//     }
//
//     if (!CommitMerge(merge))
//         // commitMerge will return false if this merge was aborted
//         return 0;
//
//     if (merge.useCompoundFile)
//     {
//         success = false;
//         bool skip = false;
//         string compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION;
//
//         try
//         {
//             try
//             {
//                 merger.CreateCompoundFile(compoundFileName);
//                 success = true;
//             }
//             catch (System.IO.IOException ioe)
//             {
//                 lock (this)
//                 {
//                     if (segmentInfos.IndexOf(merge.info) == -1)
//                     {
//                         // If another merge kicked in and merged our
//                         // new segment away while we were trying to
//                         // build the compound file, we can hit a
//                         // FileNotFoundException and possibly
//                         // System.IO.IOException over NFS.  We can tell this has
//                         // happened because our SegmentInfo is no
//                         // longer in the segments; if this has
//                         // happened it is safe to ignore the exception
//                         // & skip finishing/committing our compound
//                         // file creation.
//                         if (infoStream != null)
//                             Message("hit exception creating compound file; ignoring it because our info (segment " + merge.info.name + ") has been merged away");
//                         skip = true;
//                     }
//                     else
//                         throw ioe;
//                 }
//             }
//         }
//         finally
//         {
//             if (!success)
//             {
//                 if (infoStream != null)
//                     Message("hit exception creating compound file during merge: skip=" + skip);
//                 lock (this)
//                 {
//                     if (!skip)
//                         AddMergeException(merge);
//                     deleter.DeleteFile(compoundFileName);
//                 }
//             }
//         }
//
//         if (!skip)
//         {
//             lock (this)
//             {
//                 if (skip || segmentInfos.IndexOf(merge.info) == -1 || merge.IsAborted())
//                 {
//                     // Our segment (committed in non-compound
//                     // format) got merged away while we were
//                     // building the compound format.
//                     deleter.DeleteFile(compoundFileName);
//                 }
//                 else
//                 {
//                     success = false;
//                     try
//                     {
//                         merge.info.SetUseCompoundFile(true);
//                         Checkpoint();
//                         success = true;
//                     }
//                     finally
//                     {
//                         if (!success)
//                         {
//                             if (infoStream != null)
//                                 Message("hit exception checkpointing compound file during merge");
//
//                             // Must rollback:
//                             AddMergeException(merge);
//                             merge.info.SetUseCompoundFile(false);
//                             DeletePartialSegmentsFile();
//                             deleter.DeleteFile(compoundFileName);
//                         }
//                     }
//
//                     // Give deleter a chance to remove files now.
//                     deleter.Checkpoint(segmentInfos, autoCommit);
//                 }
//             }
//         }
//     }
//
//     return mergedDocCount;
// }
// internal virtual void AddMergeException(MergePolicy.OneMerge merge)
// {
//     lock (this)
//     {
//         if (!mergeExceptions.Contains(merge) && mergeGen == merge.mergeGen)
//             mergeExceptions.Add(merge);
//     }
// }
//
// private void DeletePartialSegmentsFile()
// {
//     if (segmentInfos.GetLastGeneration() != segmentInfos.GetGeneration())
//     {
//         string segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", segmentInfos.GetGeneration());
//         if (infoStream != null)
//             Message("now delete partial segments file \"" + segmentFileName + "\"");
//         deleter.DeleteFile(segmentFileName);
//     }
// }
//
// // Called during flush to apply any buffered deletes.  If
// // flushedNewSegment is true then a new segment was just
// // created and flushed from the ram segments, so we will
// // selectively apply the deletes to that new segment.
// private void ApplyDeletes(bool flushedNewSegment)
// {
//     System.Collections.Hashtable bufferedDeleteTerms = docWriter.GetBufferedDeleteTerms();
//     System.Collections.IList bufferedDeleteDocIDs = docWriter.GetBufferedDeleteDocIDs();
//
//     if (infoStream != null)
//         Message("flush " + docWriter.GetNumBufferedDeleteTerms() + " buffered deleted terms and " + bufferedDeleteDocIDs.Count + " deleted docIDs on " + segmentInfos.Count + " segments.");
//
//     if (flushedNewSegment)
//     {
//         IndexReader reader = null;
//         try
//         {
//             // Open readers w/o opening the stored fields /
//             // vectors because these files may still be held
//             // open for writing by docWriter
//             reader = SegmentReader.Get(segmentInfos.Info(segmentInfos.Count - 1), false);
//
//             // Apply delete terms to the segment just flushed from ram,
//             // applied appropriately so that a delete term is only applied to
//             // the documents buffered before it, not those buffered after it.
//             ApplyDeletesSelectively(bufferedDeleteTerms, bufferedDeleteDocIDs, reader);
//         }
//         finally
//         {
//             if (reader != null)
//             {
//                 try
//                 {
//                     reader.DoCommit();
//                 }
//                 finally
//                 {
//                     reader.DoClose();
//                 }
//             }
//         }
//     }
//
//     int infosEnd = segmentInfos.Count;
//     if (flushedNewSegment)
//     {
//         infosEnd--;
//     }
//
//     for (int i = 0; i < infosEnd; i++)
//     {
//         IndexReader reader = null;
//         try
//         {
//             reader = SegmentReader.Get(segmentInfos.Info(i), false);
//
//             // Apply delete terms to disk segments
//             // except the one just flushed from ram.
//             ApplyDeletes(bufferedDeleteTerms, reader);
//         }
//         finally
//         {
//             if (reader != null)
//             {
//                 try
//                 {
//                     reader.DoCommit();
//                 }
//                 finally
//                 {
//                     reader.DoClose();
//                 }
//             }
//         }
//     }
//
//     // Clean up bufferedDeleteTerms.
//     docWriter.ClearBufferedDeletes();
// }
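// To illustrate the split in ApplyDeletes above (illustrative sequence):
// if the application buffered docs A, B, C, then a delete on term T,
// then doc D, the just-flushed segment may only apply T to A, B and C
// (ApplyDeletesSelectively below handles that), while every older
// on-disk segment can safely delete all of its documents matching T.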
// // For test purposes.
// public /*internal*/ int GetBufferedDeleteTermsSize()
// {
//     lock (this)
//     {
//         return docWriter.GetBufferedDeleteTerms().Count;
//     }
// }
//
// // For test purposes.
// public /*internal*/ int GetNumBufferedDeleteTerms()
// {
//     lock (this)
//     {
//         return docWriter.GetNumBufferedDeleteTerms();
//     }
// }
//
// // Apply buffered delete terms to the segment just flushed from ram,
// // applied appropriately so that a delete term is only applied to
// // the documents buffered before it, not those buffered after it.
// private void ApplyDeletesSelectively(System.Collections.Hashtable deleteTerms, System.Collections.IList deleteIds, IndexReader reader)
// {
//     System.Collections.IEnumerator iter = new System.Collections.Hashtable(deleteTerms).GetEnumerator();
//     while (iter.MoveNext())
//     {
//         System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
//         Term term = (Term) entry.Key;
//
//         TermDocs docs = reader.TermDocs(term);
//         if (docs != null)
//         {
//             int num = ((DocumentsWriter.Num) entry.Value).GetNum();
//             try
//             {
//                 while (docs.Next())
//                 {
//                     int doc = docs.Doc();
//                     if (doc >= num)
//                     {
//                         break;
//                     }
//                     reader.DeleteDocument(doc);
//                 }
//             }
//             finally
//             {
//                 docs.Close();
//             }
//         }
//     }
//
//     if (deleteIds.Count > 0)
//     {
//         iter = deleteIds.GetEnumerator();
//         while (iter.MoveNext())
//         {
//             reader.DeleteDocument(((System.Int32) iter.Current));
//         }
//     }
// }
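// A concrete (hypothetical) trace of the num cutoff above: if term T
// was buffered after 5 documents had been added to the newly flushed
// segment, its recorded num is 5, so the loop deletes matching docs
// 0 through 4 and breaks as soon as docs.Doc() reaches 5; documents
// added after the delete was buffered survive.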
// // Apply buffered delete terms to this reader.
// private void ApplyDeletes(System.Collections.Hashtable deleteTerms, IndexReader reader)
// {
//     System.Collections.IEnumerator iter = new System.Collections.Hashtable(deleteTerms).GetEnumerator();
//     while (iter.MoveNext())
//     {
//         System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
//         reader.DeleteDocuments((Term) entry.Key);
//     }
// }
//
// // Utility routines for tests
// public /*internal*/ virtual SegmentInfo NewestSegment()
// {
//     return segmentInfos.Info(segmentInfos.Count - 1);
// }
//
// public virtual string SegString()
// {
//     lock (this)
//     {
//         System.Text.StringBuilder buffer = new System.Text.StringBuilder();
//         for (int i = 0; i < segmentInfos.Count; i++)
//         {
//             if (i > 0)
//             {
//                 buffer.Append(' ');
//             }
//             buffer.Append(segmentInfos.Info(i).SegString(directory));
//         }
//         return buffer.ToString();
//     }
// }
//
// static IndexWriter()
// {
//     DEFAULT_MERGE_FACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
//     DEFAULT_MAX_MERGE_DOCS = LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS;
//     MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
//     if (Constants.WINDOWS)
//         DEFAULT_MAX_SYNC_PAUSE_SECONDS = 10.0;
//     else
//         DEFAULT_MAX_SYNC_PAUSE_SECONDS = 0.0;
// }
//
// // Used only by assert for testing.
// virtual protected internal bool TestPoint(string name)
// {
//     return true;
// }
//}