/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Document = Lucene.Net.Documents.Document;
using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using Lock = Lucene.Net.Store.Lock;
using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException;
using BitVector = Lucene.Net.Util.BitVector;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using Similarity = Lucene.Net.Search.Similarity;
using System.Collections;
namespace Lucene.Net.Index
{
/// An IndexWriter creates and maintains an index.
///
/// The create argument to the constructor determines whether a new
/// index is created, or whether an existing index is opened. Note that
/// you can open an index with create=true even while readers are using
/// the index. The old readers will continue to search the "point in
/// time" snapshot they had opened, and won't see the newly created
/// index until they re-open. There are also constructors with no
/// create argument which will create a new index if there is not
/// already an index at the provided path and otherwise open the
/// existing index.
///
/// In either case, documents are added with addDocument and removed
/// with deleteDocuments. A document can be updated with updateDocument
/// (which just deletes and then adds the entire document). When
/// finished adding, deleting and updating documents, close should be
/// called.
///
/// These changes are buffered in memory and periodically flushed to
/// the {@link Directory} (during the above method calls). A flush is
/// triggered when there are enough buffered deletes (see {@link
/// #SetMaxBufferedDeleteTerms}) or enough added documents since the
/// last flush, whichever is sooner. For the added documents, flushing
/// is triggered either by RAM usage of the documents (see {@link
/// #SetRAMBufferSizeMB}) or the number of added documents. The default
/// is to flush when RAM usage hits 16 MB. For best indexing speed you
/// should flush by RAM usage with a large RAM buffer. You can also
/// force a flush by calling {@link #Flush}. When a flush occurs, both
/// pending deletes and added documents are flushed to the index. A
/// flush may also trigger one or more segment merges, which by default
/// run in a background thread so as not to block the addDocument calls
/// (see below for changing the {@link MergeScheduler}).
///
/// The optional autoCommit argument to the constructors controls
/// visibility of the changes to {@link IndexReader} instances reading
/// the same index. When this is false, changes are not visible until
/// {@link #Close()} is called. Note that changes will still be flushed
/// to the {@link Lucene.Net.Store.Directory} as new files, but are not
/// committed (no new segments_N file is written referencing the new
/// files) until {@link #Close} is called. If something goes terribly
/// wrong (for example the JVM crashes) before {@link #Close()}, then
/// the index will reflect none of the changes made (it will remain in
/// its starting state). You can also call {@link #Abort()}, which
/// closes the writer without committing any changes, and removes any
/// index files that had been flushed but are now unreferenced. This
/// mode is useful for preventing readers from refreshing at a bad time
/// (for example after you've done all your deletes but before you've
/// done your adds). It can also be used to implement simple
/// single-writer transactional semantics ("all or none").
///
/// When autoCommit is true, every flush is also a commit ({@link
/// IndexReader} instances will see each flush as changes to the
/// index). This is the default, to match the behavior before 2.2.
/// When running in this mode, be careful not to refresh your readers
/// while optimize or segment merges are taking place, as this can tie
/// up substantial disk space.
///
/// Regardless of autoCommit, an {@link IndexReader} or {@link
/// Lucene.Net.Search.IndexSearcher} will only see the index as of the
/// "point in time" that it was opened. Any changes committed to the
/// index after the reader was opened are not visible until the reader
/// is re-opened.
///
/// If an index will not have more documents added for a while and
/// optimal search performance is desired, then the optimize method
/// should be called before the index is closed.
///
/// Opening an IndexWriter creates a lock file for the directory in
/// use. Trying to open another IndexWriter on the same directory will
/// lead to a {@link LockObtainFailedException}. The {@link
/// LockObtainFailedException} is also thrown if an IndexReader on the
/// same directory is used to delete documents from the index.
///
/// Expert: IndexWriter allows an optional {@link IndexDeletionPolicy}
/// implementation to be specified. You can use this to control when
/// prior commits are deleted from the index. The default policy is
/// {@link KeepOnlyLastCommitDeletionPolicy}, which removes all prior
/// commits as soon as a new commit is done (this matches behavior
/// before 2.2). Creating your own policy can allow you to explicitly
/// keep previous "point in time" commits alive in the index for some
/// time, to allow readers to refresh to the new commit without having
/// the old commit deleted out from under them. This is necessary on
/// filesystems like NFS that do not support "delete on last close"
/// semantics, which Lucene's "point in time" search normally relies
/// on.
///
/// Expert: IndexWriter allows you to separately change the {@link
/// MergePolicy} and the {@link MergeScheduler}. The {@link
/// MergePolicy} is invoked whenever there are changes to the segments
/// in the index. Its role is to select which merges to do, if any,
/// and return a {@link MergePolicy.MergeSpecification} describing the
/// merges. It also selects merges to do for optimize(). (The default
/// is {@link LogByteSizeMergePolicy}.) Then the {@link MergeScheduler}
/// is invoked with the requested merges and it decides when and how to
/// run the merges. The default is {@link ConcurrentMergeScheduler}.
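///
/// A minimal usage sketch (the path, analyzer choice, and field flags
/// here are illustrative, not prescriptive):
///
///   IndexWriter writer = new IndexWriter("/tmp/myindex",
///       new Lucene.Net.Analysis.Standard.StandardAnalyzer(), true);
///   Document doc = new Document();
///   doc.Add(new Lucene.Net.Documents.Field("contents", "hello world",
///       Lucene.Net.Documents.Field.Store.YES,
///       Lucene.Net.Documents.Field.Index.TOKENIZED));
///   writer.AddDocument(doc);
///   writer.Close();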
/*
* Clarification: Check Points (and commits)
* Being able to set autoCommit=false allows IndexWriter to flush and
* write new index files to the directory without writing a new segments_N
* file which references these new files. It also means that the state of
* the in memory SegmentInfos object is different than the most recent
* segments_N file written to the directory.
*
* Each time the SegmentInfos is changed, and matches the (possibly
* modified) directory files, we have a new "check point".
* If the modified/new SegmentInfos is written to disk - as a new
* (generation of) segments_N file - this check point is also an
* IndexCommitPoint.
*
* With autoCommit=true, every checkPoint is also a CommitPoint.
* With autoCommit=false, some checkPoints may not be commits.
*
* A new checkpoint always replaces the previous checkpoint and
* becomes the new "front" of the index. This allows the IndexFileDeleter
* to delete files that are referenced only by stale checkpoints.
* (files that were created since the last commit, but are no longer
* referenced by the "front" of the index). For this, IndexFileDeleter
* keeps track of the last non commit checkpoint.
*/
public class IndexWriter
{
private void InitBlock()
{
similarity = Similarity.GetDefault();
}
/// Default value for the write lock timeout (1,000 milliseconds).
/// See {@link #SetDefaultWriteLockTimeout}.
public static long WRITE_LOCK_TIMEOUT = 1000;
private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
/// Name of the write lock in the index.
public const System.String WRITE_LOCK_NAME = "write.lock";
/// Default value for the merge factor (10).
/// Change using {@link #SetMergeFactor(int)}.
public static readonly int DEFAULT_MERGE_FACTOR;
/// Value to denote a flush trigger is disabled
public const int DISABLE_AUTO_FLUSH = - 1;
/// Disabled by default (because IndexWriter flushes by RAM usage
/// by default). Change using {@link #SetMaxBufferedDocs(int)}.
///
public static readonly int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
/// Default value is 16 MB (which means flush when buffered
/// docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}.
///
public const double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
/// Disabled by default (because IndexWriter flushes by RAM usage
/// by default). Change using {@link #SetMaxBufferedDeleteTerms(int)}.
///
public static readonly int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
/// Default value for the maximum merge docs ({@link Integer#MAX_VALUE}).
/// Change using {@link #SetMaxMergeDocs(int)}.
public static readonly int DEFAULT_MAX_MERGE_DOCS;
/// Default value is 10,000. Change using {@link #SetMaxFieldLength(int)}.
public const int DEFAULT_MAX_FIELD_LENGTH = 10000;
/// Default value is 128. Change using {@link #SetTermIndexInterval(int)}.
public const int DEFAULT_TERM_INDEX_INTERVAL = 128;
/// Absolute hard maximum length for a term. If a term
/// arrives from the analyzer longer than this length, it
/// is skipped and a message is printed to infoStream, if
/// set (see {@link #setInfoStream}).
///
public static readonly int MAX_TERM_LENGTH;
// The normal read buffer size defaults to 1024, but
// increasing this during merging seems to yield
// performance gains. However we don't want to increase
// it too much because there are quite a few
// BufferedIndexInputs created during merging. See
// LUCENE-888 for details.
private const int MERGE_READ_BUFFER_SIZE = 4096;
// Used for printing messages
private static System.Object MESSAGE_ID_LOCK = new System.Object();
private static int MESSAGE_ID = 0;
private int messageID = - 1;
private Directory directory; // where this index resides
private Analyzer analyzer; // how to analyze text
private Similarity similarity; // how to normalize
private bool commitPending; // true if segmentInfos has changes not yet committed
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
private bool localAutoCommit; // saved autoCommit during local transaction
private bool autoCommit = true; // false if we should commit only on close
private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
private DocumentsWriter docWriter;
private IndexFileDeleter deleter;
private System.Collections.Hashtable segmentsToOptimize = new System.Collections.Hashtable(); // used by optimize to note those needing optimization
private Lock writeLock;
private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
private bool closeDir;
private bool closed;
private bool closing;
// Holds all SegmentInfo instances currently involved in
// merges
private System.Collections.Hashtable mergingSegments = new System.Collections.Hashtable();
private MergePolicy mergePolicy = new LogByteSizeMergePolicy();
private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
private System.Collections.ArrayList pendingMerges = new System.Collections.ArrayList();
private System.Collections.Hashtable runningMerges = new System.Collections.Hashtable();
private System.Collections.IList mergeExceptions = new System.Collections.ArrayList();
private long mergeGen;
private bool stopMerges;
/// Used internally to throw an {@link AlreadyClosedException} if this
/// IndexWriter has been closed.
///
/// Throws AlreadyClosedException if this IndexWriter is closed.
protected internal void EnsureOpen()
{
if (closed)
{
throw new AlreadyClosedException("this IndexWriter is closed");
}
}
/// Prints a message to the infoStream (if non-null),
/// prefixed with the identifying information for this
/// writer and the thread that's calling it.
///
public virtual void Message(System.String message)
{
if (infoStream != null)
infoStream.WriteLine("IW " + messageID + " [" + SupportClass.ThreadClass.Current().Name + "]: " + message);
}
private void SetMessageID()
{
lock (this)
{
if (infoStream != null && messageID == - 1)
{
lock (MESSAGE_ID_LOCK)
{
messageID = MESSAGE_ID++;
}
}
}
}
/// Casts current mergePolicy to LogMergePolicy, and throws
/// an exception if the mergePolicy is not a LogMergePolicy.
///
private LogMergePolicy GetLogMergePolicy()
{
if (mergePolicy is LogMergePolicy)
return (LogMergePolicy) mergePolicy;
else
throw new System.ArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
}
/// Get the current setting of whether newly flushed
/// segments will use the compound file format. Note that
/// this just returns the value previously set with
/// setUseCompoundFile(boolean), or the default value
/// (true). You cannot use this to query the status of
/// previously flushed segments.
///
/// Note that this method is a convenience method: it
/// just calls mergePolicy.getUseCompoundFile as long as
/// mergePolicy is an instance of {@link LogMergePolicy}.
/// Otherwise an IllegalArgumentException is thrown.
///
public virtual bool GetUseCompoundFile()
{
return GetLogMergePolicy().GetUseCompoundFile();
}
/// Setting to turn on usage of a compound file. When on,
/// multiple files for each segment are merged into a
/// single file when a new segment is flushed.
///
/// Note that this method is a convenience method: it
/// just calls mergePolicy.setUseCompoundFile as long as
/// mergePolicy is an instance of {@link LogMergePolicy}.
/// Otherwise an IllegalArgumentException is thrown.
///
public virtual void SetUseCompoundFile(bool value_Renamed)
{
GetLogMergePolicy().SetUseCompoundFile(value_Renamed);
GetLogMergePolicy().SetUseCompoundDocStore(value_Renamed);
}
/// Expert: Set the Similarity implementation used by this IndexWriter.
public virtual void SetSimilarity(Similarity similarity)
{
EnsureOpen();
this.similarity = similarity;
}
/// Expert: Return the Similarity implementation used by this IndexWriter.
///
/// This defaults to the current value of {@link Similarity#GetDefault()}.
///
public virtual Similarity GetSimilarity()
{
EnsureOpen();
return this.similarity;
}
/// Expert: Set the interval between indexed terms. Large values cause less
/// memory to be used by IndexReader, but slow random-access to terms. Small
/// values cause more memory to be used by an IndexReader, and speed
/// random-access to terms.
///
/// This parameter determines the amount of computation required per query
/// term, regardless of the number of documents that contain that term. In
/// particular, it is the maximum number of other terms that must be
/// scanned before a term is located and its frequency and position information
/// may be processed. In a large index with user-entered query terms, query
/// processing time is likely to be dominated not by term lookup but rather
/// by the processing of frequency and positional data. In a small index
/// or when many uncommon query terms are generated (e.g., by wildcard
/// queries) term lookup may become a dominant cost.
///
/// In particular, numUniqueTerms/interval terms are read into
/// memory by an IndexReader, and, on average, interval/2 terms
/// must be scanned for each random term access.
///
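/// For example (illustrative figures): with 1,000,000 unique terms and
/// an interval of 128, about 1,000,000/128 = 7,813 index terms are read
/// into memory, and on average 128/2 = 64 terms are scanned per lookup.
///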
public virtual void SetTermIndexInterval(int interval)
{
EnsureOpen();
this.termIndexInterval = interval;
}
/// Expert: Return the interval between indexed terms.
///
public virtual int GetTermIndexInterval()
{
EnsureOpen();
return termIndexInterval;
}
/// Constructs an IndexWriter for the index in path.
/// Text will be analyzed with a. If create
/// is true, then a new, empty index will be created in
/// path, replacing the index already there, if any.
///
/// path: the path to the index directory
/// a: the analyzer to use
/// create: true to create the index or overwrite
/// the existing one; false to append to the existing
/// index
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws LockObtainFailedException if another writer
/// has this index open (write.lock could not
/// be obtained).
/// Throws IOException if the directory cannot be read/written to, or
/// if it does not exist and create is
/// false, or if there is any other low-level
/// IO error.
public IndexWriter(System.String path, Analyzer a, bool create)
{
InitBlock();
Init(FSDirectory.GetDirectory(path), a, create, true, null, true);
}
/// Constructs an IndexWriter for the index in path.
/// Text will be analyzed with a. If create
/// is true, then a new, empty index will be created in
/// path, replacing the index already there, if any.
///
/// path: the path to the index directory
/// a: the analyzer to use
/// create: true to create the index or overwrite
/// the existing one; false to append to the existing
/// index
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws LockObtainFailedException if another writer
/// has this index open (write.lock could not
/// be obtained).
/// Throws IOException if the directory cannot be read/written to, or
/// if it does not exist and create is
/// false, or if there is any other low-level
/// IO error.
public IndexWriter(System.IO.FileInfo path, Analyzer a, bool create)
{
InitBlock();
Init(FSDirectory.GetDirectory(path), a, create, true, null, true);
}
/// Constructs an IndexWriter for the index in d.
/// Text will be analyzed with a. If create
/// is true, then a new, empty index will be created in
/// d, replacing the index already there, if any.
///
/// d: the index directory
/// a: the analyzer to use
/// create: true to create the index or overwrite
/// the existing one; false to append to the existing
/// index
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws LockObtainFailedException if another writer
/// has this index open (write.lock could not
/// be obtained).
/// Throws IOException if the directory cannot be read/written to, or
/// if it does not exist and create is
/// false, or if there is any other low-level
/// IO error.
public IndexWriter(Directory d, Analyzer a, bool create)
{
InitBlock();
Init(d, a, create, false, null, true);
}
/// Constructs an IndexWriter for the index in
/// path, first creating it if it does not
/// already exist. Text will be analyzed with a.
///
/// path: the path to the index directory
/// a: the analyzer to use
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws LockObtainFailedException if another writer
/// has this index open (write.lock could not
/// be obtained).
/// Throws IOException if the directory cannot be
/// read/written to or if there is any other low-level
/// IO error.
public IndexWriter(System.String path, Analyzer a)
{
InitBlock();
Init(FSDirectory.GetDirectory(path), a, true, null, true);
}
/// Constructs an IndexWriter for the index in
/// path, first creating it if it does not
/// already exist. Text will be analyzed with a.
///
/// path: the path to the index directory
/// a: the analyzer to use
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws LockObtainFailedException if another writer
/// has this index open (write.lock could not
/// be obtained).
/// Throws IOException if the directory cannot be
/// read/written to or if there is any other low-level
/// IO error.
public IndexWriter(System.IO.FileInfo path, Analyzer a)
{
InitBlock();
Init(FSDirectory.GetDirectory(path), a, true, null, true);
}
/// Constructs an IndexWriter for the index in
/// d, first creating it if it does not
/// already exist. Text will be analyzed with a.
///
/// d: the index directory
/// a: the analyzer to use
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws LockObtainFailedException if another writer
/// has this index open (write.lock could not
/// be obtained).
/// Throws IOException if the directory cannot be
/// read/written to or if there is any other low-level
/// IO error.
public IndexWriter(Directory d, Analyzer a)
{
InitBlock();
Init(d, a, false, null, true);
}
/// Constructs an IndexWriter for the index in
/// d, first creating it if it does not
/// already exist. Text will be analyzed with a.
///
/// d: the index directory
/// autoCommit: see above
/// a: the analyzer to use
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws LockObtainFailedException if another writer
/// has this index open (write.lock could not
/// be obtained).
/// Throws IOException if the directory cannot be
/// read/written to or if there is any other low-level
/// IO error.
public IndexWriter(Directory d, bool autoCommit, Analyzer a)
{
InitBlock();
Init(d, a, false, null, autoCommit);
}
/// Constructs an IndexWriter for the index in d.
/// Text will be analyzed with a. If create
/// is true, then a new, empty index will be created in
/// d, replacing the index already there, if any.
///
/// d: the index directory
/// autoCommit: see above
/// a: the analyzer to use
/// create: true to create the index or overwrite
/// the existing one; false to append to the existing
/// index
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws LockObtainFailedException if another writer
/// has this index open (write.lock could not
/// be obtained).
/// Throws IOException if the directory cannot be read/written to, or
/// if it does not exist and create is
/// false, or if there is any other low-level
/// IO error.
public IndexWriter(Directory d, bool autoCommit, Analyzer a, bool create)
{
InitBlock();
Init(d, a, create, false, null, autoCommit);
}
/// Expert: constructs an IndexWriter with a custom {@link
/// IndexDeletionPolicy}, for the index in d,
/// first creating it if it does not already exist. Text
/// will be analyzed with a.
///
/// d: the index directory
/// autoCommit: see above
/// a: the analyzer to use
/// deletionPolicy: see above
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws LockObtainFailedException if another writer
/// has this index open (write.lock could not
/// be obtained).
/// Throws IOException if the directory cannot be
/// read/written to or if there is any other low-level
/// IO error.
public IndexWriter(Directory d, bool autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy)
{
InitBlock();
Init(d, a, false, deletionPolicy, autoCommit);
}
/// Expert: constructs an IndexWriter with a custom {@link
/// IndexDeletionPolicy}, for the index in d.
/// Text will be analyzed with a. If
/// create is true, then a new, empty index
/// will be created in d, replacing the index
/// already there, if any.
///
/// d: the index directory
/// autoCommit: see above
/// a: the analyzer to use
/// create: true to create the index or overwrite
/// the existing one; false to append to the existing
/// index
/// deletionPolicy: see above
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws LockObtainFailedException if another writer
/// has this index open (write.lock could not
/// be obtained).
/// Throws IOException if the directory cannot be read/written to, or
/// if it does not exist and create is
/// false, or if there is any other low-level
/// IO error.
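///
/// A sketch of a trivial policy that keeps every commit (assuming this
/// version's IndexDeletionPolicy interface, whose OnInit/OnCommit
/// receive the commit list; a real policy would prune old commits):
///
///   public class KeepAllCommitsPolicy : IndexDeletionPolicy
///   {
///       public void OnInit(System.Collections.IList commits) {}
///       public void OnCommit(System.Collections.IList commits) {}
///   }
///
///   IndexWriter writer = new IndexWriter(dir, true, analyzer, true,
///       new KeepAllCommitsPolicy());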
public IndexWriter(Directory d, bool autoCommit, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy)
{
InitBlock();
Init(d, a, create, false, deletionPolicy, autoCommit);
}
private void Init(Directory d, Analyzer a, bool closeDir, IndexDeletionPolicy deletionPolicy, bool autoCommit)
{
if (IndexReader.IndexExists(d))
{
Init(d, a, false, closeDir, deletionPolicy, autoCommit);
}
else
{
Init(d, a, true, closeDir, deletionPolicy, autoCommit);
}
}
private void Init(Directory d, Analyzer a, bool create, bool closeDir, IndexDeletionPolicy deletionPolicy, bool autoCommit)
{
this.closeDir = closeDir;
directory = d;
analyzer = a;
this.infoStream = defaultInfoStream;
SetMessageID();
if (create)
{
// Clear the write lock in case it's leftover:
directory.ClearLock(IndexWriter.WRITE_LOCK_NAME);
}
Lock writeLock = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
if (!writeLock.Obtain(writeLockTimeout))
// obtain write lock
{
throw new LockObtainFailedException("Index locked for write: " + writeLock);
}
this.writeLock = writeLock; // save it
try
{
if (create)
{
// Try to read first. This is to allow create
// against an index that's currently open for
// searching. In this case we write the next
// segments_N file with no segments:
try
{
segmentInfos.Read(directory);
segmentInfos.Clear();
}
catch (System.IO.IOException)
{
// Likely this means it's a fresh directory
}
segmentInfos.Write(directory);
}
else
{
segmentInfos.Read(directory);
}
this.autoCommit = autoCommit;
if (!autoCommit)
{
rollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone();
}
docWriter = new DocumentsWriter(directory, this);
docWriter.SetInfoStream(infoStream);
// Default deleter (for backwards compatibility) is
// KeepOnlyLastCommitDeleter:
deleter = new IndexFileDeleter(directory, deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, segmentInfos, infoStream, docWriter);
PushMaxBufferedDocs();
if (infoStream != null)
{
Message("init: create=" + create);
MessageState();
}
}
catch (System.IO.IOException)
{
	this.writeLock.Release();
	this.writeLock = null;
	throw; // rethrow, preserving the original stack trace
}
}
/// Expert: set the merge policy used by this writer.
public virtual void SetMergePolicy(MergePolicy mp)
{
EnsureOpen();
if (mp == null)
throw new System.NullReferenceException("MergePolicy must be non-null");
if (mergePolicy != mp)
mergePolicy.Close();
mergePolicy = mp;
PushMaxBufferedDocs();
if (infoStream != null)
{
Message("setMergePolicy " + mp);
}
}
/// Expert: returns the current MergePolicy in use by this writer.
///
///
public virtual MergePolicy GetMergePolicy()
{
EnsureOpen();
return mergePolicy;
}
/// Expert: set the merge scheduler used by this writer.
public virtual void SetMergeScheduler(MergeScheduler mergeScheduler)
{
EnsureOpen();
if (mergeScheduler == null)
throw new System.NullReferenceException("MergeScheduler must be non-null");
if (this.mergeScheduler != mergeScheduler)
{
FinishMerges(true);
this.mergeScheduler.Close();
}
this.mergeScheduler = mergeScheduler;
if (infoStream != null)
{
Message("setMergeScheduler " + mergeScheduler);
}
}
/// Expert: returns the current MergeScheduler in use by this
/// writer.
///
public virtual MergeScheduler GetMergeScheduler()
{
EnsureOpen();
return mergeScheduler;
}
/// Determines the largest segment (measured by
/// document count) that may be merged with other segments.
/// Small values (e.g., less than 10,000) are best for
/// interactive indexing, as this limits the length of
/// pauses while indexing to a few seconds. Larger values
/// are best for batched indexing and speedier
/// searches.
///
/// The default value is {@link Integer#MAX_VALUE}.
///
/// Note that this method is a convenience method: it
/// just calls mergePolicy.setMaxMergeDocs as long as
/// mergePolicy is an instance of {@link LogMergePolicy}.
/// Otherwise an IllegalArgumentException is thrown.
///
/// The default merge policy ({@link
/// LogByteSizeMergePolicy}) also allows you to set this
/// limit by net size (in MB) of the segment, using {@link
/// LogByteSizeMergePolicy#setMaxMergeMB}.
///
public virtual void SetMaxMergeDocs(int maxMergeDocs)
{
GetLogMergePolicy().SetMaxMergeDocs(maxMergeDocs);
}
/// Returns the largest segment (measured by document
/// count) that may be merged with other segments.
///
/// Note that this method is a convenience method: it
/// just calls mergePolicy.getMaxMergeDocs as long as
/// mergePolicy is an instance of {@link LogMergePolicy}.
/// Otherwise an IllegalArgumentException is thrown.
///
public virtual int GetMaxMergeDocs()
{
return GetLogMergePolicy().GetMaxMergeDocs();
}
/// The maximum number of terms that will be indexed for a single field in a
/// document. This limits the amount of memory required for indexing, so that
/// collections with very large files will not crash the indexing process by
/// running out of memory. This setting refers to the number of running terms,
/// not to the number of different terms.
/// Note: this silently truncates large documents, excluding from the
/// index all terms that occur further in the document. If you know your source
/// documents are large, be sure to set this value high enough to accommodate
/// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
/// is your memory, but you should anticipate an OutOfMemoryError.
/// By default, no more than 10,000 terms will be indexed for a field.
///
public virtual void SetMaxFieldLength(int maxFieldLength)
{
EnsureOpen();
this.maxFieldLength = maxFieldLength;
if (infoStream != null)
Message("setMaxFieldLength " + maxFieldLength);
}
/// Returns the maximum number of terms that will be
/// indexed for a single field in a document.
///
public virtual int GetMaxFieldLength()
{
EnsureOpen();
return maxFieldLength;
}
/// Determines the minimal number of documents required
/// before the buffered in-memory documents are flushed as
/// a new Segment. Large values generally give faster
/// indexing.
///
/// When this is set, the writer will flush every
/// maxBufferedDocs added documents. Pass in {@link
/// #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
/// to number of buffered documents. Note that if flushing
/// by RAM usage is also enabled, then the flush will be
/// triggered by whichever comes first.
///
/// Disabled by default (writer flushes by RAM usage).
///
/// Throws IllegalArgumentException if maxBufferedDocs is enabled but
/// smaller than 2, or if it disables maxBufferedDocs when
/// ramBufferSize is already disabled.
///
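/// A usage sketch (the document count is illustrative); note that the
/// doc-count trigger must be enabled before the RAM trigger can be
/// disabled:
///
///   writer.SetMaxBufferedDocs(1000); // flush every 1,000 added docs
///   writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); // by doc count only
///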
public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
{
EnsureOpen();
if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
throw new System.ArgumentException("maxBufferedDocs must at least be 2 when enabled");
if (maxBufferedDocs == DISABLE_AUTO_FLUSH && GetRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
docWriter.SetMaxBufferedDocs(maxBufferedDocs);
PushMaxBufferedDocs();
if (infoStream != null)
Message("setMaxBufferedDocs " + maxBufferedDocs);
}
/// If we are flushing by doc count (not by RAM usage), and
/// using LogDocMergePolicy then push maxBufferedDocs down
/// as its minMergeDocs, to keep backwards compatibility.
///
private void PushMaxBufferedDocs()
{
if (docWriter.GetMaxBufferedDocs() != DISABLE_AUTO_FLUSH)
{
MergePolicy mp = mergePolicy;
if (mp is LogDocMergePolicy)
{
LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
int maxBufferedDocs = docWriter.GetMaxBufferedDocs();
if (lmp.GetMinMergeDocs() != maxBufferedDocs)
{
if (infoStream != null)
Message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
lmp.SetMinMergeDocs(maxBufferedDocs);
}
}
}
}
/// Returns the number of buffered added documents that will
/// trigger a flush if enabled.
///
public virtual int GetMaxBufferedDocs()
{
EnsureOpen();
return docWriter.GetMaxBufferedDocs();
}
/// Determines the amount of RAM that may be used for
/// buffering added documents before they are flushed as a
/// new Segment. Generally for faster indexing performance
/// it's best to flush by RAM usage instead of document
/// count and use as large a RAM buffer as you can.
///
/// When this is set, the writer will flush whenever
/// buffered documents use this much RAM. Pass in {@link
/// #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
/// to RAM usage. Note that if flushing by document count
/// is also enabled, then the flush will be triggered by
/// whichever comes first.
///
/// The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
///
///
/// Throws IllegalArgumentException if ramBufferSize is enabled but
/// non-positive, or if it disables ramBufferSize when maxBufferedDocs
/// is already disabled.
///
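/// A usage sketch (the 48 MB figure is illustrative):
///
///   writer.SetRAMBufferSizeMB(48.0); // flush when buffered docs reach ~48 MB
///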
public virtual void SetRAMBufferSizeMB(double mb)
{
if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
throw new System.ArgumentException("ramBufferSize should be > 0.0 MB when enabled");
if (mb == DISABLE_AUTO_FLUSH && GetMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
docWriter.SetRAMBufferSizeMB(mb);
if (infoStream != null)
Message("setRAMBufferSizeMB " + mb);
}
/// Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
public virtual double GetRAMBufferSizeMB()
{
return docWriter.GetRAMBufferSizeMB();
}
/// Determines the minimal number of delete terms required before the buffered
/// in-memory delete terms are applied and flushed. If there are documents
/// buffered in memory at the time, they are merged and a new segment is
/// created.
/// Disabled by default (writer flushes by RAM usage).
///
/// Throws IllegalArgumentException if maxBufferedDeleteTerms is
/// enabled but smaller than 1.
///
public virtual void SetMaxBufferedDeleteTerms(int maxBufferedDeleteTerms)
{
EnsureOpen();
if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1)
throw new System.ArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled");
docWriter.SetMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
if (infoStream != null)
Message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
}
/// Returns the number of buffered deleted terms that will
/// trigger a flush if enabled.
///
public virtual int GetMaxBufferedDeleteTerms()
{
EnsureOpen();
return docWriter.GetMaxBufferedDeleteTerms();
}
/// Determines how often segment indices are merged by addDocument(). With
/// smaller values, less RAM is used while indexing, and searches on
/// unoptimized indices are faster, but indexing speed is slower. With larger
/// values, more RAM is used during indexing, and while searches on unoptimized
/// indices are slower, indexing is faster. Thus larger values (> 10) are best
/// for batch index creation, and smaller values (< 10) for indices that are
/// interactively maintained.
///
/// Note that this method is a convenience method: it
/// just calls mergePolicy.setMergeFactor as long as
/// mergePolicy is an instance of {@link LogMergePolicy}.
/// Otherwise an IllegalArgumentException is thrown.
///
/// This must never be less than 2. The default value is 10.
///
public virtual void SetMergeFactor(int mergeFactor)
{
GetLogMergePolicy().SetMergeFactor(mergeFactor);
}
/// Returns the number of segments that are merged at
/// once and also controls the total number of segments
/// allowed to accumulate in the index.
///
/// Note that this method is a convenience method: it
/// just calls mergePolicy.getMergeFactor as long as
/// mergePolicy is an instance of {@link LogMergePolicy}.
/// Otherwise an IllegalArgumentException is thrown.
///
public virtual int GetMergeFactor()
{
return GetLogMergePolicy().GetMergeFactor();
}
/// If non-null, this will be the default infoStream used
/// by a newly instantiated IndexWriter.
///
public static void SetDefaultInfoStream(System.IO.TextWriter infoStream)
{
IndexWriter.defaultInfoStream = infoStream;
}
/// Returns the current default infoStream for newly
/// instantiated IndexWriters.
///
public static System.IO.TextWriter GetDefaultInfoStream()
{
return IndexWriter.defaultInfoStream;
}
/// If non-null, information about merges, deletes and a
/// message when maxFieldLength is reached will be printed
/// to this.
///
public virtual void SetInfoStream(System.IO.TextWriter infoStream)
{
EnsureOpen();
this.infoStream = infoStream;
SetMessageID();
docWriter.SetInfoStream(infoStream);
deleter.SetInfoStream(infoStream);
if (infoStream != null)
MessageState();
}
private void MessageState()
{
Message("setInfoStream: dir=" + directory + " autoCommit=" + autoCommit + " mergePolicy=" + mergePolicy + " mergeScheduler=" + mergeScheduler + " ramBufferSizeMB=" + docWriter.GetRAMBufferSizeMB() + " maxBuffereDocs=" + docWriter.GetMaxBufferedDocs() + " maxBuffereDeleteTerms=" + docWriter.GetMaxBufferedDeleteTerms() + " maxFieldLength=" + maxFieldLength + " index=" + SegString());
}
/// Returns the current infoStream in use by this writer.
///
///
public virtual System.IO.TextWriter GetInfoStream()
{
EnsureOpen();
return infoStream;
}
/// Sets the maximum time to wait for a write lock (in milliseconds)
/// for this instance of IndexWriter. See {@link
/// #SetDefaultWriteLockTimeout} to change the default value for all
/// instances.
public virtual void SetWriteLockTimeout(long writeLockTimeout)
{
EnsureOpen();
this.writeLockTimeout = writeLockTimeout;
}
/// Returns allowed timeout when acquiring the write lock.
///
///
public virtual long GetWriteLockTimeout()
{
EnsureOpen();
return writeLockTimeout;
}
/// Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
/// milliseconds).
///
public static void SetDefaultWriteLockTimeout(long writeLockTimeout)
{
IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
}
/// Returns default write lock timeout for newly
/// instantiated IndexWriters.
///
public static long GetDefaultWriteLockTimeout()
{
return IndexWriter.WRITE_LOCK_TIMEOUT;
}
/// Flushes all changes to an index and closes all
/// associated files.
///
/// If an Exception is hit during close, eg due to disk
/// full or some other reason, then both the on-disk index
/// and the internal state of the IndexWriter instance will
/// be consistent. However, the close will not be complete
/// even though part of it (flushing buffered documents)
/// may have succeeded, so the write lock will still be
/// held.
///
/// If you can correct the underlying cause (eg free up
/// some disk space) then you can call close() again.
/// Failing that, if you want to force the write lock to be
/// released (dangerous, because you may then lose buffered
/// docs in the IndexWriter instance) then you can do
/// something like this:
///
///
/// try {
///   writer.Close();
/// } finally {
///   if (IndexReader.IsLocked(directory)) {
///     IndexReader.Unlock(directory);
///   }
/// }
///
///
/// after which, you must be certain not to use the writer
/// instance anymore.
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws IOException if there is a low-level IO error.
public virtual void Close()
{
Close(true);
}
/// Closes the index with or without waiting for currently
/// running merges to finish. This is only meaningful when
/// using a MergeScheduler that runs merges in background
/// threads.
///
/// waitForMerges: if true, this call will block
/// until all merges complete; else, it will ask all
/// running merges to abort, wait until those merges have
/// finished (which should be at most a few seconds), and
/// then return.
///
public virtual void Close(bool waitForMerges)
{
bool doClose;
lock (this)
{
// Ensure that only one thread actually gets to do the closing:
if (!closing)
{
doClose = true;
closing = true;
}
else
doClose = false;
}
if (doClose)
	CloseInternal(waitForMerges);
else
	// Another thread beat us to it (is actually doing the
	// close), so we will block until that other thread
	// has finished closing
	WaitForClose();
}
private void WaitForClose()
{
lock (this)
{
while (!closed && closing)
{
try
{
System.Threading.Monitor.Wait(this);
}
catch (System.Threading.ThreadInterruptedException ie)
{
}
}
}
}
private void CloseInternal(bool waitForMerges)
{
try
{
if (infoStream != null)
Message("now flush at close");
docWriter.Close();
// Only allow a new merge to be triggered if we are
// going to wait for merges:
Flush(waitForMerges, true);
mergePolicy.Close();
FinishMerges(waitForMerges);
mergeScheduler.Close();
lock (this)
{
if (commitPending)
{
bool success = false;
try
{
segmentInfos.Write(directory); // now commit changes
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception committing segments file during close");
DeletePartialSegmentsFile();
}
}
if (infoStream != null)
Message("close: wrote segments file \"" + segmentInfos.GetCurrentSegmentFileName() + "\"");
deleter.Checkpoint(segmentInfos, true);
commitPending = false;
rollbackSegmentInfos = null;
}
if (infoStream != null)
Message("at close: " + SegString());
docWriter = null;
deleter.Close();
}
if (closeDir)
directory.Close();
if (writeLock != null)
{
writeLock.Release(); // release write lock
writeLock = null;
}
closed = true;
}
finally
{
lock (this)
{
if (!closed)
closing = false;
System.Threading.Monitor.PulseAll(this);
}
}
}
/// Tells the docWriter to close its currently open shared
/// doc stores (stored fields & vectors files).
/// The return value specifies whether the new doc store files are compound or not.
///
private bool FlushDocStores()
{
lock (this)
{
System.Collections.IList files = docWriter.Files();
bool useCompoundDocStore = false;
if (files.Count > 0)
{
System.String docStoreSegment;
bool success = false;
try
{
docStoreSegment = docWriter.CloseDocStore();
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception closing doc store segment");
docWriter.Abort(null);
}
}
useCompoundDocStore = mergePolicy.UseCompoundDocStore(segmentInfos);
if (useCompoundDocStore && docStoreSegment != null)
{
// Now build compound doc store file
success = false;
int numSegments = segmentInfos.Count;
System.String compoundFileName = docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
try
{
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
int size = files.Count;
for (int i = 0; i < size; i++)
cfsWriter.AddFile((System.String) files[i]);
// Perform the merge
cfsWriter.Close();
for (int i = 0; i < numSegments; i++)
{
SegmentInfo si = segmentInfos.Info(i);
if (si.GetDocStoreOffset() != - 1 && si.GetDocStoreSegment().Equals(docStoreSegment))
si.SetDocStoreIsCompoundFile(true);
}
Checkpoint();
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception building compound file doc store for segment " + docStoreSegment);
// Rollback to no compound file
for (int i = 0; i < numSegments; i++)
{
SegmentInfo si = segmentInfos.Info(i);
if (si.GetDocStoreOffset() != - 1 && si.GetDocStoreSegment().Equals(docStoreSegment))
si.SetDocStoreIsCompoundFile(false);
}
deleter.DeleteFile(compoundFileName);
DeletePartialSegmentsFile();
}
}
deleter.Checkpoint(segmentInfos, false);
}
}
return useCompoundDocStore;
}
}
/// Release the write lock, if needed.
~IndexWriter()
{
if (writeLock != null)
{
	writeLock.Release(); // release write lock
	writeLock = null;
}
}
/// Returns the Directory used by this index.
public virtual Directory GetDirectory()
{
EnsureOpen();
return directory;
}
/// Returns the analyzer used by this index.
public virtual Analyzer GetAnalyzer()
{
EnsureOpen();
return analyzer;
}
/// Returns the number of documents currently in this index.
public virtual int DocCount()
{
lock (this)
{
EnsureOpen();
int count = docWriter.GetNumDocsInRAM();
for (int i = 0; i < segmentInfos.Count; i++)
{
SegmentInfo si = segmentInfos.Info(i);
count += si.docCount;
}
return count;
}
}
/// The maximum number of terms that will be indexed for a single field in a
/// document. This limits the amount of memory required for indexing, so that
/// collections with very large files will not crash the indexing process by
/// running out of memory.
/// Note that this effectively truncates large documents, excluding from the
/// index terms that occur further in the document. If you know your source
/// documents are large, be sure to set this value high enough to accommodate
/// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
/// is your memory, but you should anticipate an OutOfMemoryError.
/// By default, no more than 10,000 terms will be indexed for a field.
///
///
private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
/// Adds a document to this index. If the document contains more than
/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
/// discarded.
///
/// Note that if an Exception is hit (for example disk full)
/// then the index will be consistent, but this document
/// may not have been added. Furthermore, it's possible
/// the index will have one segment in non-compound format
/// even when using compound files (when a merge has
/// partially succeeded).
///
/// This method periodically flushes pending documents
/// to the Directory (every {@link #setMaxBufferedDocs}),
/// and also periodically merges segments in the index
/// (every {@link #setMergeFactor} flushes). When this
/// occurs, the method will take more time to run (possibly
/// a long time if the index is large), and will require
/// free temporary space in the Directory to do the
/// merging.
///
/// The amount of free space required when a merge is triggered is
/// up to 1X the size of all segments being merged, when no
/// readers/searchers are open against the index, and up to 2X the
/// size of all segments being merged when readers/searchers are open
/// against the index (see {@link #Optimize()} for details). The
/// sequence of primitive merge operations performed is governed by
/// the merge policy.
///
/// Note that each term in the document can be no longer
/// than 16383 characters, otherwise an
/// IllegalArgumentException will be thrown.
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws IOException if there is a low-level IO error.
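///
/// A minimal sketch (the field name and flags are illustrative):
///
///   Document doc = new Document();
///   doc.Add(new Lucene.Net.Documents.Field("body", "some text",
///       Lucene.Net.Documents.Field.Store.NO,
///       Lucene.Net.Documents.Field.Index.TOKENIZED));
///   writer.AddDocument(doc);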
public virtual void AddDocument(Document doc)
{
AddDocument(doc, analyzer);
}
/// Adds a document to this index, using the provided analyzer instead of the
/// value of {@link #GetAnalyzer()}. If the document contains more than
/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
/// discarded.
///
/// See {@link #AddDocument(Document)} for details on
/// index and IndexWriter state after an Exception, and
/// flushing/merging temporary free space requirements.
///
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws IOException if there is a low-level IO error.
public virtual void AddDocument(Document doc, Analyzer analyzer)
{
EnsureOpen();
bool doFlush = false;
bool success = false;
try
{
doFlush = docWriter.AddDocument(doc, analyzer);
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception adding document");
lock (this)
{
// If docWriter has some aborted files that were
// never incref'd, then we clean them up here
if (docWriter != null)
{
System.Collections.IList files = docWriter.AbortedFiles();
if (files != null)
deleter.DeleteNewFiles(files);
}
}
}
}
if (doFlush)
Flush(true, false);
}
/// Deletes the document(s) containing term.
///
/// term: the term to identify the documents to be deleted
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws IOException if there is a low-level IO error.
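///
/// For example, assuming documents were indexed with an untokenized
/// "id" field:
///
///   writer.DeleteDocuments(new Term("id", "13"));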
public virtual void DeleteDocuments(Term term)
{
EnsureOpen();
bool doFlush = docWriter.BufferDeleteTerm(term);
if (doFlush)
Flush(true, false);
}
/// Deletes the document(s) containing any of the
/// terms. All deletes are flushed at the same time.
///
/// terms: array of terms to identify the documents
/// to be deleted
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws IOException if there is a low-level IO error.
public virtual void DeleteDocuments(Term[] terms)
{
EnsureOpen();
bool doFlush = docWriter.BufferDeleteTerms(terms);
if (doFlush)
Flush(true, false);
}
/// Updates a document by first deleting the document(s)
/// containing term and then adding the new
/// document. The delete and then add are atomic as seen
/// by a reader on the same index (the flush may happen only after
/// the add).
///
/// term: the term to identify the document(s) to be deleted
/// doc: the document to be added
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws IOException if there is a low-level IO error.
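///
/// For example, assuming each document carries a unique untokenized
/// "id" field:
///
///   writer.UpdateDocument(new Term("id", "42"), newVersionOfDoc);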
public virtual void UpdateDocument(Term term, Document doc)
{
EnsureOpen();
UpdateDocument(term, doc, GetAnalyzer());
}
/// Updates a document by first deleting the document(s)
/// containing term and then adding the new
/// document. The delete and then add are atomic as seen
/// by a reader on the same index (the flush may happen only after
/// the add).
///
/// term: the term to identify the document(s) to be deleted
/// doc: the document to be added
/// analyzer: the analyzer to use when analyzing the document
///
/// Throws CorruptIndexException if the index is corrupt.
/// Throws IOException if there is a low-level IO error.
public virtual void UpdateDocument(Term term, Document doc, Analyzer analyzer)
{
EnsureOpen();
bool doFlush = false;
bool success = false;
try
{
doFlush = docWriter.UpdateDocument(term, doc, analyzer);
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception updating document");
lock (this)
{
// If docWriter has some aborted files that were
// never incref'd, then we clean them up here
System.Collections.IList files = docWriter.AbortedFiles();
if (files != null)
deleter.DeleteNewFiles(files);
}
}
}
if (doFlush)
Flush(true, false);
}
// for test purposes
public /*internal*/ int GetSegmentCount()
{
lock (this)
{
return segmentInfos.Count;
}
}
// for test purposes
public /*internal*/ int GetNumBufferedDocuments()
{
lock (this)
{
return docWriter.GetNumDocsInRAM();
}
}
// for test purposes
public /*internal*/ int GetDocCount(int i)
{
lock (this)
{
if (i >= 0 && i < segmentInfos.Count)
{
return segmentInfos.Info(i).docCount;
}
else
{
return - 1;
}
}
}
internal System.String NewSegmentName()
{
// Cannot synchronize on IndexWriter because that causes
// deadlock
lock (segmentInfos)
{
// Important to set commitPending so that the
// segmentInfos is written on close. Otherwise we
// could close, re-open and re-return the same segment
// name that was previously returned which can cause
// problems at least with ConcurrentMergeScheduler.
commitPending = true;
return "_" + SupportClass.Number.ToString(segmentInfos.counter++);
}
}
/// If non-null, information about merges will be printed to this.
private System.IO.TextWriter infoStream = null;
private static System.IO.TextWriter defaultInfoStream = null;
/// Requests an "optimize" operation on an index, priming the index
/// for the fastest available search. Traditionally this has meant
/// merging all segments into a single segment as is done in the
/// default merge policy, but individaul merge policies may implement
/// optimize in different ways.
///
///
///
///
/// It is recommended that this method be called upon completion of indexing. In
/// environments with frequent updates, optimize is best done during low volume times, if at all.
///
///
/// See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.
///
/// Note that this can require substantial temporary free
/// space in the Directory (see LUCENE-764
/// for details):
///
///
/// -
///
///
If no readers/searchers are open against the index,
/// then free space required is up to 1X the total size of
/// the starting index. For example, if the starting
/// index is 10 GB, then you must have up to 10 GB of free
/// space before calling optimize.
///
/// -
///
///
If readers/searchers are using the index, then free
/// space required is up to 2X the size of the starting
/// index. This is because in addition to the 1X used by
/// optimize, the original 1X of the starting index is
/// still consuming space in the Directory as the readers
/// are holding the segments files open. Even on Unix,
/// where it will appear as if the files are gone ("ls"
/// won't list them), they still consume storage due to
/// "delete on last close" semantics.
///
/// Furthermore, if some but not all readers re-open
/// while the optimize is underway, this will cause > 2X
/// temporary space to be consumed as those new readers
/// will then hold open the partially optimized segments at
/// that time. It is best not to re-open readers while
/// optimize is running.
///
///
///
/// The actual temporary usage could be much less than
/// these figures (it depends on many factors).
///
/// In general, once the optimize completes, the total size of the
/// index will be less than the size of the starting index.
/// It could be quite a bit smaller (if there were many
/// pending deletes) or just slightly smaller.
///
/// If an Exception is hit during optimize(), for example
/// due to disk full, the index will not be corrupt and no
/// documents will have been lost. However, it may have
/// been partially optimized (some segments were merged but
/// not all), and it's possible that one of the segments in
/// the index will be in non-compound format even when
/// using compound file format. This will occur when the
/// Exception is hit during conversion of the segment into
/// compound format.
///
/// This call will optimize those segments present in
/// the index when the call started. If other threads are
/// still adding documents and flushing segments, those
/// newly created segments will not be optimized unless you
/// call optimize again.
///
///
/// CorruptIndexException if the index is corrupt
/// IOException if there is a low-level IO error
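///
/// A typical call pattern at the end of a bulk-indexing run (a sketch):
///
///   // ... AddDocument calls ...
///   writer.Optimize(); // merge segments for fastest search
///   writer.Close();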
public virtual void Optimize()
{
Optimize(true);
}
/// Optimize the index down to <= maxNumSegments. If
/// maxNumSegments==1 then this is the same as {@link
/// #Optimize()}.
///
/// maxNumSegments: maximum number of segments left
/// in the index after optimization finishes
///
public virtual void Optimize(int maxNumSegments)
{
Optimize(maxNumSegments, true);
}
/// Just like {@link #Optimize()}, except you can specify
/// whether the call should block until the optimize
/// completes. This is only meaningful with a
/// {@link MergeScheduler} that is able to run merges in
/// background threads.
///
public virtual void Optimize(bool doWait)
{
Optimize(1, doWait);
}
/// Just like {@link #Optimize(int)}, except you can
/// specify whether the call should block until the
/// optimize completes. This is only meaningful with a
/// {@link MergeScheduler} that is able to run merges in
/// background threads.
///
public virtual void Optimize(int maxNumSegments, bool doWait)
{
EnsureOpen();
if (maxNumSegments < 1)
throw new System.ArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
if (infoStream != null)
Message("optimize: index now " + SegString());
Flush();
lock (this)
{
ResetMergeExceptions();
segmentsToOptimize = new System.Collections.Hashtable();
int numSegments = segmentInfos.Count;
for (int i = 0; i < numSegments; i++)
if (!segmentsToOptimize.ContainsKey(segmentInfos.Info(i)))
segmentsToOptimize.Add(segmentInfos.Info(i), segmentInfos.Info(i));
// Now mark all pending & running merges as optimize
// merge:
System.Collections.IEnumerator it = pendingMerges.GetEnumerator();
while (it.MoveNext())
{
MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.Current;
merge.optimize = true;
merge.maxNumSegmentsOptimize = maxNumSegments;
}
it = runningMerges.GetEnumerator();
while (it.MoveNext())
{
MergePolicy.OneMerge merge = (MergePolicy.OneMerge)((DictionaryEntry)it.Current).Value;
merge.optimize = true;
merge.maxNumSegmentsOptimize = maxNumSegments;
}
}
MaybeMerge(maxNumSegments, true);
if (doWait)
{
lock (this)
{
while (OptimizeMergesPending())
{
try
{
System.Threading.Monitor.Wait(this);
}
catch (System.Threading.ThreadInterruptedException ie)
{
}
if (mergeExceptions.Count > 0)
{
// Forward any exceptions in background merge
// threads to the current thread:
int size = mergeExceptions.Count;
for (int i = 0; i < size; i++)
{
MergePolicy.OneMerge merge = (MergePolicy.OneMerge) mergeExceptions[i];
if (merge.optimize)
{
System.IO.IOException err = new System.IO.IOException("background merge hit exception: " + merge.SegString(directory),
merge.GetException());
throw err;
}
}
}
}
}
}
// NOTE: in the ConcurrentMergeScheduler case, when
// doWait is false, we can return immediately while
// background threads accomplish the optimization
}
/// Returns true if any merges in pendingMerges or
/// runningMerges are optimization merges.
///
private bool OptimizeMergesPending()
{
lock (this)
{
System.Collections.IEnumerator it = pendingMerges.GetEnumerator();
while (it.MoveNext())
{
if (((MergePolicy.OneMerge) it.Current).optimize)
return true;
}
it = runningMerges.GetEnumerator();
while (it.MoveNext())
{
if (((MergePolicy.OneMerge) ((DictionaryEntry)it.Current).Value).optimize)
return true;
}
return false;
}
}
/// Expert: asks the mergePolicy whether any merges are
/// necessary now and if so, runs the requested merges and
/// then iterates (testing again whether merges are needed) until no
/// more merges are returned by the mergePolicy.
///
/// Explicit calls to maybeMerge() are usually not
/// necessary. The most common case is when merge policy
/// parameters have changed.
///
public void MaybeMerge()
{
MaybeMerge(false);
}
private void MaybeMerge(bool optimize)
{
MaybeMerge(1, optimize);
}
private void MaybeMerge(int maxNumSegmentsOptimize, bool optimize)
{
UpdatePendingMerges(maxNumSegmentsOptimize, optimize);
mergeScheduler.Merge(this);
}
private void UpdatePendingMerges(int maxNumSegmentsOptimize, bool optimize)
{
lock (this)
{
System.Diagnostics.Debug.Assert(!optimize || maxNumSegmentsOptimize > 0);
if (stopMerges)
return ;
MergePolicy.MergeSpecification spec;
if (optimize)
{
spec = mergePolicy.FindMergesForOptimize(segmentInfos, this, maxNumSegmentsOptimize, segmentsToOptimize);
if (spec != null)
{
int numMerges = spec.merges.Count;
for (int i = 0; i < numMerges; i++)
{
MergePolicy.OneMerge merge = ((MergePolicy.OneMerge) spec.merges[i]);
merge.optimize = true;
merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize;
}
}
}
else
spec = mergePolicy.FindMerges(segmentInfos, this);
if (spec != null)
{
int numMerges = spec.merges.Count;
for (int i = 0; i < numMerges; i++)
RegisterMerge((MergePolicy.OneMerge) spec.merges[i]);
}
}
}
/// Expert: the {@link MergeScheduler} calls this method
/// to retrieve the next merge requested by the
/// MergePolicy
///
public /*internal*/ virtual MergePolicy.OneMerge GetNextMerge()
{
lock (this)
{
if (pendingMerges.Count == 0)
return null;
else
{
// Advance the merge from pending to running
System.Object tempObject;
tempObject = pendingMerges[0];
pendingMerges.RemoveAt(0);
MergePolicy.OneMerge merge = (MergePolicy.OneMerge) tempObject;
runningMerges.Add(merge, merge);
return merge;
}
}
}
/*
* Begin a transaction. During a transaction, any segment
* merges that happen (or ram segments flushed) will not
* write a new segments file and will not remove any files
* that were present at the start of the transaction. You
* must make a matched (try/finally) call to
* commitTransaction() or rollbackTransaction() to finish
* the transaction.
*
* Note that buffered documents and delete terms are not handled
* within the transactions, so they must be flushed before the
* transaction is started.
*/
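/*
* A minimal sketch of the required call pattern (internal use
* only; assumes buffered docs and delete terms were flushed
* before the transaction started):
*
* StartTransaction();
* bool success = false;
* try
* {
* // ... merge segments or copy external segments ...
* success = true;
* }
* finally
* {
* if (success)
* CommitTransaction();
* else
* RollbackTransaction();
* }
*/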
private void StartTransaction()
{
if (infoStream != null)
Message("now start transaction");
System.Diagnostics.Debug.Assert(docWriter.GetNumBufferedDeleteTerms() == 0, "calling startTransaction with buffered delete terms not supported");
System.Diagnostics.Debug.Assert(docWriter.GetNumDocsInRAM() == 0, "calling startTransaction with buffered documents not supported");
localRollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone();
localAutoCommit = autoCommit;
if (localAutoCommit)
{
if (infoStream != null)
Message("flush at startTransaction");
Flush();
// Turn off auto-commit during our local transaction:
autoCommit = false;
}
// We must "protect" our files at this point from
// deletion in case we need to rollback:
else
deleter.IncRef(segmentInfos, false);
}
/*
* Rolls back the transaction and restores state to where
* we were at the start.
*/
private void RollbackTransaction()
{
if (infoStream != null)
Message("now rollback transaction");
// First restore autoCommit in case we hit an exception below:
autoCommit = localAutoCommit;
// Keep the same segmentInfos instance but replace all
// of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write once").
segmentInfos.Clear();
segmentInfos.AddRange(localRollbackSegmentInfos);
localRollbackSegmentInfos = null;
// Ask deleter to locate unreferenced files we had
// created & remove them:
deleter.Checkpoint(segmentInfos, false);
if (!autoCommit)
// Remove the incRef we did in startTransaction:
deleter.DecRef(segmentInfos);
deleter.Refresh();
FinishMerges(false);
stopMerges = false;
}
/*
* Commits the transaction. This will write the new
* segments file and remove any pending deletions we have
* accumulated during the transaction
*/
private void CommitTransaction()
{
if (infoStream != null)
Message("now commit transaction");
// First restore autoCommit in case we hit an exception below:
autoCommit = localAutoCommit;
bool success = false;
try
{
Checkpoint();
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception committing transaction");
RollbackTransaction();
}
}
if (!autoCommit)
// Remove the incRef we did in startTransaction.
deleter.DecRef(localRollbackSegmentInfos);
localRollbackSegmentInfos = null;
// Give deleter a chance to remove files now:
deleter.Checkpoint(segmentInfos, autoCommit);
}
/// Close the IndexWriter without committing
/// any of the changes that have occurred since it was
/// opened. This removes any temporary files that had been
/// created, after which the state of the index will be the
/// same as it was when this writer was first opened. This
/// can only be called when this IndexWriter was opened
/// with autoCommit=false.
///
/// IllegalStateException if this is called when
/// the writer was opened with autoCommit=true.
///
/// IOException if there is a low-level IO error
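/// For example (a hedged usage sketch; assumes "writer" was
/// opened with autoCommit=false and "doc" is a Document that
/// was buffered but not yet committed):
///
/// writer.AddDocument(doc);
/// // Discard every change made since the writer was opened:
/// writer.Abort();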
public virtual void Abort()
{
EnsureOpen();
if (autoCommit)
throw new System.SystemException("abort() can only be called when IndexWriter was opened with autoCommit=false");
bool doClose;
lock (this)
{
// Ensure that only one thread actually gets to do the closing:
if (!closing)
{
doClose = true;
closing = true;
}
else
doClose = false;
}
if (doClose)
{
FinishMerges(false);
// Must pre-close these two, in case they set
// commitPending=true, so that we can then set it to
// false before calling closeInternal
mergePolicy.Close();
mergeScheduler.Close();
lock (this)
{
// Keep the same segmentInfos instance but replace all
// of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write
// once").
segmentInfos.Clear();
segmentInfos.AddRange(rollbackSegmentInfos);
docWriter.Abort(null);
// Ask deleter to locate unreferenced files & remove
// them:
deleter.Checkpoint(segmentInfos, false);
deleter.Refresh();
}
commitPending = false;
CloseInternal(false);
}
else
WaitForClose();
}
private void FinishMerges(bool waitForMerges)
{
lock (this)
{
if (!waitForMerges)
{
stopMerges = true;
// Abort all pending & running merges:
System.Collections.IEnumerator it = pendingMerges.GetEnumerator();
while (it.MoveNext())
{
MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.Current;
if (infoStream != null)
Message("now abort pending merge " + merge.SegString(directory));
merge.Abort();
MergeFinish(merge);
}
pendingMerges.Clear();
it = runningMerges.GetEnumerator();
while (it.MoveNext())
{
MergePolicy.OneMerge merge = (MergePolicy.OneMerge)((DictionaryEntry)it.Current).Value;
if (infoStream != null)
Message("now abort running merge " + merge.SegString(directory));
merge.Abort();
}
// These merges periodically check whether they have
// been aborted, and stop if so. We wait here to make
// sure they all stop. It should not take very long
// because the merge threads periodically check if
// they are aborted.
while (runningMerges.Count > 0)
{
if (infoStream != null)
Message("now wait for " + runningMerges.Count + " running merge to abort");
try
{
System.Threading.Monitor.Wait(this);
}
catch (System.Threading.ThreadInterruptedException ie)
{
SupportClass.ThreadClass.Current().Interrupt();
}
}
System.Diagnostics.Debug.Assert(0 == mergingSegments.Count);
if (infoStream != null)
Message("all running merges have aborted");
}
else
{
while (pendingMerges.Count > 0 || runningMerges.Count > 0)
{
try
{
System.Threading.Monitor.Wait(this);
}
catch (System.Threading.ThreadInterruptedException ie)
{
// Swallow the interrupt; the enclosing while loop
// re-checks whether any merges are still pending or running
}
}
System.Diagnostics.Debug.Assert(0 == mergingSegments.Count);
}
}
}
/*
* Called whenever the SegmentInfos has been updated and
* the index files referenced exist (correctly) in the
* index directory. If we are in autoCommit mode, we
* commit the change immediately. Else, we mark
* commitPending.
*/
private void Checkpoint()
{
lock (this)
{
if (autoCommit)
{
segmentInfos.Write(directory);
commitPending = false;
if (infoStream != null)
Message("checkpoint: wrote segments file \"" + segmentInfos.GetCurrentSegmentFileName() + "\"");
}
else
{
commitPending = true;
}
}
}
/// Merges all segments from an array of indexes into this index.
///
/// This may be used to parallelize batch indexing. A large document
/// collection can be broken into sub-collections. Each sub-collection can be
/// indexed in parallel, on a different thread, process or machine. The
/// complete index can then be created by merging sub-collection indexes
/// with this method.
///
///
/// NOTE: the index in each Directory must not be
/// changed (opened by a writer) while this method is
/// running. This method does not acquire a write lock in
/// each input Directory, so it is up to the caller to
/// enforce this.
///
/// After this completes, the index is optimized.
///
/// This method is transactional in how Exceptions are
/// handled: it does not commit a new segments_N file until
/// all indexes are added. This means if an Exception
/// occurs (for example disk full), then either no indexes
/// will have been added or they all will have been.
///
/// If an Exception is hit, it's still possible that all
/// indexes were successfully added. This happens when the
/// Exception is hit when trying to build a CFS file. In
/// this case, one segment in the index will be in non-CFS
/// format, even when using compound file format.
///
/// Also note that on an Exception, the index may still
/// have been partially or fully optimized even though none
/// of the input indexes were added.
///
/// Note that this requires temporary free space in the
/// Directory up to 2X the sum of all input indexes
/// (including the starting index). If readers/searchers
/// are open against the starting index, then temporary
/// free space required will be higher by the size of the
/// starting index (see {@link #Optimize()} for details).
///
///
/// Once this completes, the final size of the index
/// will be less than the sum of all input index sizes
/// (including the starting index). It could be quite a
/// bit smaller (if there were many pending deletes) or
/// just slightly smaller.
///
/// See LUCENE-702
/// for details.
///
/// CorruptIndexException if the index is corrupt
/// IOException if there is a low-level IO error
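/// For example (a hedged usage sketch; assumes "writer" is an
/// open IndexWriter and "dir1"/"dir2" are Directories holding
/// complete sub-indexes built elsewhere, with no writer open
/// against them):
///
/// writer.AddIndexes(new Directory[] { dir1, dir2 });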
public virtual void AddIndexes(Directory[] dirs)
{
lock (this)
{
EnsureOpen();
if (infoStream != null)
Message("flush at addIndexes");
Flush();
bool success = false;
StartTransaction();
try
{
for (int i = 0; i < dirs.Length; i++)
{
SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.Read(dirs[i]);
for (int j = 0; j < sis.Count; j++)
{
segmentInfos.Add(sis.Info(j)); // add each info
}
}
Optimize();
success = true;
}
finally
{
if (success)
{
CommitTransaction();
}
else
{
RollbackTransaction();
}
}
}
}
private void ResetMergeExceptions()
{
lock (this)
{
mergeExceptions = new System.Collections.ArrayList();
mergeGen++;
}
}
/// Merges all segments from an array of indexes into this index.
///
/// This is similar to addIndexes(Directory[]). However, no optimize()
/// is called either at the beginning or at the end. Instead, merges
/// are carried out as necessary.
///
///
/// NOTE: the index in each Directory must not be
/// changed (opened by a writer) while this method is
/// running. This method does not acquire a write lock in
/// each input Directory, so it is up to the caller to
/// enforce this.
///
/// This requires that this index not be among those to be added,
/// and that the upper bound of those segments' doc counts not
/// exceed maxMergeDocs.
///
/// See {@link #AddIndexes(Directory[])} for
/// details on transactional semantics, temporary free
/// space required in the Directory, and non-CFS segments
/// on an Exception.
///
/// CorruptIndexException if the index is corrupt
/// IOException if there is a low-level IO error
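/// For example (a hedged usage sketch; same assumptions about
/// "writer", "dir1" and "dir2" as in AddIndexes(Directory[])):
///
/// writer.AddIndexesNoOptimize(new Directory[] { dir1, dir2 });
/// // Optionally collapse segments afterwards, as a separate step:
/// writer.Optimize();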
public virtual void AddIndexesNoOptimize(Directory[] dirs)
{
lock (this)
{
EnsureOpen();
if (infoStream != null)
Message("flush at addIndexesNoOptimize");
Flush();
bool success = false;
StartTransaction();
try
{
for (int i = 0; i < dirs.Length; i++)
{
if (directory == dirs[i])
{
// cannot add this index: segments may be deleted in merge before added
throw new System.ArgumentException("Cannot add this index to itself");
}
SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.Read(dirs[i]);
for (int j = 0; j < sis.Count; j++)
{
SegmentInfo info = sis.Info(j);
segmentInfos.Add(info); // add each info
}
}
MaybeMerge();
// If after merging there remain segments in the index
// that are in a different directory, just copy these
// over into our index. This is necessary (before
// finishing the transaction) to avoid leaving the
// index in an unusable (inconsistent) state.
CopyExternalSegments();
success = true;
}
finally
{
if (success)
{
CommitTransaction();
}
else
{
RollbackTransaction();
}
}
}
}
/* If any of our segments are using a directory != ours
* then copy them over. Currently this is only used by
* addIndexesNoOptimize(). */
private void CopyExternalSegments()
{
lock (this)
{
int numSegments = segmentInfos.Count;
for (int i = 0; i < numSegments; i++)
{
SegmentInfo info = segmentInfos.Info(i);
if (info.dir != directory)
{
MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segmentInfos.Range(i, 1 + i), info.GetUseCompoundFile());
if (RegisterMerge(merge))
{
pendingMerges.Remove(merge);
runningMerges.Add(merge, merge);
Merge(merge);
}
// This means there is a bug in the
// MergeScheduler. MergeSchedulers in general are
// not allowed to run a merge involving segments
// external to this IndexWriter's directory in the
// background because this would put the index
// into an inconsistent state (where segmentInfos
// has been written with such external segments
// that an IndexReader would fail to load).
else
throw new MergePolicy.MergeException("segment \"" + info.name + " exists in external directory yet the MergeScheduler executed the merge in a separate thread");
}
}
}
}
/// Merges the provided indexes into this index.
/// After this completes, the index is optimized.
/// The provided IndexReaders are not closed.
/// See {@link #AddIndexes(Directory[])} for
/// details on transactional semantics, temporary free
/// space required in the Directory, and non-CFS segments
/// on an Exception.
///
/// CorruptIndexException if the index is corrupt
/// IOException if there is a low-level IO error
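/// For example (a hedged usage sketch; "reader1" and "reader2"
/// are open IndexReaders over other indexes; the caller keeps
/// responsibility for closing them):
///
/// writer.AddIndexes(new IndexReader[] { reader1, reader2 });
/// reader1.Close();
/// reader2.Close();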
public virtual void AddIndexes(IndexReader[] readers)
{
lock (this)
{
EnsureOpen();
Optimize(); // start with zero or 1 seg
System.String mergedName = NewSegmentName();
SegmentMerger merger = new SegmentMerger(this, mergedName, null);
SegmentInfo info;
IndexReader sReader = null;
try
{
if (segmentInfos.Count == 1)
{
// add existing index, if any
sReader = SegmentReader.Get(segmentInfos.Info(0));
merger.Add(sReader);
}
for (int i = 0; i < readers.Length; i++)
// add new indexes
merger.Add(readers[i]);
bool success = false;
StartTransaction();
try
{
int docCount = merger.Merge(); // merge 'em
if (sReader != null)
{
sReader.Close();
sReader = null;
}
segmentInfos.RemoveRange(0, segmentInfos.Count); // pop old infos & add new
info = new SegmentInfo(mergedName, docCount, directory, false, true, - 1, null, false);
segmentInfos.Add(info);
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception in addIndexes during merge");
RollbackTransaction();
}
else
{
CommitTransaction();
}
}
}
finally
{
if (sReader != null)
{
sReader.Close();
}
}
if (mergePolicy is LogMergePolicy && GetUseCompoundFile())
{
bool success = false;
StartTransaction();
try
{
merger.CreateCompoundFile(mergedName + ".cfs");
info.SetUseCompoundFile(true);
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception building compound file in addIndexes during merge");
RollbackTransaction();
}
else
{
CommitTransaction();
}
}
}
}
}
// This is called after pending added and deleted
// documents have been flushed to the Directory but before
// the change is committed (new segments_N file written).
internal virtual void DoAfterFlush()
{
}
/// Flush all in-memory buffered updates (adds and deletes)
/// to the Directory.
/// Note: if autoCommit=false, flushed data will still
/// not be visible to readers until {@link #close} is called.
///
/// CorruptIndexException if the index is corrupt
/// IOException if there is a low-level IO error
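/// For example (a hedged usage sketch; assumes "writer" has
/// buffered adds or deletes in RAM):
///
/// // Force buffered updates onto stable storage; this may
/// // also trigger segment merges:
/// writer.Flush();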
public void Flush()
{
Flush(true, false);
}
/// Flush all in-memory buffered updates (adds and deletes)
/// to the Directory.
///
/// if true, we may merge segments (if
/// deletes or docs were flushed) if necessary
///
/// if false we are allowed to keep
/// doc stores open to share with the next segment
///
public /*protected internal*/ void Flush(bool triggerMerge, bool flushDocStores)
{
EnsureOpen();
if (DoFlush(flushDocStores) && triggerMerge)
MaybeMerge();
}
private bool DoFlush(bool flushDocStores)
{
lock (this)
{
// Make sure no threads are actively adding a document
// Returns true if docWriter is currently aborting, in
// which case we skip flushing this segment
if (docWriter.PauseAllThreads())
{
docWriter.ResumeAllThreads();
return false;
}
try
{
SegmentInfo newSegment = null;
int numDocs = docWriter.GetNumDocsInRAM();
// Always flush docs if there are any
bool flushDocs = numDocs > 0;
// With autoCommit=true we always must flush the doc
// stores when we flush
flushDocStores |= autoCommit;
System.String docStoreSegment = docWriter.GetDocStoreSegment();
if (docStoreSegment == null)
flushDocStores = false;
// Always flush deletes if there are any delete terms.
// TODO: when autoCommit=false we don't have to flush
// deletes with every flushed segment; we can save
// CPU/IO by buffering longer & flushing deletes only
// when they are full or writer is being closed. We
// have to fix the "applyDeletesSelectively" logic to
// apply to more than just the last flushed segment
bool flushDeletes = docWriter.HasDeletes();
if (infoStream != null)
{
Message(" flush: segment=" + docWriter.GetSegment() + " docStoreSegment=" + docWriter.GetDocStoreSegment() + " docStoreOffset=" + docWriter.GetDocStoreOffset() + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms());
Message(" index before flush " + SegString());
}
int docStoreOffset = docWriter.GetDocStoreOffset();
// docStoreOffset should only be non-zero when
// autoCommit == false
System.Diagnostics.Debug.Assert(!autoCommit || 0 == docStoreOffset);
bool docStoreIsCompoundFile = false;
// Check if the doc stores must be separately flushed
// because other segments, besides the one we are about
// to flush, reference it
if (flushDocStores && (!flushDocs || !docWriter.GetSegment().Equals(docWriter.GetDocStoreSegment())))
{
// We must separately flush the doc store
if (infoStream != null)
Message(" flush shared docStore segment " + docStoreSegment);
docStoreIsCompoundFile = FlushDocStores();
flushDocStores = false;
}
System.String segment = docWriter.GetSegment();
// If we are flushing docs, segment must not be null:
System.Diagnostics.Debug.Assert(segment != null || !flushDocs);
if (flushDocs || flushDeletes)
{
SegmentInfos rollback = null;
if (flushDeletes)
rollback = (SegmentInfos) segmentInfos.Clone();
bool success = false;
try
{
if (flushDocs)
{
if (0 == docStoreOffset && flushDocStores)
{
// This means we are flushing private doc stores
// with this segment, so it will not be shared
// with other segments
System.Diagnostics.Debug.Assert(docStoreSegment != null);
System.Diagnostics.Debug.Assert(docStoreSegment.Equals(segment));
docStoreOffset = - 1;
docStoreIsCompoundFile = false;
docStoreSegment = null;
}
int flushedDocCount = docWriter.Flush(flushDocStores);
newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile);
segmentInfos.Add(newSegment);
}
if (flushDeletes)
{
// we should be able to change this so we can
// buffer deletes longer and then flush them to
// multiple flushed segments, when
// autoCommit=false
ApplyDeletes(flushDocs);
DoAfterFlush();
}
Checkpoint();
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception flushing segment " + segment);
if (flushDeletes)
{
// Carefully check if any partial .del files
// should be removed:
int size = rollback.Count;
for (int i = 0; i < size; i++)
{
System.String newDelFileName = segmentInfos.Info(i).GetDelFileName();
System.String delFileName = rollback.Info(i).GetDelFileName();
if (newDelFileName != null && !newDelFileName.Equals(delFileName))
deleter.DeleteFile(newDelFileName);
}
// Fully replace the segmentInfos since flushed
// deletes could have changed any of the
// SegmentInfo instances:
segmentInfos.Clear();
segmentInfos.AddRange(rollback);
}
else
{
// Remove segment we added, if any:
if (newSegment != null && segmentInfos.Count > 0 && segmentInfos.Info(segmentInfos.Count - 1) == newSegment)
segmentInfos.RemoveAt(segmentInfos.Count - 1);
}
if (flushDocs)
docWriter.Abort(null);
DeletePartialSegmentsFile();
deleter.Checkpoint(segmentInfos, false);
if (segment != null)
deleter.Refresh(segment);
}
}
deleter.Checkpoint(segmentInfos, autoCommit);
if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment))
{
success = false;
try
{
docWriter.CreateCompoundFile(segment);
newSegment.SetUseCompoundFile(true);
Checkpoint();
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception creating compound file for newly flushed segment " + segment);
newSegment.SetUseCompoundFile(false);
deleter.DeleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
DeletePartialSegmentsFile();
}
}
deleter.Checkpoint(segmentInfos, autoCommit);
}
return true;
}
else
{
return false;
}
}
finally
{
docWriter.ClearFlushPending();
docWriter.ResumeAllThreads();
}
}
}
/// Expert: Return the total size of all index files currently cached in memory.
/// Useful for size management when deciding to call {@link #Flush()}
///
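/// For example (a hedged usage sketch; the 32 MB threshold is
/// purely illustrative):
///
/// if (writer.RamSizeInBytes() > 32 * 1024 * 1024)
/// writer.Flush();
///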
public long RamSizeInBytes()
{
EnsureOpen();
return docWriter.GetRAMUsed();
}
/// Expert: Return the number of documents whose segments are currently cached in memory.
/// Useful when calling flush()
///
public int NumRamDocs()
{
lock (this)
{
EnsureOpen();
return docWriter.GetNumDocsInRAM();
}
}
private int EnsureContiguousMerge(MergePolicy.OneMerge merge)
{
int first = segmentInfos.IndexOf(merge.segments.Info(0));
if (first == - 1)
throw new MergePolicy.MergeException("could not find segment " + merge.segments.Info(0).name + " in current segments");
int numSegments = segmentInfos.Count;
int numSegmentsToMerge = merge.segments.Count;
for (int i = 0; i < numSegmentsToMerge; i++)
{
SegmentInfo info = merge.segments.Info(i);
if (first + i >= numSegments || !segmentInfos.Info(first + i).Equals(info))
{
if (segmentInfos.IndexOf(info) == - 1)
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the index");
else
{
throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge + " vs " + SegString() + "), which IndexWriter (currently) cannot handle");
}
}
}
return first;
}
/* FIXME if we want to support non-contiguous segment merges */
private bool CommitMerge(MergePolicy.OneMerge merge)
{
lock (this)
{
System.Diagnostics.Debug.Assert(merge.registerDone);
// If merge was explicitly aborted, or, if abort() or
// rollbackTransaction() had been called since our merge
// started (which results in an unqualified
// deleter.refresh() call that will remove any index
// file that current segments does not reference), we
// abort this merge
if (merge.IsAborted())
{
if (infoStream != null)
Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");
System.Diagnostics.Debug.Assert(merge.increfDone);
DecrefMergeSegments(merge);
deleter.Refresh(merge.info.name);
return false;
}
bool success = false;
int start;
try
{
SegmentInfos sourceSegmentsClone = merge.segmentsClone;
SegmentInfos sourceSegments = merge.segments;
start = EnsureContiguousMerge(merge);
if (infoStream != null)
Message("commitMerge " + merge.SegString(directory));
// Carefully merge deletes that occurred after we
// started merging:
BitVector deletes = null;
int docUpto = 0;
int numSegmentsToMerge = sourceSegments.Count;
for (int i = 0; i < numSegmentsToMerge; i++)
{
SegmentInfo previousInfo = sourceSegmentsClone.Info(i);
SegmentInfo currentInfo = sourceSegments.Info(i);
System.Diagnostics.Debug.Assert(currentInfo.docCount == previousInfo.docCount);
int docCount = currentInfo.docCount;
if (previousInfo.HasDeletions())
{
// There were deletes on this segment when the merge
// started. The merge has collapsed away those
// deletes, but, if new deletes were flushed since
// the merge started, we must now carefully keep any
// newly flushed deletes, mapping them to the new
// docIDs.
System.Diagnostics.Debug.Assert(currentInfo.HasDeletions());
// Load deletes present @ start of merge, for this segment:
BitVector previousDeletes = new BitVector(previousInfo.dir, previousInfo.GetDelFileName());
if (!currentInfo.GetDelFileName().Equals(previousInfo.GetDelFileName()))
{
// This means this segment has had new deletes
// committed since we started the merge, so we
// must merge them:
if (deletes == null)
deletes = new BitVector(merge.info.docCount);
BitVector currentDeletes = new BitVector(currentInfo.dir, currentInfo.GetDelFileName());
for (int j = 0; j < docCount; j++)
{
if (previousDeletes.Get(j))
System.Diagnostics.Debug.Assert(currentDeletes.Get(j));
else
{
if (currentDeletes.Get(j))
deletes.Set(docUpto);
docUpto++;
}
}
}
else
docUpto += docCount - previousDeletes.Count();
}
else if (currentInfo.HasDeletions())
{
// This segment had no deletes before but now it
// does:
if (deletes == null)
deletes = new BitVector(merge.info.docCount);
BitVector currentDeletes = new BitVector(directory, currentInfo.GetDelFileName());
for (int j = 0; j < docCount; j++)
{
if (currentDeletes.Get(j))
deletes.Set(docUpto);
docUpto++;
}
}
// No deletes before or after
else
docUpto += currentInfo.docCount;
merge.CheckAborted(directory);
}
if (deletes != null)
{
merge.info.AdvanceDelGen();
deletes.Write(directory, merge.info.GetDelFileName());
}
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception creating merged deletes file");
deleter.Refresh(merge.info.name);
}
}
// Simple optimization: if the doc store we are using
// has been closed and is now in compound format (but
// wasn't when we started), then we will switch to the
// compound format as well:
System.String mergeDocStoreSegment = merge.info.GetDocStoreSegment();
if (mergeDocStoreSegment != null && !merge.info.GetDocStoreIsCompoundFile())
{
int size = segmentInfos.Count;
for (int i = 0; i < size; i++)
{
SegmentInfo info = segmentInfos.Info(i);
System.String docStoreSegment = info.GetDocStoreSegment();
if (docStoreSegment != null && docStoreSegment.Equals(mergeDocStoreSegment) && info.GetDocStoreIsCompoundFile())
{
merge.info.SetDocStoreIsCompoundFile(true);
break;
}
}
}
success = false;
SegmentInfos rollback = null;
try
{
rollback = (SegmentInfos) segmentInfos.Clone();
// Remove the merged-away segments via an ArrayList range
// view (clearing the view removes them from segmentInfos):
((System.Collections.IList) ((System.Collections.ArrayList) segmentInfos).GetRange(start, merge.segments.Count)).Clear();
segmentInfos.Insert(start, merge.info);
Checkpoint();
success = true;
}
finally
{
if (!success && rollback != null)
{
if (infoStream != null)
Message("hit exception when checkpointing after merge");
segmentInfos.Clear();
segmentInfos.AddRange(rollback);
DeletePartialSegmentsFile();
deleter.Refresh(merge.info.name);
}
}
if (merge.optimize)
segmentsToOptimize.Add(merge.info, merge.info);
// Must checkpoint before decrefing so any newly
// referenced files in the new merge.info are incref'd
// first:
deleter.Checkpoint(segmentInfos, autoCommit);
DecrefMergeSegments(merge);
return true;
}
}
private void DecrefMergeSegments(MergePolicy.OneMerge merge)
{
SegmentInfos sourceSegmentsClone = merge.segmentsClone;
int numSegmentsToMerge = sourceSegmentsClone.Count;
System.Diagnostics.Debug.Assert(merge.increfDone);
merge.increfDone = false;
for (int i = 0; i < numSegmentsToMerge; i++)
{
SegmentInfo previousInfo = sourceSegmentsClone.Info(i);
// Decref all files for this SegmentInfo (this
// matches the incref in mergeInit):
if (previousInfo.dir == directory)
deleter.DecRef(previousInfo.Files());
}
}
/// Merges the indicated segments, replacing them in the stack with a
/// single segment.
///
public /*internal*/ void Merge(MergePolicy.OneMerge merge)
{
System.Diagnostics.Debug.Assert(merge.registerDone);
System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0);
bool success = false;
try
{
try
{
if (merge.info == null)
MergeInit(merge);
if (infoStream != null)
Message("now merge\n merge=" + merge.SegString(directory) + "\n index=" + SegString());
MergeMiddle(merge);
success = true;
}
catch (MergePolicy.MergeAbortedException e)
{
merge.SetException(e);
AddMergeException(merge);
// We can ignore this exception, unless the merge
// involves segments from external directories, in
// which case we must throw it so, for example, the
// rollbackTransaction code in addIndexes* is
// executed.
if (merge.isExternal)
throw e;
}
}
finally
{
lock (this)
{
try
{
if (!success && infoStream != null)
Message("hit exception during merge");
MergeFinish(merge);
// This merge (and, generally, any change to the
// segments) may now enable new merges, so we call
// merge policy & update pending merges.
if (success && !merge.IsAborted() && !closed && !closing)
UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
}
finally
{
runningMerges.Remove(merge);
// Optimize may be waiting on the final optimize
// merge to finish; and finishMerges() may be
// waiting for all merges to finish:
System.Threading.Monitor.PulseAll(this);
}
}
}
}
/// Checks whether this merge involves any segments
/// already participating in a merge. If not, this merge
/// is "registered", meaning we record that its segments
/// are now participating in a merge, and true is
/// returned. Else (the merge conflicts) false is
/// returned.
///
internal bool RegisterMerge(MergePolicy.OneMerge merge)
{
lock (this)
{
if (merge.registerDone)
return true;
int count = merge.segments.Count;
bool isExternal = false;
for (int i = 0; i < count; i++)
{
SegmentInfo info = merge.segments.Info(i);
if (mergingSegments.Contains(info))
return false;
if (segmentInfos.IndexOf(info) == - 1)
return false;
if (info.dir != directory)
isExternal = true;
}
pendingMerges.Add(merge);
if (infoStream != null)
Message("add merge to pendingMerges: " + merge.SegString(directory) + " [total " + pendingMerges.Count + " pending]");
merge.mergeGen = mergeGen;
merge.isExternal = isExternal;
// OK it does not conflict; now record that this merge
// is running (while synchronized) to avoid the race
// condition where two conflicting merges from different
// threads both start:
for (int i = 0; i < count; i++)
if (!mergingSegments.Contains(merge.segments.Info(i)))
mergingSegments.Add(merge.segments.Info(i), merge.segments.Info(i));
// Merge is now registered
merge.registerDone = true;
return true;
}
}
/// Does initial setup for a merge, which is fast but holds
/// the synchronized lock on IndexWriter instance.
///
internal void MergeInit(MergePolicy.OneMerge merge)
{
lock (this)
{
System.Diagnostics.Debug.Assert(merge.registerDone);
if (merge.IsAborted())
return ;
SegmentInfos sourceSegments = merge.segments;
int end = sourceSegments.Count;
EnsureContiguousMerge(merge);
// Check whether this merge will allow us to skip
// merging the doc stores (stored field & vectors).
// This is a very substantial optimization (saves tons
// of IO) that can only be applied with
// autoCommit=false.
Directory lastDir = directory;
System.String lastDocStoreSegment = null;
int next = - 1;
bool mergeDocStores = false;
bool doFlushDocStore = false;
System.String currentDocStoreSegment = docWriter.GetDocStoreSegment();
// Test each segment to be merged: check if we need to
// flush/merge doc stores
for (int i = 0; i < end; i++)
{
SegmentInfo si = sourceSegments.Info(i);
// If it has deletions we must merge the doc stores
if (si.HasDeletions())
mergeDocStores = true;
// If it has its own (private) doc stores we must
// merge the doc stores
if (- 1 == si.GetDocStoreOffset())
mergeDocStores = true;
// If it has a different doc store segment than
// previous segments, we must merge the doc stores
System.String docStoreSegment = si.GetDocStoreSegment();
if (docStoreSegment == null)
mergeDocStores = true;
else if (lastDocStoreSegment == null)
lastDocStoreSegment = docStoreSegment;
else if (!lastDocStoreSegment.Equals(docStoreSegment))
mergeDocStores = true;
// Segments' docStoreOffsets must be in-order,
// contiguous. For the default merge policy now
// this will always be the case but for an arbitrary
// merge policy this may not be the case
if (- 1 == next)
next = si.GetDocStoreOffset() + si.docCount;
else if (next != si.GetDocStoreOffset())
mergeDocStores = true;
else
next = si.GetDocStoreOffset() + si.docCount;
// If the segment comes from a different directory
// we must merge
if (lastDir != si.dir)
mergeDocStores = true;
// If the segment is referencing the current "live"
// doc store outputs then we must merge
if (si.GetDocStoreOffset() != - 1 && currentDocStoreSegment != null && si.GetDocStoreSegment().Equals(currentDocStoreSegment))
doFlushDocStore = true;
}
int docStoreOffset;
System.String docStoreSegment2;
bool docStoreIsCompoundFile;
if (mergeDocStores)
{
docStoreOffset = - 1;
docStoreSegment2 = null;
docStoreIsCompoundFile = false;
}
else
{
SegmentInfo si = sourceSegments.Info(0);
docStoreOffset = si.GetDocStoreOffset();
docStoreSegment2 = si.GetDocStoreSegment();
docStoreIsCompoundFile = si.GetDocStoreIsCompoundFile();
}
if (mergeDocStores && doFlushDocStore)
{
// SegmentMerger intends to merge the doc stores
// (stored fields, vectors), and at least one of the
// segments to be merged refers to the currently
// live doc stores.
// TODO: if we know we are about to merge away these
// newly flushed doc store files then we should not
// make compound file out of them...
if (infoStream != null)
Message("flush at merge");
Flush(false, true);
}
// We must take a full copy at this point so that we can
// properly merge deletes in commitMerge()
merge.segmentsClone = (SegmentInfos) merge.segments.Clone();
for (int i = 0; i < end; i++)
{
SegmentInfo si = merge.segmentsClone.Info(i);
// IncRef all files for this segment info to make sure
// they are not removed while we are trying to merge.
if (si.dir == directory)
deleter.IncRef(si.Files());
}
merge.increfDone = true;
merge.mergeDocStores = mergeDocStores;
// Bind a new segment name here so even with
// ConcurrentMergeScheduler we keep deterministic segment
// names.
merge.info = new SegmentInfo(NewSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment2, docStoreIsCompoundFile);
// Also enroll the merged segment into mergingSegments;
// this prevents it from getting selected for a merge
// after our merge is done but while we are building the
// CFS:
mergingSegments.Add(merge.info, merge.info);
}
}
/// Does the finishing work for a merge, which is fast but holds
/// the synchronized lock on IndexWriter instance.
///
internal void MergeFinish(MergePolicy.OneMerge merge)
{
lock (this)
{
if (merge.increfDone)
DecrefMergeSegments(merge);
System.Diagnostics.Debug.Assert(merge.registerDone);
SegmentInfos sourceSegments = merge.segments;
int end = sourceSegments.Count;
for (int i = 0; i < end; i++)
mergingSegments.Remove(sourceSegments.Info(i));
if (merge.info != null)
mergingSegments.Remove(merge.info);
merge.registerDone = false;
}
}
/// Does the actual (time-consuming) work of the merge,
/// but without holding synchronized lock on IndexWriter
/// instance
///
private int MergeMiddle(MergePolicy.OneMerge merge)
{
merge.CheckAborted(directory);
System.String mergedName = merge.info.name;
SegmentMerger merger = null;
int mergedDocCount = 0;
SegmentInfos sourceSegments = merge.segments;
SegmentInfos sourceSegmentsClone = merge.segmentsClone;
int numSegments = sourceSegments.Count;
if (infoStream != null)
Message("merging " + merge.SegString(directory));
merger = new SegmentMerger(this, mergedName, merge);
// This is try/finally to make sure merger's readers are
// closed:
bool success = false;
try
{
int totDocCount = 0;
for (int i = 0; i < numSegments; i++)
{
SegmentInfo si = sourceSegmentsClone.Info(i);
IndexReader reader = SegmentReader.Get(si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet)
merger.Add(reader);
totDocCount += reader.NumDocs();
}
if (infoStream != null)
{
Message("merge: total " + totDocCount + " docs");
}
merge.CheckAborted(directory);
mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);
System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);
success = true;
}
finally
{
// close readers before we attempt to delete
// now-obsolete segments
if (merger != null)
{
merger.CloseReaders();
}
if (!success)
{
if (infoStream != null)
Message("hit exception during merge; now refresh deleter on segment " + mergedName);
lock (this)
{
AddMergeException(merge);
deleter.Refresh(mergedName);
}
}
}
if (!CommitMerge(merge))
// commitMerge will return false if this merge was aborted
return 0;
if (merge.useCompoundFile)
{
success = false;
bool skip = false;
System.String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION;
try
{
try
{
merger.CreateCompoundFile(compoundFileName);
success = true;
}
catch (System.IO.IOException ioe)
{
lock (this)
{
if (segmentInfos.IndexOf(merge.info) == - 1)
{
// If another merge kicked in and merged our
// new segment away while we were trying to
// build the compound file, we can hit a
// FileNotFoundException and possibly
// IOException over NFS. We can tell this has
// happened because our SegmentInfo is no
// longer in the segments; if this has
// happened it is safe to ignore the exception
// & skip finishing/committing our compound
// file creation.
if (infoStream != null)
Message("hit exception creating compound file; ignoring it because our info (segment " + merge.info.name + ") has been merged away");
skip = true;
}
else
throw ioe;
}
}
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception creating compound file during merge: skip=" + skip);
lock (this)
{
if (!skip)
AddMergeException(merge);
deleter.DeleteFile(compoundFileName);
}
}
}
if (!skip)
{
lock (this)
{
if (skip || segmentInfos.IndexOf(merge.info) == - 1 || merge.IsAborted())
{
// Our segment (committed in non-compound
// format) got merged away while we were
// building the compound format.
deleter.DeleteFile(compoundFileName);
}
else
{
success = false;
try
{
merge.info.SetUseCompoundFile(true);
Checkpoint();
success = true;
}
finally
{
if (!success)
{
if (infoStream != null)
Message("hit exception checkpointing compound file during merge");
// Must rollback:
AddMergeException(merge);
merge.info.SetUseCompoundFile(false);
DeletePartialSegmentsFile();
deleter.DeleteFile(compoundFileName);
}
}
// Give deleter a chance to remove files now.
deleter.Checkpoint(segmentInfos, autoCommit);
}
}
}
}
return mergedDocCount;
}
internal virtual void AddMergeException(MergePolicy.OneMerge merge)
{
lock (this)
{
if (!mergeExceptions.Contains(merge) && mergeGen == merge.mergeGen)
mergeExceptions.Add(merge);
}
}
private void DeletePartialSegmentsFile()
{
if (segmentInfos.GetLastGeneration() != segmentInfos.GetGeneration())
{
System.String segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", segmentInfos.GetGeneration());
if (infoStream != null)
Message("now delete partial segments file \"" + segmentFileName + "\"");
deleter.DeleteFile(segmentFileName);
}
}
// Called during flush to apply any buffered deletes. If
// flushedNewSegment is true then a new segment was just
// created and flushed from the ram segments, so we will
// selectively apply the deletes to that new segment.
private void ApplyDeletes(bool flushedNewSegment)
{
System.Collections.Hashtable bufferedDeleteTerms = docWriter.GetBufferedDeleteTerms();
System.Collections.IList bufferedDeleteDocIDs = docWriter.GetBufferedDeleteDocIDs();
if (infoStream != null)
Message("flush " + docWriter.GetNumBufferedDeleteTerms() + " buffered deleted terms and " + bufferedDeleteDocIDs.Count + " deleted docIDs on " + segmentInfos.Count + " segments.");
if (flushedNewSegment)
{
IndexReader reader = null;
try
{
// Open readers w/o opening the stored fields /
// vectors because these files may still be held
// open for writing by docWriter
reader = SegmentReader.Get(segmentInfos.Info(segmentInfos.Count - 1), false);
// Apply delete terms to the segment just flushed from ram
// apply appropriately so that a delete term is only applied to
// the documents buffered before it, not those buffered after it.
ApplyDeletesSelectively(bufferedDeleteTerms, bufferedDeleteDocIDs, reader);
}
finally
{
if (reader != null)
{
try
{
reader.DoCommit();
}
finally
{
reader.DoClose();
}
}
}
}
int infosEnd = segmentInfos.Count;
if (flushedNewSegment)
{
infosEnd--;
}
for (int i = 0; i < infosEnd; i++)
{
IndexReader reader = null;
try
{
reader = SegmentReader.Get(segmentInfos.Info(i), false);
// Apply delete terms to disk segments
// except the one just flushed from ram.
ApplyDeletes(bufferedDeleteTerms, reader);
}
finally
{
if (reader != null)
{
try
{
reader.DoCommit();
}
finally
{
reader.DoClose();
}
}
}
}
// Clean up bufferedDeleteTerms.
docWriter.ClearBufferedDeletes();
}
// For test purposes.
public /*internal*/ int GetBufferedDeleteTermsSize()
{
lock (this)
{
return docWriter.GetBufferedDeleteTerms().Count;
}
}
// For test purposes.
public /*internal*/ int GetNumBufferedDeleteTerms()
{
lock (this)
{
return docWriter.GetNumBufferedDeleteTerms();
}
}
// Apply buffered delete terms to the segment just flushed from ram
// apply appropriately so that a delete term is only applied to
// the documents buffered before it, not those buffered after it.
private void ApplyDeletesSelectively(System.Collections.Hashtable deleteTerms, System.Collections.IList deleteIds, IndexReader reader)
{
System.Collections.IEnumerator iter = new System.Collections.Hashtable(deleteTerms).GetEnumerator();
while (iter.MoveNext())
{
System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
Term term = (Term) entry.Key;
TermDocs docs = reader.TermDocs(term);
if (docs != null)
{
int num = ((DocumentsWriter.Num) entry.Value).GetNum();
try
{
while (docs.Next())
{
int doc = docs.Doc();
if (doc >= num)
{
break;
}
reader.DeleteDocument(doc);
}
}
finally
{
docs.Close();
}
}
}
if (deleteIds.Count > 0)
{
iter = deleteIds.GetEnumerator();
while (iter.MoveNext())
{
reader.DeleteDocument(((System.Int32) iter.Current));
}
}
}
// Apply buffered delete terms to this reader.
private void ApplyDeletes(System.Collections.Hashtable deleteTerms, IndexReader reader)
{
System.Collections.IEnumerator iter = new System.Collections.Hashtable(deleteTerms).GetEnumerator();
while (iter.MoveNext())
{
System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
reader.DeleteDocuments((Term) entry.Key);
}
}
// utility routines for tests
public /*internal*/ virtual SegmentInfo NewestSegment()
{
return segmentInfos.Info(segmentInfos.Count - 1);
}
public virtual System.String SegString()
{
lock (this)
{
System.Text.StringBuilder buffer = new System.Text.StringBuilder();
for (int i = 0; i < segmentInfos.Count; i++)
{
if (i > 0)
{
buffer.Append(' ');
}
buffer.Append(segmentInfos.Info(i).SegString(directory));
}
return buffer.ToString();
}
}
static IndexWriter()
{
DEFAULT_MERGE_FACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
DEFAULT_MAX_MERGE_DOCS = LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS;
MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
}
}
}