/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using Analyzer = Lucene.Net.Analysis.Analyzer;
using Document = Lucene.Net.Documents.Document;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;

namespace Lucene.Net.Index
{
    /// <summary>
    /// <p>[Note that as of <b>2.1</b>, all but one of the methods in this
    /// class are available via <see cref="IndexWriter"/>. The one method
    /// that is not available is <see cref="DeleteDocument(int)"/>.]</p>
    ///
    /// <p>A class to modify an index, i.e. to delete and add documents. This
    /// class hides <see cref="IndexReader"/> and <see cref="IndexWriter"/> so
    /// that you do not need to care about implementation details such as that
    /// adding documents is done via IndexWriter and deletion is done via
    /// IndexReader.</p>
    ///
    /// <p>Note that you cannot create more than one <c>IndexModifier</c>
    /// object on the same directory at the same time.</p>
    ///
    /// <p>Example usage:</p>
    /// <code>
    ///     Analyzer analyzer = new StandardAnalyzer();
    ///     // create an index in /tmp/index, overwriting an existing one:
    ///     IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
    ///     Document doc = new Document();
    ///     doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
    ///     doc.Add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
    ///     indexModifier.AddDocument(doc);
    ///     int deleted = indexModifier.DeleteDocuments(new Term("id", "1"));
    ///     System.Console.WriteLine("Deleted " + deleted + " document");
    ///     indexModifier.Flush();
    ///     System.Console.WriteLine(indexModifier.DocCount() + " docs in index");
    ///     indexModifier.Close();
    /// </code>
    ///
    /// <p>Not all methods of IndexReader and IndexWriter are offered by this
    /// class. If you need access to additional methods, either use those
    /// classes directly or implement your own class that extends
    /// <c>IndexModifier</c>.</p>
    ///
    /// <p>Although an instance of this class can be used from more than one
    /// thread, you will not get the best performance. You might want to use
    /// IndexReader and IndexWriter directly for that (but you will need to
    /// care about synchronization yourself then).</p>
    ///
    /// <p>While you can freely mix calls to <c>AddDocument()</c> and
    /// <c>DeleteDocuments()</c> using this class, you should batch your calls
    /// for best performance. For example, if you want to update 20 documents,
    /// you should first delete all those documents, then add all the new
    /// documents.</p>
    /// </summary>
    /// <author>Daniel Naber</author>
    public class IndexModifier
    {
        // At most one of indexWriter / indexReader is non-null at a time;
        // CreateIndexWriter() and CreateIndexReader() switch between them.
        protected internal IndexWriter indexWriter = null;
        protected internal IndexReader indexReader = null;

        protected internal Directory directory = null;
        protected internal Analyzer analyzer = null;
        protected internal bool open = false;

        // Lucene defaults:
        // These values are re-applied to every IndexWriter that
        // CreateIndexWriter() opens, so settings survive a reader/writer switch.
        protected internal System.IO.StreamWriter infoStream = null;
        protected internal bool useCompoundFile = true;
        protected internal int maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
        protected internal int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
        protected internal int mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR;

        /// <summary>Open an index with write access.</summary>
        /// <param name="directory">the index directory</param>
        /// <param name="analyzer">the analyzer to use for adding new documents</param>
        /// <param name="create"><c>true</c> to create the index or overwrite the existing one;
        /// <c>false</c> to append to the existing index</param>
        public IndexModifier(Directory directory, Analyzer analyzer, bool create)
        {
            // NOTE: Init is virtual; an override runs before the subclass constructor body.
            Init(directory, analyzer, create);
        }

        /// <summary>Open an index with write access.</summary>
        /// <param name="dirName">the index directory</param>
        /// <param name="analyzer">the analyzer to use for adding new documents</param>
        /// <param name="create"><c>true</c> to create the index or overwrite the existing one;
        /// <c>false</c> to append to the existing index</param>
        public IndexModifier(string dirName, Analyzer analyzer, bool create)
        {
            Directory dir = FSDirectory.GetDirectory(dirName);
            Init(dir, analyzer, create);
        }

        /// <summary>Open an index with write access.</summary>
        /// <param name="file">the index directory</param>
        /// <param name="analyzer">the analyzer to use for adding new documents</param>
        /// <param name="create"><c>true</c> to create the index or overwrite the existing one;
        /// <c>false</c> to append to the existing index</param>
        public IndexModifier(System.IO.FileInfo file, Analyzer analyzer, bool create)
        {
            Directory dir = FSDirectory.GetDirectory(file);
            Init(dir, analyzer, create);
        }

        /// <summary>
        /// Initialize an IndexWriter on the given directory and mark this
        /// modifier as open.
        /// </summary>
        /// <param name="directory">the index directory; also used as the lock object
        /// for all operations of this instance</param>
        /// <param name="analyzer">the analyzer to use for adding new documents</param>
        /// <param name="create"><c>true</c> to create or overwrite the index,
        /// <c>false</c> to append to it</param>
        /// <exception cref="System.IO.IOException">IOException</exception>
        protected internal virtual void Init(Directory directory, Analyzer analyzer, bool create)
        {
            this.directory = directory;
            lock (this.directory)
            {
                this.analyzer = analyzer;
                indexWriter = new IndexWriter(directory, analyzer, create);
                open = true;
            }
        }

        /// <summary>
        /// Throw an <see cref="System.InvalidOperationException"/> if the index is closed.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        protected internal virtual void AssureOpen()
        {
            if (!open)
            {
                // InvalidOperationException derives from SystemException, so callers
                // that caught the previously thrown SystemException keep working.
                throw new System.InvalidOperationException("Index is closed");
            }
        }

        /// <summary>
        /// Close the IndexReader and open an IndexWriter. Does nothing if an
        /// IndexWriter is already open.
        /// </summary>
        /// <exception cref="System.IO.IOException">IOException</exception>
        protected internal virtual void CreateIndexWriter()
        {
            if (indexWriter != null)
            {
                return;
            }

            if (indexReader != null)
            {
                indexReader.Close();
                indexReader = null;
            }

            indexWriter = new IndexWriter(directory, analyzer, false);

            // Re-apply the remembered settings to the fresh writer.
            indexWriter.SetInfoStream(infoStream);
            indexWriter.SetUseCompoundFile(useCompoundFile);
            indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
            indexWriter.SetMaxFieldLength(maxFieldLength);
            indexWriter.SetMergeFactor(mergeFactor);
        }

        /// <summary>
        /// Close the IndexWriter and open an IndexReader. Does nothing if an
        /// IndexReader is already open.
        /// </summary>
        /// <exception cref="System.IO.IOException">IOException</exception>
        protected internal virtual void CreateIndexReader()
        {
            if (indexReader != null)
            {
                return;
            }

            if (indexWriter != null)
            {
                indexWriter.Close();
                indexWriter = null;
            }

            indexReader = IndexReader.Open(directory);
        }

        /// <summary>
        /// Make sure all changes are written to disk. This closes the
        /// currently open writer or reader and reopens one of the same kind.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        /// <exception cref="System.IO.IOException">IOException</exception>
        public virtual void Flush()
        {
            lock (directory)
            {
                AssureOpen();
                if (indexWriter != null)
                {
                    indexWriter.Close();
                    indexWriter = null;
                    CreateIndexWriter();
                }
                else if (indexReader != null)
                {
                    // Guarded: both handles can be null if an earlier
                    // reader/writer switch failed part-way through.
                    indexReader.Close();
                    indexReader = null;
                    CreateIndexReader();
                }
            }
        }

        /// <summary>
        /// Adds a document to this index, using the provided analyzer instead
        /// of the one specified in the constructor. If the document contains
        /// more than <see cref="SetMaxFieldLength(int)"/> terms for a given
        /// field, the remainder are discarded.
        /// </summary>
        /// <param name="doc">the document to add</param>
        /// <param name="docAnalyzer">the analyzer to use for this document, or
        /// <c>null</c> to use the analyzer given to the constructor</param>
        /// <seealso cref="IndexWriter.AddDocument(Document, Analyzer)"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual void AddDocument(Document doc, Analyzer docAnalyzer)
        {
            lock (directory)
            {
                AssureOpen();
                CreateIndexWriter();
                if (docAnalyzer != null)
                {
                    indexWriter.AddDocument(doc, docAnalyzer);
                }
                else
                {
                    indexWriter.AddDocument(doc);
                }
            }
        }

        /// <summary>
        /// Adds a document to this index. If the document contains more than
        /// <see cref="SetMaxFieldLength(int)"/> terms for a given field, the
        /// remainder are discarded.
        /// </summary>
        /// <param name="doc">the document to add</param>
        /// <seealso cref="IndexWriter.AddDocument(Document)"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual void AddDocument(Document doc)
        {
            AddDocument(doc, null);
        }

        /// <summary>
        /// Deletes all documents containing <c>term</c>.
        /// This is useful if one uses a document field to hold a unique ID
        /// string for the document. Then to delete such a document, one merely
        /// constructs a term with the appropriate field and the unique ID
        /// string as its text and passes it to this method. Returns the number
        /// of documents deleted.
        /// </summary>
        /// <param name="term">the term identifying the documents to delete</param>
        /// <returns>the number of documents deleted</returns>
        /// <seealso cref="IndexReader.DeleteDocuments(Term)"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual int DeleteDocuments(Term term)
        {
            lock (directory)
            {
                AssureOpen();
                CreateIndexReader();
                return indexReader.DeleteDocuments(term);
            }
        }

        /// <summary>Deletes the document numbered <c>docNum</c>.</summary>
        /// <param name="docNum">the number of the document to delete</param>
        /// <seealso cref="IndexReader.DeleteDocument(int)"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual void DeleteDocument(int docNum)
        {
            lock (directory)
            {
                AssureOpen();
                CreateIndexReader();
                indexReader.DeleteDocument(docNum);
            }
        }

        /// <summary>
        /// Returns the number of documents currently in this index, taken
        /// from the open IndexWriter if there is one and from the IndexReader
        /// otherwise.
        /// </summary>
        /// <returns>the document count reported by the active writer or reader</returns>
        /// <seealso cref="IndexWriter.DocCount()"/>
        /// <seealso cref="IndexReader.NumDocs()"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual int DocCount()
        {
            lock (directory)
            {
                AssureOpen();
                if (indexWriter != null)
                {
                    return indexWriter.DocCount();
                }

                // No-op when a reader is already open; otherwise opens one so
                // that a previously failed switch cannot cause a null dereference.
                CreateIndexReader();
                return indexReader.NumDocs();
            }
        }

        /// <summary>
        /// Merges all segments together into a single segment, optimizing an
        /// index for search.
        /// </summary>
        /// <seealso cref="IndexWriter.Optimize()"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual void Optimize()
        {
            lock (directory)
            {
                AssureOpen();
                CreateIndexWriter();
                indexWriter.Optimize();
            }
        }

        /// <summary>
        /// If non-null, information about merges and a message when
        /// <see cref="GetMaxFieldLength()"/> is reached will be printed to this.
        /// <p>Example:
        /// <c>index.SetInfoStream(new System.IO.StreamWriter(System.Console.OpenStandardError()));</c></p>
        /// </summary>
        /// <param name="infoStream">the stream to print to, or <c>null</c></param>
        /// <seealso cref="IndexWriter.SetInfoStream"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual void SetInfoStream(System.IO.StreamWriter infoStream)
        {
            lock (directory)
            {
                AssureOpen();
                if (indexWriter != null)
                {
                    indexWriter.SetInfoStream(infoStream);
                }
                // Remembered so that writers opened later get the same stream.
                this.infoStream = infoStream;
            }
        }

        /// <summary>
        /// Returns the info stream of the underlying IndexWriter. Switches to
        /// an IndexWriter first if an IndexReader is currently open.
        /// </summary>
        /// <returns>the writer's info stream</returns>
        /// <seealso cref="IndexModifier.SetInfoStream(System.IO.StreamWriter)"/>
        /// <exception cref="System.IO.IOException">IOException</exception>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual System.IO.TextWriter GetInfoStream()
        {
            lock (directory)
            {
                AssureOpen();
                CreateIndexWriter();
                return indexWriter.GetInfoStream();
            }
        }

        /// <summary>
        /// Setting to turn on usage of a compound file. When on, multiple
        /// files for each segment are merged into a single file once the
        /// segment creation is finished. This is done regardless of what
        /// directory is in use.
        /// </summary>
        /// <param name="useCompoundFile"><c>true</c> to use the compound file format</param>
        /// <seealso cref="IndexWriter.SetUseCompoundFile(bool)"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual void SetUseCompoundFile(bool useCompoundFile)
        {
            lock (directory)
            {
                AssureOpen();
                if (indexWriter != null)
                {
                    indexWriter.SetUseCompoundFile(useCompoundFile);
                }
                this.useCompoundFile = useCompoundFile;
            }
        }

        /// <summary>
        /// Returns the compound-file setting of the underlying IndexWriter.
        /// Switches to an IndexWriter first if an IndexReader is currently open.
        /// </summary>
        /// <returns>the writer's compound-file setting</returns>
        /// <seealso cref="IndexModifier.SetUseCompoundFile(bool)"/>
        /// <exception cref="System.IO.IOException">IOException</exception>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual bool GetUseCompoundFile()
        {
            lock (directory)
            {
                AssureOpen();
                CreateIndexWriter();
                return indexWriter.GetUseCompoundFile();
            }
        }

        /// <summary>
        /// The maximum number of terms that will be indexed for a single
        /// field in a document. This limits the amount of memory required for
        /// indexing, so that collections with very large files will not crash
        /// the indexing process by running out of memory.
        /// <p>Note that this effectively truncates large documents, excluding
        /// from the index terms that occur further in the document. If you
        /// know your source documents are large, be sure to set this value
        /// high enough to accommodate the expected size. If you set it to
        /// <c>int.MaxValue</c>, then the only limit is your memory, but you
        /// should anticipate an <c>OutOfMemoryException</c>.</p>
        /// <p>By default, no more than 10,000 terms will be indexed for a field.</p>
        /// </summary>
        /// <param name="maxFieldLength">the maximum number of terms indexed per field</param>
        /// <seealso cref="IndexWriter.SetMaxFieldLength(int)"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual void SetMaxFieldLength(int maxFieldLength)
        {
            lock (directory)
            {
                AssureOpen();
                if (indexWriter != null)
                {
                    indexWriter.SetMaxFieldLength(maxFieldLength);
                }
                this.maxFieldLength = maxFieldLength;
            }
        }

        /// <summary>
        /// Returns the maximum field length of the underlying IndexWriter.
        /// Switches to an IndexWriter first if an IndexReader is currently open.
        /// </summary>
        /// <returns>the writer's maximum field length</returns>
        /// <seealso cref="IndexModifier.SetMaxFieldLength(int)"/>
        /// <exception cref="System.IO.IOException">IOException</exception>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual int GetMaxFieldLength()
        {
            lock (directory)
            {
                AssureOpen();
                CreateIndexWriter();
                return indexWriter.GetMaxFieldLength();
            }
        }

        /// <summary>
        /// Determines the minimal number of documents required before the
        /// buffered in-memory documents are merged and a new segment is
        /// created. Since documents are merged in a
        /// <see cref="Lucene.Net.Store.RAMDirectory"/>, a large value gives
        /// faster indexing. At the same time, mergeFactor limits the number of
        /// files open in an FSDirectory.
        /// <p>The default value is 10.</p>
        /// </summary>
        /// <param name="maxBufferedDocs">the number of documents to buffer</param>
        /// <seealso cref="IndexWriter.SetMaxBufferedDocs(int)"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        /// <exception cref="System.ArgumentException">if maxBufferedDocs is smaller than 2
        /// (NOTE(review): presumably raised by the underlying IndexWriter; while an
        /// IndexReader is active the value is only stored here and not checked - confirm)</exception>
        public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
        {
            lock (directory)
            {
                AssureOpen();
                if (indexWriter != null)
                {
                    indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
                }
                this.maxBufferedDocs = maxBufferedDocs;
            }
        }

        /// <summary>
        /// Returns the buffered-documents setting of the underlying
        /// IndexWriter. Switches to an IndexWriter first if an IndexReader is
        /// currently open.
        /// </summary>
        /// <returns>the writer's maximum number of buffered documents</returns>
        /// <seealso cref="IndexModifier.SetMaxBufferedDocs(int)"/>
        /// <exception cref="System.IO.IOException">IOException</exception>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual int GetMaxBufferedDocs()
        {
            lock (directory)
            {
                AssureOpen();
                CreateIndexWriter();
                return indexWriter.GetMaxBufferedDocs();
            }
        }

        /// <summary>
        /// Determines how often segment indices are merged by AddDocument().
        /// With smaller values, less RAM is used while indexing, and searches
        /// on unoptimized indices are faster, but indexing speed is slower.
        /// With larger values, more RAM is used during indexing, and while
        /// searches on unoptimized indices are slower, indexing is faster.
        /// Thus larger values (greater than 10) are best for batch index
        /// creation, and smaller values (less than 10) for indices that are
        /// interactively maintained.
        /// <p>This must never be less than 2. The default value is 10.</p>
        /// </summary>
        /// <param name="mergeFactor">the merge factor</param>
        /// <seealso cref="IndexWriter.SetMergeFactor(int)"/>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual void SetMergeFactor(int mergeFactor)
        {
            lock (directory)
            {
                AssureOpen();
                if (indexWriter != null)
                {
                    indexWriter.SetMergeFactor(mergeFactor);
                }
                this.mergeFactor = mergeFactor;
            }
        }

        /// <summary>
        /// Returns the merge factor of the underlying IndexWriter. Switches to
        /// an IndexWriter first if an IndexReader is currently open.
        /// </summary>
        /// <returns>the writer's merge factor</returns>
        /// <seealso cref="IndexModifier.SetMergeFactor(int)"/>
        /// <exception cref="System.IO.IOException">IOException</exception>
        /// <exception cref="System.InvalidOperationException">if the index is closed</exception>
        public virtual int GetMergeFactor()
        {
            lock (directory)
            {
                AssureOpen();
                CreateIndexWriter();
                return indexWriter.GetMergeFactor();
            }
        }

        /// <summary>
        /// Close this index, writing all pending changes to disk.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">if the index has been closed
        /// before already</exception>
        public virtual void Close()
        {
            lock (directory)
            {
                if (!open)
                {
                    // Derives from SystemException, which was thrown here before.
                    throw new System.InvalidOperationException("Index is closed already");
                }

                if (indexWriter != null)
                {
                    indexWriter.Close();
                    indexWriter = null;
                }
                else if (indexReader != null)
                {
                    // Guarded so that an instance left without writer and reader
                    // (failed switch) can still be marked as closed.
                    indexReader.Close();
                    indexReader = null;
                }

                open = false;
            }
        }

        /// <summary>Returns <c>"Index@"</c> followed by the directory's string form.</summary>
        public override string ToString()
        {
            return "Index@" + directory;
        }

        /*
        // used as an example in the javadoc:
        public static void main(String[] args) throws IOException {
            Analyzer analyzer = new StandardAnalyzer();
            // create an index in /tmp/index, overwriting an existing one:
            IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
            Document doc = new Document();
            doc.add(new Fieldable("id", "1", Fieldable.Store.YES, Fieldable.Index.UN_TOKENIZED));
            doc.add(new Fieldable("body", "a simple test", Fieldable.Store.YES, Fieldable.Index.TOKENIZED));
            indexModifier.addDocument(doc);
            int deleted = indexModifier.delete(new Term("id", "1"));
            System.out.println("Deleted " + deleted + " document");
            indexModifier.flush();
            System.out.println(indexModifier.docCount() + " docs in index");
            indexModifier.close();
        }*/
    }
}