/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using Lucene.Net.Documents; using Document = Lucene.Net.Documents.Document; using FieldSelector = Lucene.Net.Documents.FieldSelector; using Lucene.Net.Store; using Similarity = Lucene.Net.Search.Similarity; namespace Lucene.Net.Index { /// IndexReader is an abstract class, providing an interface for accessing an /// index. Search of an index is done entirely through this abstract interface, /// so that any subclass which implements it is searchable. ///

/// Concrete subclasses of IndexReader are usually constructed with a call to /// one of the static Open() methods, e.g. Open(Directory, bool). ///

For efficiency, in this API documents are often referred to via /// document numbers, non-negative integers which each name a unique /// document in the index. These document numbers are ephemeral--they may change /// as documents are added to and deleted from an index. Clients should thus not /// rely on a given document having the same number between sessions. ///

An IndexReader can be opened on a directory for which an IndexWriter is /// opened already, but it cannot be used to delete documents from the index then. ///

/// NOTE: for backwards API compatibility, several methods are not listed /// as abstract, but have no useful implementations in this base class and /// instead always throw UnsupportedOperationException. Subclasses are /// strongly encouraged to override these methods, but in many cases may not /// need to. ///

///

/// NOTE: as of 2.4, it's possible to open a read-only /// IndexReader using the static open methods that accept the /// boolean readOnly parameter. Such a reader has /// better concurrency as it's not necessary to synchronize on the /// isDeleted method. You must explicitly specify false /// if you want to make changes with the resulting IndexReader. ///

///

/// NOTE: IndexReader instances are completely thread /// safe, meaning multiple threads can call any of its methods, /// concurrently. If your application requires external /// synchronization, you should not synchronize on the /// IndexReader instance; use your own /// (non-Lucene) objects instead. ///

public abstract class IndexReader : System.ICloneable, System.IDisposable
{
    /// <summary>
    /// Helper used by <c>LastModified</c>: reports the modification time of a
    /// segments file as seen by the directory it was constructed with.
    /// </summary>
    private class AnonymousClassFindSegmentsFile : SegmentInfos.FindSegmentsFile
    {
        // Directory that DoBody queries for the file's modification time.
        private Lucene.Net.Store.Directory directory2;

        /// <summary>
        /// Captures <paramref name="directory2"/> for later use by <c>DoBody</c> and
        /// forwards <paramref name="Param1"/> to the base class constructor.
        /// </summary>
        internal AnonymousClassFindSegmentsFile(Lucene.Net.Store.Directory directory2, Lucene.Net.Store.Directory Param1)
            : base(Param1)
        {
            this.directory2 = directory2;
        }

        /// <summary>
        /// Returns the modification time of <paramref name="segmentFileName"/>,
        /// boxed as a <c>long</c>.
        /// </summary>
        public override System.Object DoBody(System.String segmentFileName)
        {
            long modified = (long) directory2.FileModified(segmentFileName);
            return modified;
        }
    }

    /// <summary>
    /// Constants describing field properties, for example used for
    /// <c>GetFieldNames(FieldOption)</c>.
    /// </summary>
    public sealed class FieldOption
    {
        // Display name returned by ToString(); stays null when the
        // parameterless constructor is used.
        private readonly System.String option;

        internal FieldOption()
        {
        }

        internal FieldOption(System.String option)
        {
            this.option = option;
        }

        /// <summary>Returns the option name this instance was created with.</summary>
        public override System.String ToString()
        {
            return option;
        }

        /// <summary>All fields</summary>
        public static readonly FieldOption ALL = new FieldOption("ALL");

        /// <summary>All indexed fields</summary>
        public static readonly FieldOption INDEXED = new FieldOption("INDEXED");

        /// <summary>All fields that store payloads</summary>
        public static readonly FieldOption STORES_PAYLOADS = new FieldOption("STORES_PAYLOADS");

        /// <summary>All fields that omit tf</summary>
        public static readonly FieldOption OMIT_TERM_FREQ_AND_POSITIONS = new FieldOption("OMIT_TERM_FREQ_AND_POSITIONS");

        /// <summary>All fields which are not indexed</summary>
        public static readonly FieldOption UNINDEXED = new FieldOption("UNINDEXED");

        /// <summary>All fields which are indexed with termvectors enabled</summary>
        public static readonly FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption("INDEXED_WITH_TERMVECTOR");

        /// <summary>All fields which are indexed but don't have termvectors enabled</summary>
        public static readonly FieldOption INDEXED_NO_TERMVECTOR = new FieldOption("INDEXED_NO_TERMVECTOR");

        /// <summary>
        /// All fields with termvectors enabled.
        /// Please note that only standard termvector fields are returned
        /// </summary>
        public static readonly FieldOption TERMVECTOR = new FieldOption("TERMVECTOR");

        /// <summary>All fields with termvectors with position values enabled</summary>
        public static readonly FieldOption TERMVECTOR_WITH_POSITION = new FieldOption("TERMVECTOR_WITH_POSITION");

        /// <summary>All fields with termvectors with offset values enabled</summary>
        public static readonly FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption("TERMVECTOR_WITH_OFFSET");

        /// <summary>All fields with termvectors with offset values and position values enabled</summary>
        public static readonly FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption("TERMVECTOR_WITH_POSITION_OFFSET");
    }

    // Set by Dispose(bool) once it has released its reference.
    private bool closed;

    // Raised by the mutating operations (SetNorm, DeleteDocument, UndeleteAll)
    // and cleared by Commit.
    protected internal bool hasChanges;

    // Number of outstanding references; starts at 1 in the constructor.
    private int refCount;

    // Divisor passed by the Open overloads that do not take one explicitly.
    protected internal static int DEFAULT_TERMS_INDEX_DIVISOR = 1;

    /// <summary>Expert: returns the current refCount for this reader</summary>
    public virtual int RefCount
    {
        get
        {
            lock (this)
            {
                return refCount;
            }
        }
    }

    /// <summary>
    /// Expert: increments the refCount of this IndexReader
    /// instance. RefCounts are used to determine when a
    /// reader can be closed safely, i.e. as soon as there are
    /// no more references. Be sure to always call a
    /// corresponding DecRef(), in a finally clause;
    /// otherwise the reader may never be closed. Note that
    /// Close() simply calls DecRef(), which means that
    /// the IndexReader will not really be closed until
    /// DecRef() has been called for all outstanding
    /// references.
    /// </summary>
    public virtual void IncRef()
    {
        lock (this)
        {
            System.Diagnostics.Debug.Assert(refCount > 0);
            EnsureOpen();
            refCount++;
        }
    }

    /// Expert: decreases the refCount of this IndexReader
    /// instance. If the refCount drops to 0, then pending
    /// changes (if any) are committed to the index and this
    /// reader is closed.
/// <exception cref="System.IO.IOException">in case an IOException occurs in Commit() or DoClose()</exception>
public virtual void DecRef()
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(refCount > 0);
        EnsureOpen();

        // Releasing the last reference: flush pending changes, then close.
        bool releasingLastReference = refCount == 1;
        if (releasingLastReference)
        {
            Commit();
            DoClose();
        }

        refCount--;
    }
}

/// <summary>Creates a reader that starts out holding a single reference.</summary>
protected internal IndexReader()
{
    refCount = 1;
}

/// <summary>Verifies that this reader still has at least one outstanding reference.</summary>
/// <exception cref="AlreadyClosedException">if this IndexReader is closed</exception>
protected internal void EnsureOpen()
{
    if (refCount > 0)
    {
        return;
    }

    throw new AlreadyClosedException("this IndexReader is closed");
}

/// <summary>
/// Returns an IndexReader reading the index in the given
/// Directory. You should pass readOnly=true, since it
/// gives much better concurrent performance, unless you
/// intend to do write operations (delete documents or
/// change norms) with the reader.
/// </summary>
/// <param name="directory">the index directory</param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public static IndexReader Open(Directory directory, bool readOnly)
{
    return Open(
        directory,
        null,
        null,
        readOnly,
        DEFAULT_TERMS_INDEX_DIVISOR);
}

/// <summary>
/// Expert: returns an IndexReader reading the index in the given
/// IndexCommit. You should pass readOnly=true, since it
/// gives much better concurrent performance, unless you
/// intend to do write operations (delete documents or
/// change norms) with the reader.
/// </summary>
/// <param name="commit">the commit point to open</param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(IndexCommit commit, bool readOnly)
{
    return Open(
        commit.Directory,
        null,
        commit,
        readOnly,
        DEFAULT_TERMS_INDEX_DIVISOR);
}

/// <summary>
/// Expert: returns an IndexReader reading the index in
/// the given Directory, with a custom IndexDeletionPolicy.
/// You should pass readOnly=true,
/// since it gives much better concurrent performance,
/// unless you intend to do write operations (delete
/// documents or change norms) with the reader.
/// </summary>
/// <param name="directory">the index directory</param>
/// <param name="deletionPolicy">
/// a custom deletion policy (only used
/// if you use this reader to perform deletes or to set
/// norms)
/// </param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly)
{
    return Open(
        directory,
        deletionPolicy,
        null,
        readOnly,
        DEFAULT_TERMS_INDEX_DIVISOR);
}

/// <summary>
/// Expert: returns an IndexReader reading the index in
/// the given Directory, with a custom IndexDeletionPolicy.
/// You should pass readOnly=true,
/// since it gives much better concurrent performance,
/// unless you intend to do write operations (delete
/// documents or change norms) with the reader.
/// </summary>
/// <param name="directory">the index directory</param>
/// <param name="deletionPolicy">
/// a custom deletion policy (only used
/// if you use this reader to perform deletes or to set
/// norms)
/// </param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <param name="termInfosIndexDivisor">
/// Subsamples which indexed
/// terms are loaded into RAM. This has the same effect as
/// IndexWriter.SetTermIndexInterval
/// except that setting
/// must be done at indexing time while this setting can be
/// set per reader. When set to N, then one in every
/// N*termIndexInterval terms in the index is loaded into
/// memory. By setting this to a value greater than 1 you can reduce
/// memory usage, at the expense of higher latency when
/// loading a TermInfo. The default value is 1. Set this
/// to -1 to skip loading the terms index entirely.
/// </param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
{
    return Open(
        directory,
        deletionPolicy,
        null,
        readOnly,
        termInfosIndexDivisor);
}

/// <summary>
/// Expert: returns an IndexReader reading the index in
/// the given Directory, using a specific commit and with
/// a custom IndexDeletionPolicy. You should pass
/// readOnly=true, since it gives much better concurrent
/// performance, unless you intend to do write operations
/// (delete documents or change norms) with the reader.
/// </summary>
/// <param name="commit">the specific IndexCommit to open</param>
/// <param name="deletionPolicy">
/// a custom deletion policy (only used
/// if you use this reader to perform deletes or to set
/// norms)
/// </param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly)
{
    return Open(
        commit.Directory,
        deletionPolicy,
        commit,
        readOnly,
        DEFAULT_TERMS_INDEX_DIVISOR);
}

/// <summary>
/// Expert: returns an IndexReader reading the index in
/// the given Directory, using a specific commit and with
/// a custom IndexDeletionPolicy. You should pass
/// readOnly=true, since it gives much better concurrent
/// performance, unless you intend to do write operations
/// (delete documents or change norms) with the reader.
/// </summary>
/// <param name="commit">the specific IndexCommit to open</param>
/// <param name="deletionPolicy">
/// a custom deletion policy (only used
/// if you use this reader to perform deletes or to set
/// norms)
/// </param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <param name="termInfosIndexDivisor">
/// Subsamples which indexed
/// terms are loaded into RAM. This has the same effect as
/// IndexWriter.SetTermIndexInterval
/// except that setting
/// must be done at indexing time while this setting can be
/// set per reader. When set to N, then one in every
/// N*termIndexInterval terms in the index is loaded into
/// memory. By setting this to a value greater than 1 you can reduce
/// memory usage, at the expense of higher latency when
/// loading a TermInfo. The default value is 1. Set this
/// to -1 to skip loading the terms index entirely.
/// </param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
{
    return Open(
        commit.Directory,
        deletionPolicy,
        commit,
        readOnly,
        termInfosIndexDivisor);
}

/// <summary>
/// Common funnel for every public Open overload; hands all arguments to
/// <c>DirectoryReader.Open</c> unchanged.
/// </summary>
private static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor)
{
    IndexReader opened = DirectoryReader.Open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
    return opened;
}

/// Refreshes an IndexReader if the index has changed since this instance
/// was (re)opened.
///

/// Opening an IndexReader is an expensive operation. This method can be used /// to refresh an existing IndexReader to reduce these costs. This method /// tries to only load segments that have changed or were created after the /// IndexReader was (re)opened. ///

/// If the index has not changed since this instance was (re)opened, then this /// call is a NOOP and returns this instance. Otherwise, a new instance is /// returned. The old instance is not closed and remains usable.
///

/// If the reader is reopened, even though they share /// resources internally, it's safe to make changes /// (deletions, norms) with the new reader. All shared /// mutable state obeys "copy on write" semantics to ensure /// the changes are not seen by other readers. ///

/// You can determine whether a reader was actually reopened by comparing the /// old instance with the instance returned by this method: /// /// IndexReader reader = ... /// ... /// IndexReader newReader = reader.reopen(); /// if (newReader != reader) { /// ... // reader was reopened /// reader.close(); /// } /// reader = newReader; /// ... /// /// /// Be sure to synchronize that code so that other threads, /// if present, can never use reader after it has been /// closed and before it's switched to newReader. /// ///

/// NOTE: If this reader is a near real-time /// reader (obtained from IndexWriter.GetReader()), /// reopen() will simply call writer.getReader() again for /// you, though this may change in the future. /// ///

/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
/// <exception cref="System.NotSupportedException">always, in this base implementation</exception>
public virtual IndexReader Reopen()
{
    // The base class has nothing to reopen; the throw happens while
    // holding this reader's monitor, exactly as any override would run.
    lock (this)
    {
        throw new NotSupportedException("This reader does not support reopen().");
    }
}

/// <summary>
/// Just like Reopen(), except you can change the
/// readOnly of the original reader. If the index is
/// unchanged but readOnly is different then a new reader
/// will be returned.
/// </summary>
/// <param name="openReadOnly">requested readOnly setting; not used by this base implementation</param>
/// <exception cref="System.NotSupportedException">always, in this base implementation</exception>
public virtual IndexReader Reopen(bool openReadOnly)
{
    lock (this)
    {
        throw new NotSupportedException("This reader does not support reopen().");
    }
}

/// <summary>
/// Expert: reopen this reader on a specific commit point.
/// This always returns a readOnly reader. If the
/// specified commit point matches what this reader is
/// already on, and this reader is already readOnly, then
/// this same instance is returned; if it is not already
/// readOnly, a readOnly clone is returned.
/// </summary>
/// <param name="commit">commit point to reopen on; not used by this base implementation</param>
/// <exception cref="System.NotSupportedException">always, in this base implementation</exception>
public virtual IndexReader Reopen(IndexCommit commit)
{
    lock (this)
    {
        throw new NotSupportedException("This reader does not support reopen(IndexCommit).");
    }
}

/// Efficiently clones the IndexReader (sharing most
/// internal state).
///

/// On cloning a reader with pending changes (deletions, /// norms), the original reader transfers its write lock to /// the cloned reader. This means only the cloned reader /// may make further changes to the index, and commit the /// changes to the index on close, but the old reader still /// reflects all changes made up until it was cloned. ///

/// Like , it's safe to make changes to /// either the original or the cloned reader: all shared /// mutable state obeys "copy on write" semantics to ensure /// the changes are not seen by other readers. ///

///

/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
/// <exception cref="System.NotSupportedException">always, in this base implementation</exception>
public virtual System.Object Clone()
{
    throw new System.NotSupportedException("This reader does not implement clone()");
}

/// <summary>
/// Clones the IndexReader and optionally changes readOnly. A readOnly
/// reader cannot open a writeable reader.
/// </summary>
/// <param name="openReadOnly">requested readOnly setting; not used by this base implementation</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
/// <exception cref="System.NotSupportedException">always, in this base implementation</exception>
public virtual IndexReader Clone(bool openReadOnly)
{
    lock (this)
    {
        throw new System.NotSupportedException("This reader does not implement clone()");
    }
}

/// <summary>
/// Returns the directory associated with this index. This base
/// implementation first checks that the reader is open and then always
/// throws; subclasses that know their directory override it.
/// </summary>
/// <exception cref="System.NotSupportedException">if no directory</exception>
public virtual Directory Directory()
{
    EnsureOpen();
    throw new NotSupportedException("This reader does not support this method.");
}

/// <summary>
/// Returns the time the index in the named directory was last modified.
/// Do not use this to check whether the reader is still up-to-date, use
/// IsCurrent() instead.
/// </summary>
/// <param name="directory2">directory holding the index</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static long LastModified(Directory directory2)
{
    AnonymousClassFindSegmentsFile finder = new AnonymousClassFindSegmentsFile(directory2, directory2);

    // Run() hands back the boxed value produced by DoBody; unbox it.
    System.Int64 modified = (System.Int64) finder.Run();
    return (long) modified;
}

/// <summary>
/// Reads version number from segments files. The version number is
/// initialized with a timestamp and then increased by one for each change of
/// the index.
/// </summary>
/// <param name="directory">where the index resides.</param>
/// <returns>version number.</returns>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static long GetCurrentVersion(Directory directory)
{
    long version = SegmentInfos.ReadCurrentVersion(directory);
    return version;
}

/// <summary>
/// Reads commitUserData, previously passed to a commit,
/// from current index segments file. This will return null if
/// no commit user data has ever been recorded for this index.
/// </summary>
/// <param name="directory">where the index resides.</param>
/// <returns>commit userData.</returns>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
// NOTE(review): the return type appears here without generic type
// arguments; presumably a string-to-string dictionary - confirm against
// the compiled sources.
public static System.Collections.Generic.IDictionary GetCommitUserData(Directory directory)
{
    return SegmentInfos.ReadCurrentUserData(directory);
}

/// Version number when this IndexReader was opened. Not implemented in the
/// IndexReader base class.
///
///

/// If this reader is based on a Directory (ie, was created by calling /// , or /// on a reader based on a Directory), then /// this method returns the version recorded in the commit that the reader /// opened. This version is advanced every time is /// called. ///

/// ///

/// If instead this reader is a near real-time reader (ie, obtained by a call /// to IndexWriter.GetReader(), or by calling Reopen() on a near /// real-time reader), then this method returns the version of the last /// commit done by the writer. Note that even as further changes are made /// with the writer, the version will not change until a commit is /// completed. Thus, you should not rely on this method to determine when a /// near real-time reader should be opened. Use IsCurrent() instead. ///

/// ///

/// <exception cref="System.NotSupportedException">unless overridden in subclass</exception>
public virtual long Version
{
    get
    {
        // No version is tracked at this level; subclasses supply it.
        throw new System.NotSupportedException("This reader does not support this method.");
    }
}

/// <summary>
/// Retrieve the String userData optionally passed to a commit.
/// This will return null if no commit user data
/// has ever been recorded for this index.
/// </summary>
/// <exception cref="System.NotSupportedException">always, in this base implementation</exception>
// NOTE(review): the property type appears here without generic type
// arguments; presumably a string-to-string dictionary - confirm.
public virtual IDictionary CommitUserData
{
    get
    {
        throw new System.NotSupportedException("This reader does not support this method.");
    }
}

/// Check whether any new changes have occurred to the index since this
/// reader was opened.
///
///

/// If this reader is based on a Directory (ie, was created by calling /// /// Open(Store.Directory) /// , or on a reader based on a Directory), then /// this method checks if any further commits (see /// have occurred in that directory). ///

/// ///

/// If instead this reader is a near real-time reader (ie, obtained by a call /// to IndexWriter.GetReader(), or by calling Reopen() on a near /// real-time reader), then this method checks if either a new commit has /// occurred, or any new uncommitted changes have taken place via the writer. /// Note that even if the writer has only performed merging, this method will /// still return false. ///

/// ///

/// In any event, if this returns false, you should call Reopen() to /// get a new reader that sees the changes. ///

/// ///

/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
/// <exception cref="System.NotSupportedException">unless overridden in subclass</exception>
public virtual bool IsCurrent()
{
    throw new NotSupportedException("This reader does not support this method.");
}

/// <summary>
/// Checks if the index is optimized (if it has a single segment and
/// no deletions). Not implemented in the IndexReader base class.
/// </summary>
/// <returns><c>true</c> if the index is optimized; <c>false</c> otherwise</returns>
/// <exception cref="System.NotSupportedException">unless overridden in subclass</exception>
public virtual bool IsOptimized()
{
    throw new NotSupportedException("This reader does not support this method.");
}

/// <summary>
/// Return an array of term frequency vectors for the specified document.
/// The array contains a vector for each vectorized field in the document.
/// Each vector contains terms and frequencies for all terms in a given vectorized field.
/// If no such fields existed, the method returns null. The term vectors that are
/// returned may be plain term frequency vectors, or a richer vector type if
/// positions or offsets have been stored.
/// </summary>
/// <param name="docNumber">document for which term frequency vectors are returned</param>
/// <returns>
/// array of term frequency vectors. May be null if no term vectors have been
/// stored for the specified document.
/// </returns>
/// <exception cref="System.IO.IOException">if index cannot be accessed</exception>
public abstract ITermFreqVector[] GetTermFreqVectors(int docNumber);

/// <summary>
/// Return a term frequency vector for the specified document and field. The
/// returned vector contains terms and frequencies for the terms in
/// the specified field of this document, if the field had the storeTermVector
/// flag set. If termvectors had been stored with positions or offsets, a
/// richer vector type is returned.
/// </summary>
/// <param name="docNumber">document for which the term frequency vector is returned</param>
/// <param name="field">field for which the term frequency vector is returned.</param>
/// <returns>
/// term frequency vector. May be null if field does not exist in the specified
/// document or term vector was not stored.
/// </returns>
/// <exception cref="System.IO.IOException">if index cannot be accessed</exception>
public abstract ITermFreqVector GetTermFreqVector(int docNumber, String field);

/// <summary>
/// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of
/// the term frequency vector.
/// </summary>
/// <param name="docNumber">The number of the document to load the vector for</param>
/// <param name="field">The name of the field to load</param>
/// <param name="mapper">The mapper to process the vector. Must not be null</param>
/// <exception cref="System.IO.IOException">
/// if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
/// </exception>
public abstract void GetTermFreqVector(int docNumber, String field, TermVectorMapper mapper);

/// <summary>Map all the term vectors for all fields in a Document</summary>
/// <param name="docNumber">The number of the document to load the vector for</param>
/// <param name="mapper">The mapper to process the vector. Must not be null</param>
/// <exception cref="System.IO.IOException">
/// if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
/// </exception>
public abstract void GetTermFreqVector(int docNumber, TermVectorMapper mapper);

/// <summary>
/// Returns <c>true</c> if an index exists at the specified directory.
/// If the directory does not exist or if there is no index in it,
/// <c>false</c> is returned.
/// </summary>
/// <param name="directory">the directory to check for an index</param>
/// <returns><c>true</c> if an index exists; <c>false</c> otherwise</returns>
/// <exception cref="System.IO.IOException">if there is a problem with accessing the index</exception>
public static bool IndexExists(Directory directory)
{
    // A current segment generation of -1 is treated as "no index here".
    bool indexFound = SegmentInfos.GetCurrentSegmentGeneration(directory) != -1;
    return indexFound;
}

/// <summary>Returns the number of documents in this index.</summary>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
public abstract int NumDocs();

/// <summary>
/// Returns one greater than the largest possible document number.
/// This may be used to, e.g., determine how big to allocate an array which
/// will have an element for every document number in an index.
/// </summary>
public abstract int MaxDoc { get; }

/// <summary>
/// Returns the number of deleted documents, computed as
/// <c>MaxDoc</c> minus <c>NumDocs()</c>.
/// </summary>
public virtual int NumDeletedDocs
{
    get
    {
        int upperBound = MaxDoc;
        int liveDocs = NumDocs();
        return upperBound - liveDocs;
    }
}

/// Returns the stored fields of the nth
/// Document in this index.
///

/// NOTE: for performance reasons, this method does not check if the /// requested document is deleted, and therefore asking for a deleted document /// may yield unspecified results. Usually this is not required, however you /// can call with the requested document ID to verify /// the document is not deleted. /// ///

/// <param name="n">number of the document whose stored fields are requested</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual Document Document(int n)
{
    EnsureOpen();

    // A null selector is passed through to the two-argument overload,
    // whose documentation states that null loads all fields.
    Document stored = Document(n, null);
    return stored;
}

/// Returns the stored fields of the nth
/// Document in this index.
///

/// NOTE: for performance reasons, this method does not check if the /// requested document is deleted, and therefore asking for a deleted document /// may yield unspecified results. Usually this is not required, however you /// can call with the requested document ID to verify /// the document is not deleted. /// ///

/// <param name="doc">number of the document whose stored fields are requested</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public Document this[int doc]
{
    get
    {
        // Indexer sugar over Document(int).
        Document stored = Document(doc);
        return stored;
    }
}

/// Get the Document at the n
/// th position. The FieldSelector may be used to determine
/// what Fields to load and how they should
/// be loaded. NOTE: If this Reader (more specifically, the underlying
/// FieldsReader) is closed before the lazy
/// Field is loaded an exception may be
/// thrown. If you want the value of a lazy
/// Field to be available after closing you
/// must explicitly load it or fetch the Document again with a new loader.
///

/// NOTE: for performance reasons, this method does not check if the /// requested document is deleted, and therefore asking for a deleted document /// may yield unspecified results. Usually this is not required, however you /// can call with the requested document ID to verify /// the document is not deleted. /// ///

/// <param name="n">Get the document at the <c>n</c>th position</param>
/// <param name="fieldSelector">
/// The FieldSelector to use to determine what
/// Fields should be loaded on the Document. May be null, in which case
/// all Fields will be loaded.
/// </param>
/// <returns>The stored fields of the Document at the nth position</returns>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
// TODO (1.5): When we convert to JDK 1.5 make this Set
public abstract Document Document(int n, FieldSelector fieldSelector);

/// <summary>Returns true if document <i>n</i> has been deleted</summary>
public abstract bool IsDeleted(int n);

/// <summary>Returns true if any documents have been deleted</summary>
public abstract bool HasDeletions { get; }

/// <summary>Returns true if there are norms stored for this field.</summary>
/// <param name="field">name of the field to check</param>
public virtual bool HasNorms(System.String field)
{
    // backward compatible implementation.
    // SegmentReader has an efficient implementation.
    EnsureOpen();
    byte[] stored = Norms(field);
    return stored != null;
}

/// <summary>
/// Returns the byte-encoded normalization factor for the named field of
/// every document. This is used by the search code to score documents.
/// </summary>
/// <param name="field">name of the field whose norms are requested</param>
public abstract byte[] Norms(System.String field);

/// <summary>
/// Reads the byte-encoded normalization factor for the named field of every
/// document. This is used by the search code to score documents.
/// </summary>
/// <param name="field">name of the field whose norms are requested</param>
/// <param name="bytes">destination array for the norms</param>
/// <param name="offset">offset argument handed to the implementation; presumably the start index within <paramref name="bytes"/> - confirm against implementations</param>
public abstract void Norms(System.String field, byte[] bytes, int offset);

/// Expert: Resets the normalization factor for the named field of the named
/// document. The norm represents the product of the field's boost
/// and its length normalization. Thus, to preserve the length normalization
/// values when resetting this, one should base the new value upon the old.
///
/// NOTE: If this field does not store norms, then
/// this method call will silently do nothing.
/// Throws:
///  - If the index has changed since this reader was opened
///  - If the index is corrupt
///  - If another writer has this index open (write.lock could not be obtained)
///  - If there is a low-level IO error
public virtual void SetNorm(int doc, String field, byte value)
{
    lock (this)
    {
        EnsureOpen();
        AcquireWriteLock();

        // Flag the change before delegating so a later Commit picks it up.
        hasChanges = true;
        DoSetNorm(doc, field, value);
    }
}

/// <summary>Implements setNorm in subclass.</summary>
protected internal abstract void DoSetNorm(int doc, System.String field, byte value_Renamed);

/// <summary>
/// Expert: Resets the normalization factor for the named field of the named document.
/// The float is encoded with <c>Similarity.EncodeNorm</c> and forwarded to the
/// byte overload.
/// </summary>
/// Throws:
///  - If the index has changed since this reader was opened
///  - If the index is corrupt
///  - If another writer has this index open (write.lock could not be obtained)
///  - If there is a low-level IO error
public virtual void SetNorm(int doc, System.String field, float value)
{
    EnsureOpen();
    byte encodedNorm = Similarity.EncodeNorm(value);
    SetNorm(doc, field, encodedNorm);
}

/// <summary>
/// Returns an enumeration of all the terms in the index. The
/// enumeration is ordered by Term.compareTo(). Each term is greater
/// than all that precede it in the enumeration. Note that after
/// calling terms(), the enumeration must be advanced
/// before calling other methods on it.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract TermEnum Terms();

/// <summary>
/// Returns an enumeration of all terms starting at a given term. If
/// the given term does not exist, the enumeration is positioned at the
/// first term greater than the supplied term. The enumeration is
/// ordered by Term.compareTo(). Each term is greater than all that
/// precede it in the enumeration.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract TermEnum Terms(Term t);

/// <summary>Returns the number of documents containing the term <c>t</c>.</summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract int DocFreq(Term t);

/// Returns an enumeration of all the documents which contain
/// term. For each document, the document number, the frequency of
/// the term in that document is also provided, for use in
/// search scoring. If term is null, then all non-deleted
/// docs are returned with freq=1.
/// Thus, this method implements the mapping:
///

/// Term    =>    <docNum, freq>* /// ///

The enumeration is ordered by document number. Each document number /// is greater than all that precede it in the enumeration. ///

/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual TermDocs TermDocs(Term term)
{
    EnsureOpen();

    // Obtain an unpositioned enumerator, then position it on the term.
    TermDocs enumerator = TermDocs();
    enumerator.Seek(term);
    return enumerator;
}

/// <summary>Returns an unpositioned TermDocs enumerator.</summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract TermDocs TermDocs();

/// Returns an enumeration of all the documents which contain
/// term. For each document, in addition to the document number
/// and frequency of the term in that document, a list of all of the ordinal
/// positions of the term in the document is available. Thus, this method
/// implements the mapping:
///
///

/// Term    =>    <docNum, freq, /// <pos1, pos2, ... /// posfreq-1> /// >* /// ///

This positional information facilitates phrase and proximity searching. ///

The enumeration is ordered by document number. Each document number is /// greater than all that precede it in the enumeration. ///

/// If there is a low-level IO error public virtual TermPositions TermPositions(Term term) { EnsureOpen(); TermPositions termPositions = TermPositions(); termPositions.Seek(term); return termPositions; } /// Returns an unpositioned enumerator. /// If there is a low-level IO error public abstract TermPositions TermPositions(); /// /// Deletes the document numbered docNum. Once a document is /// deleted it will not appear in TermDocs or TermPostitions enumerations. /// Attempts to read its field with the /// method will result in an error. The presence of this document may still be /// reflected in the statistic, though /// this will be corrected eventually as the index is further modified. /// /// /// If the index has changed since this reader was opened /// /// If the index is corrupt /// /// If another writer has this index open (write.lock could not be obtained) /// /// If there is a low-level IO error public virtual void DeleteDocument(int docNum) { lock (this) { EnsureOpen(); AcquireWriteLock(); hasChanges = true; DoDelete(docNum); } } /// Implements deletion of the document numbered docNum. /// Applications should call or . /// protected internal abstract void DoDelete(int docNum); /// /// Deletes all documents that have a given term indexed. /// This is useful if one uses a document field to hold a unique ID string for /// the document. Then to delete such a document, one merely constructs a /// term with the appropriate field and the unique ID string as its text and /// passes it to this method. /// See for information about when this deletion will /// become effective. 
///
/// <returns>The number of documents deleted</returns>
/// <remarks>
/// Fails if the index has changed since this reader was opened, if the index
/// is corrupt, if another writer has this index open (write.lock could not be
/// obtained), or if there is a low-level IO error.
/// </remarks>
public virtual int DeleteDocuments(Term term)
{
    EnsureOpen();

    TermDocs matches = TermDocs(term);
    if (matches == null)
    {
        return 0;
    }

    int deleted = 0;
    try
    {
        // Count a document only after its deletion has gone through.
        for (; matches.Next(); deleted++)
        {
            DeleteDocument(matches.Doc);
        }
    }
    finally
    {
        matches.Close();
    }

    return deleted;
}

/// <summary>Undeletes all documents currently marked as deleted in this index.</summary>
/// <remarks>
/// Fails if the index has changed since this reader was opened, if the index
/// is corrupt, if another writer has this index open (write.lock could not be
/// obtained), or if there is a low-level IO error.
/// </remarks>
public virtual void UndeleteAll()
{
    lock (this)
    {
        EnsureOpen();
        AcquireWriteLock();

        // Mark the reader dirty before delegating to the subclass implementation.
        hasChanges = true;
        DoUndeleteAll();
    }
}

/// <summary>Implements the actual undelete-all operation in a subclass.</summary>
protected internal abstract void DoUndeleteAll();

/// <summary>
/// Does nothing by default. Subclasses that require a write lock for
/// index modifications must implement this method.
/// </summary>
protected internal virtual void AcquireWriteLock()
{
    lock (this)
    {
        /* NOOP */
    }
}

/// <summary>
/// Calls <c>EnsureOpen()</c> and then <see cref="Commit()"/> while holding the reader's lock.
/// </summary>
public void Flush()
{
    lock (this)
    {
        EnsureOpen();
        Commit();
    }
}

/// <summary>
/// Calls <c>EnsureOpen()</c> and then commits with the given user data while
/// holding the reader's lock.
/// </summary>
/// <param name="commitUserData">Opaque Map (String -&gt; String)
/// that's recorded into the segments file in the index,
/// and is retrievable later.
/// </param>
public void Flush(IDictionary commitUserData)
{
    lock (this)
    {
        EnsureOpen();
        Commit(commitUserData);
    }
}

/// Commit changes resulting from delete, undeleteAll, or
/// setNorm operations
///
/// If an exception is hit, then either no changes or all
/// changes will have been committed to the index
/// (transactional semantics).
///
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public /*protected internal*/ void Commit()
{
    lock (this)
    {
        // No user data to record: delegate to the overload with null.
        Commit(null);
    }
}

/// <summary>
/// Commit changes resulting from delete, undeleteAll, or
/// setNorm operations.
/// If an exception is hit, then either no changes or all
/// changes will have been committed to the index
/// (transactional semantics).
/// </summary>
/// <param name="commitUserData">Passed through unchanged to <c>DoCommit</c>; may be null.</param>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public void Commit(IDictionary commitUserData)
{
    lock (this)
    {
        bool pending = hasChanges;
        if (pending)
        {
            DoCommit(commitUserData);
        }

        // Reached only when DoCommit did not throw (or there was nothing to commit).
        hasChanges = false;
    }
}

/// <summary>Implements commit.</summary>
/// <param name="commitUserData">User data handed over by <c>Commit</c>; may be null.</param>
protected internal abstract void DoCommit(IDictionary commitUserData);

/// <summary>Obsolete alias that simply forwards to <see cref="Dispose()"/>.</summary>
[Obsolete("Use Dispose() instead")]
public void Close()
{
    Dispose();
}

/// <summary>
/// Closes files associated with this index.
/// Also saves any new deletions to disk.
/// No other methods should be called after this has been called.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public void Dispose()
{
    Dispose(true);
}

/// <summary>
/// When <paramref name="disposing"/> is true and the reader has not been closed
/// yet, calls <c>DecRef()</c> once and marks the reader as closed. Does nothing otherwise.
/// </summary>
/// <param name="disposing">True when invoked from <see cref="Dispose()"/>.</param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    lock (this)
    {
        if (closed)
        {
            // Already closed: repeated calls must not decrement again.
            return;
        }

        DecRef();
        closed = true;
    }
}

/// <summary>Implements close.</summary>
protected internal abstract void DoClose();

/// <summary>
/// Get a list of unique field names that exist in this index and have the specified
/// field option information.
/// </summary>
/// <param name="fldOption">specifies which field option should be available for the returned fields</param>
/// <returns>Collection of Strings indicating the names of the fields.</returns>
public abstract ICollection GetFieldNames(FieldOption fldOption);

/// Expert: return the IndexCommit that this reader has
/// opened. This method is only implemented by those
/// readers that correspond to a Directory with its own
/// segments_N file.
///
///

/// WARNING: this API is new and experimental and
/// may suddenly change.

///

public virtual IndexCommit IndexCommit
{
    get
    {
        // Base implementation: unsupported; readers that can report their commit override this.
        throw new NotSupportedException("This reader does not support this method.");
    }
}

/// <summary>
/// Prints the filename and size of each file within a given compound file.
/// Add the -extract flag to extract files to the current working directory.
/// In order to make the extracted version of the index work, you have to copy
/// the segments file from the compound index into the directory where the extracted files are stored.
/// </summary>
/// <param name="args">Usage: Lucene.Net.Index.IndexReader [-extract] &lt;cfsfile&gt;</param>
/// <remarks>
/// IO errors are reported on standard error rather than propagated. The first
/// non-flag argument is taken as the compound file; later ones are ignored.
/// </remarks>
[STAThread]
public static void Main(String[] args)
{
    System.String filename = null;
    bool extract = false;

    foreach (string arg in args)
    {
        if (arg.Equals("-extract"))
        {
            extract = true;
        }
        else if (filename == null)
        {
            filename = arg;
        }
    }

    if (filename == null)
    {
        // The <cfsfile> placeholder was missing from this message although the documentation shows it.
        System.Console.Out.WriteLine("Usage: Lucene.Net.Index.IndexReader [-extract] <cfsfile>");
        return;
    }

    Directory dir = null;
    CompoundFileReader cfr = null;

    try
    {
        var file = new System.IO.FileInfo(filename);
        System.String dirname = file.DirectoryName;
        filename = file.Name;
        dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirname));
        cfr = new CompoundFileReader(dir, filename);

        System.String[] files = cfr.ListAll();
        System.Array.Sort(files); // sort the array of filenames so that the output is more readable

        foreach (string entry in files)
        {
            long len = cfr.FileLength(entry);

            if (!extract)
            {
                System.Console.Out.WriteLine(entry + ": " + len + " bytes");
                continue;
            }

            System.Console.Out.WriteLine("extract " + entry + " with " + len + " bytes to local directory...");

            IndexInput input = cfr.OpenInput(entry);
            try
            {
                // 'using' guarantees the output file is closed even if the copy fails midway.
                using (var output = new System.IO.FileStream(entry, System.IO.FileMode.Create))
                {
                    // read and write with a small buffer, which is more effective than reading byte by byte
                    var buffer = new byte[1024];
                    long remaining = len;
                    while (remaining > 0)
                    {
                        var count = (int) System.Math.Min(buffer.Length, remaining);
                        input.ReadBytes(buffer, 0, count);
                        output.Write(buffer, 0, count);
                        remaining -= count;
                    }
                }
            }
            finally
            {
                input.Close();
            }
        }
    }
    catch (System.IO.IOException ioe)
    {
        // ToString() includes the exception type and message as well as the stack trace.
        System.Console.Error.WriteLine(ioe.ToString());
    }
    finally
    {
        // Close the compound reader before the directory it reads from, and close
        // each one independently so a failure in one does not leak the other.
        try
        {
            if (cfr != null)
            {
                cfr.Close();
            }
        }
        catch (System.IO.IOException ioe)
        {
            System.Console.Error.WriteLine(ioe.ToString());
        }

        try
        {
            if (dir != null)
            {
                dir.Close();
            }
        }
        catch (System.IO.IOException ioe)
        {
            System.Console.Error.WriteLine(ioe.ToString());
        }
    }
}

/// <summary>
/// Returns all commit points that exist in the Directory.
/// Normally, with the default index configuration, there would be only
/// one commit point. But if you're using a custom one
/// then there could be many commits.
/// Once you have a given commit, you can open a reader on it.
/// There must be at least one commit in
/// the Directory, else this method throws an exception.
/// Note that if a commit is in
/// progress while this method is running, that commit
/// may or may not be included in the returned collection.
/// </summary>
/// <param name="dir">Directory whose commit points are listed.</param>
/// <returns>The result of <c>DirectoryReader.ListCommits(dir)</c>.</returns>
public static System.Collections.Generic.ICollection ListCommits(Directory dir)
{
    return DirectoryReader.ListCommits(dir);
}

/// Expert: returns the sequential sub readers that this
/// reader is logically composed of. For example,
/// IndexSearcher uses this API to drive searching by one
/// sub reader at a time. If this reader is not composed
/// of sequential child readers, it should return null.
/// If this method returns an empty array, that means this
/// reader is a null reader (for example a MultiReader
/// that has no sub readers).
///

/// NOTE: You should not try using sub-readers returned by
/// this method to make any changes (setNorm, deleteDocument,
/// etc.). While this might succeed for one composite reader
/// (like MultiReader), it will most likely lead to index
/// corruption for other readers (like a DirectoryReader obtained
/// through one of the static open methods). Use the parent reader directly.
///

public virtual IndexReader[] GetSequentialSubReaders()
{
    // Base implementation: null means "not composed of sequential sub readers".
    return null;
}

/// <summary>Expert: the base implementation returns this reader itself as the key.</summary>
public virtual object FieldCacheKey
{
    get
    {
        return this;
    }
}

/// <summary>
/// Expert: the base implementation returns this reader itself as the key.
/// </summary>
/// <remarks>
/// NOTE(review): the original comment warned that this "returns null if the reader
/// has no deletions"; the base implementation never does, so that presumably
/// describes overriding implementations. Confirm against the subclasses.
/// </remarks>
public virtual object DeletesCacheKey
{
    get
    {
        return this;
    }
}

/// <summary>
/// Returns the number of unique terms (across all fields)
/// in this reader.
///
/// This property is a long, even though internally
/// Lucene cannot handle more than 2^31 unique terms, for
/// a possible future when this limitation is removed.
/// </summary>
/// <exception cref="System.NotSupportedException">
/// If this count cannot be easily determined (eg Multi*Readers);
/// the base implementation always throws.
/// Instead, you should call <see cref="GetSequentialSubReaders"/>
/// and ask each sub reader for its unique term count.
/// </exception>
public virtual long UniqueTermCount
{
    get
    {
        throw new NotSupportedException("this reader does not implement getUniqueTermCount()");
    }
}

/// <summary>
/// For IndexReader implementations that use
/// TermInfosReader to read terms, this returns the
/// current indexDivisor as specified when the reader was
/// opened.
/// </summary>
/// <exception cref="System.NotSupportedException">Always thrown by the base implementation.</exception>
public virtual int TermInfosIndexDivisor
{
    get
    {
        throw new NotSupportedException("This reader does not support this method.");
    }
}
} // end of the enclosing class
} // end of namespace Lucene.Net.Index