/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using Lucene.Net.Documents;
using Document = Lucene.Net.Documents.Document;
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using Lucene.Net.Store;
using Similarity = Lucene.Net.Search.Similarity;
namespace Lucene.Net.Index
{
/// IndexReader is an abstract class, providing an interface for accessing an
/// index. Search of an index is done entirely through this abstract interface,
/// so that any subclass which implements it is searchable.
/// Concrete subclasses of IndexReader are usually constructed with a call to
/// one of the static Open() methods, e.g. Open(Directory, bool).
/// For efficiency, in this API documents are often referred to via
/// document numbers, non-negative integers which each name a unique
/// document in the index. These document numbers are ephemeral--they may change
/// as documents are added to and deleted from an index. Clients should thus not
/// rely on a given document having the same number between sessions.
/// An IndexReader can be opened on a directory for which an IndexWriter is
/// opened already, but it cannot be used to delete documents from the index then.
///
/// NOTE: for backwards API compatibility, several methods are not listed
/// as abstract, but have no useful implementations in this base class and
/// instead always throw NotSupportedException. Subclasses are
/// strongly encouraged to override these methods, but in many cases may not
/// need to.
///
///
/// NOTE: as of 2.4, it's possible to open a read-only
/// IndexReader using the static Open methods that accept the
/// boolean readOnly parameter. Such a reader has
/// better concurrency as it's not necessary to synchronize on the
/// IsDeleted method. You must explicitly specify false
/// if you want to make changes with the resulting IndexReader.
///
/// NOTE: IndexReader
/// instances are completely thread
/// safe, meaning multiple threads can call any of their methods
/// concurrently. If your application requires external
/// synchronization, you should not synchronize on the
/// IndexReader instance; use your own
/// (non-Lucene) objects instead.
///
public abstract class IndexReader : System.ICloneable, System.IDisposable
{
/// <summary>
/// <c>SegmentInfos.FindSegmentsFile</c> callback used by <c>LastModified</c>:
/// for the segments file name it is handed, it returns that file's
/// modification time as reported by the captured directory.
/// </summary>
private class AnonymousClassFindSegmentsFile : SegmentInfos.FindSegmentsFile
{
    // Directory whose FileModified() is queried in DoBody.
    private Lucene.Net.Store.Directory directory2;

    /// <param name="directory2">directory queried for the file modification time</param>
    /// <param name="Param1">directory forwarded to the base class constructor</param>
    internal AnonymousClassFindSegmentsFile(Lucene.Net.Store.Directory directory2, Lucene.Net.Store.Directory Param1)
        : base(Param1)
    {
        this.directory2 = directory2;
    }

    /// <summary>Returns the modification time of <paramref name="segmentFileName"/> as a boxed <c>long</c>.</summary>
    public override System.Object DoBody(System.String segmentFileName)
    {
        long modifiedTime = (long) directory2.FileModified(segmentFileName);
        return modifiedTime;
    }
}
/// <summary>
/// Constants describing field properties. Each constant wraps a name that is
/// returned by <see cref="ToString"/>.
/// </summary>
public sealed class FieldOption
{
    // Name of the option; stays null when the parameterless constructor is used.
    private readonly string option;

    internal FieldOption()
    {
    }

    internal FieldOption(string option)
    {
        this.option = option;
    }

    /// <summary>Returns the name this option was constructed with.</summary>
    public override string ToString()
    {
        return option;
    }

    /// <summary>All fields</summary>
    public static readonly FieldOption ALL = new FieldOption("ALL");

    /// <summary>All indexed fields</summary>
    public static readonly FieldOption INDEXED = new FieldOption("INDEXED");

    /// <summary>All fields that store payloads</summary>
    public static readonly FieldOption STORES_PAYLOADS = new FieldOption("STORES_PAYLOADS");

    /// <summary>All fields that omit tf</summary>
    public static readonly FieldOption OMIT_TERM_FREQ_AND_POSITIONS = new FieldOption("OMIT_TERM_FREQ_AND_POSITIONS");

    /// <summary>All fields which are not indexed</summary>
    public static readonly FieldOption UNINDEXED = new FieldOption("UNINDEXED");

    /// <summary>All fields which are indexed with termvectors enabled</summary>
    public static readonly FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption("INDEXED_WITH_TERMVECTOR");

    /// <summary>All fields which are indexed but don't have termvectors enabled</summary>
    public static readonly FieldOption INDEXED_NO_TERMVECTOR = new FieldOption("INDEXED_NO_TERMVECTOR");

    /// <summary>All fields with termvectors enabled. Please note that only standard termvector fields are returned</summary>
    public static readonly FieldOption TERMVECTOR = new FieldOption("TERMVECTOR");

    /// <summary>All fields with termvectors with position values enabled</summary>
    public static readonly FieldOption TERMVECTOR_WITH_POSITION = new FieldOption("TERMVECTOR_WITH_POSITION");

    /// <summary>All fields with termvectors with offset values enabled</summary>
    public static readonly FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption("TERMVECTOR_WITH_OFFSET");

    /// <summary>All fields with termvectors with offset values and position values enabled</summary>
    public static readonly FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption("TERMVECTOR_WITH_POSITION_OFFSET");
}
// NOTE(review): not referenced in the visible part of this file; presumably
// tracks whether the reader has been closed - confirm against the rest of the class.
private bool closed;
// Set to true by SetNorm and DeleteDocument before the change is applied.
protected internal bool hasChanges;
// Reference count; set to 1 by the constructor and adjusted by IncRef/DecRef.
// EnsureOpen treats a value <= 0 as "closed".
private int refCount;
// Divisor passed by the Open overloads that do not take a termInfosIndexDivisor argument.
protected internal static int DEFAULT_TERMS_INDEX_DIVISOR = 1;
/// <summary>Expert: returns the current refCount for this reader.</summary>
/// <remarks>The value is read while holding the lock on this instance.</remarks>
public virtual int RefCount
{
    get
    {
        int snapshot;
        lock (this)
        {
            snapshot = refCount;
        }
        return snapshot;
    }
}
/// <summary>
/// Expert: increments the refCount of this IndexReader instance.
/// RefCounts are used to determine when a reader can be closed safely,
/// i.e. as soon as there are no more references.
/// </summary>
/// <remarks>
/// Be sure to always call a corresponding <see cref="DecRef"/> in a finally
/// clause; otherwise the reader may never be closed. Per the original
/// documentation, closing the reader simply calls DecRef(), which means the
/// IndexReader will not really be closed until DecRef() has been called for
/// all outstanding references.
/// </remarks>
public virtual void IncRef()
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(refCount > 0);
        // Throws AlreadyClosedException when the count has already dropped to zero.
        EnsureOpen();
        refCount += 1;
    }
}
/// <summary>
/// Expert: decreases the refCount of this IndexReader instance. If the
/// refCount drops to 0, then pending changes (if any) are committed to the
/// index and this reader is closed.
/// </summary>
/// <exception cref="System.IO.IOException">in case an IOException occurs in Commit() or DoClose()</exception>
public virtual void DecRef()
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(refCount > 0);
        EnsureOpen();

        // Commit and close run before the count is decremented, so a failure
        // in either leaves the count untouched.
        bool releasingLastReference = (refCount == 1);
        if (releasingLastReference)
        {
            Commit();
            DoClose();
        }
        refCount -= 1;
    }
}
/// <summary>Creates the reader holding a single reference (refCount is set to 1).</summary>
protected internal IndexReader()
{
    this.refCount = 1;
}
/// <summary>Verifies that this reader still holds at least one reference.</summary>
/// <exception cref="AlreadyClosedException">if this IndexReader is closed (refCount is zero or less)</exception>
protected internal void EnsureOpen()
{
    if (refCount > 0)
    {
        return;
    }
    throw new AlreadyClosedException("this IndexReader is closed");
}
/// <summary>
/// Returns an IndexReader reading the index in the given Directory. You
/// should pass readOnly=true, since it gives much better concurrent
/// performance, unless you intend to do write operations (delete documents
/// or change norms) with the reader.
/// </summary>
/// <param name="directory">the index directory</param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
public static IndexReader Open(Directory directory, bool readOnly)
{
    // No deletion policy, no explicit commit point, default terms index divisor.
    IndexDeletionPolicy noPolicy = null;
    IndexCommit noCommit = null;
    return Open(directory, noPolicy, noCommit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
}
/// <summary>
/// Expert: returns an IndexReader reading the index in the given
/// IndexCommit. You should pass readOnly=true, since it gives much better
/// concurrent performance, unless you intend to do write operations (delete
/// documents or change norms) with the reader.
/// </summary>
/// <param name="commit">the commit point to open; its Directory is used as the index directory</param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(IndexCommit commit, bool readOnly)
{
    IndexDeletionPolicy noPolicy = null;
    return Open(commit.Directory, noPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
}
/// <summary>
/// Expert: returns an IndexReader reading the index in the given Directory,
/// with a custom IndexDeletionPolicy. You should pass readOnly=true, since
/// it gives much better concurrent performance, unless you intend to do
/// write operations (delete documents or change norms) with the reader.
/// </summary>
/// <param name="directory">the index directory</param>
/// <param name="deletionPolicy">
/// a custom deletion policy (only used if you use this reader to perform
/// deletes or to set norms)
/// </param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly)
{
    IndexCommit noCommit = null;
    int divisor = DEFAULT_TERMS_INDEX_DIVISOR;
    return Open(directory, deletionPolicy, noCommit, readOnly, divisor);
}
/// <summary>
/// Expert: returns an IndexReader reading the index in the given Directory,
/// with a custom IndexDeletionPolicy. You should pass readOnly=true, since
/// it gives much better concurrent performance, unless you intend to do
/// write operations (delete documents or change norms) with the reader.
/// </summary>
/// <param name="directory">the index directory</param>
/// <param name="deletionPolicy">
/// a custom deletion policy (only used if you use this reader to perform
/// deletes or to set norms)
/// </param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <param name="termInfosIndexDivisor">
/// Subsamples which indexed terms are loaded into RAM. This has the same
/// effect as IndexWriter.SetTermIndexInterval except that setting must be
/// done at indexing time while this setting can be set per reader. When set
/// to N, then one in every N*termIndexInterval terms in the index is loaded
/// into memory. By setting this to a value greater than 1 you can reduce
/// memory usage, at the expense of higher latency when loading a TermInfo.
/// The default value is 1. Set this to -1 to skip loading the terms index
/// entirely.
/// </param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
{
    IndexCommit noCommit = null;
    return Open(directory, deletionPolicy, noCommit, readOnly, termInfosIndexDivisor);
}
/// <summary>
/// Expert: returns an IndexReader reading the index in the given Directory,
/// using a specific commit and with a custom IndexDeletionPolicy. You should
/// pass readOnly=true, since it gives much better concurrent performance,
/// unless you intend to do write operations (delete documents or change
/// norms) with the reader.
/// </summary>
/// <param name="commit">the specific IndexCommit to open; its Directory is used as the index directory</param>
/// <param name="deletionPolicy">
/// a custom deletion policy (only used if you use this reader to perform
/// deletes or to set norms)
/// </param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly)
{
    // Same as the four-argument overload, with the default terms index divisor.
    IndexReader reader = Open(commit.Directory, deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
    return reader;
}
/// <summary>
/// Expert: returns an IndexReader reading the index in the given Directory,
/// using a specific commit and with a custom IndexDeletionPolicy. You should
/// pass readOnly=true, since it gives much better concurrent performance,
/// unless you intend to do write operations (delete documents or change
/// norms) with the reader.
/// </summary>
/// <param name="commit">the specific IndexCommit to open; its Directory is used as the index directory</param>
/// <param name="deletionPolicy">
/// a custom deletion policy (only used if you use this reader to perform
/// deletes or to set norms)
/// </param>
/// <param name="readOnly">true if no changes (deletions, norms) will be made with this IndexReader</param>
/// <param name="termInfosIndexDivisor">
/// Subsamples which indexed terms are loaded into RAM. This has the same
/// effect as IndexWriter.SetTermIndexInterval except that setting must be
/// done at indexing time while this setting can be set per reader. When set
/// to N, then one in every N*termIndexInterval terms in the index is loaded
/// into memory. By setting this to a value greater than 1 you can reduce
/// memory usage, at the expense of higher latency when loading a TermInfo.
/// The default value is 1. Set this to -1 to skip loading the terms index
/// entirely.
/// </param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
{
    IndexReader reader = Open(commit.Directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
    return reader;
}
/// <summary>
/// Common target of all public Open overloads; forwards every argument
/// unchanged to <c>DirectoryReader.Open</c>.
/// </summary>
private static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor)
{
    IndexReader reader = DirectoryReader.Open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
    return reader;
}
/// <summary>
/// Refreshes an IndexReader if the index has changed since this instance
/// was (re)opened. This base implementation always throws
/// <see cref="System.NotSupportedException"/>; the contract below applies to
/// subclasses that override it.
/// </summary>
/// <remarks>
/// <para>
/// Opening an IndexReader is an expensive operation. This method can be used
/// to refresh an existing IndexReader to reduce these costs. This method
/// tries to only load segments that have changed or were created after the
/// IndexReader was (re)opened.
/// </para>
/// <para>
/// If the index has not changed since this instance was (re)opened, then this
/// call is a NOOP and returns this instance. Otherwise, a new instance is
/// returned. The old instance is not closed and remains usable.
/// </para>
/// <para>
/// If the reader is reopened, even though they share resources internally,
/// it's safe to make changes (deletions, norms) with the new reader. All
/// shared mutable state obeys "copy on write" semantics to ensure the changes
/// are not seen by other readers.
/// </para>
/// <para>
/// You can determine whether a reader was actually reopened by comparing the
/// old instance with the instance returned by this method:
/// </para>
/// <code>
/// IndexReader reader = ...
/// ...
/// IndexReader newReader = reader.Reopen();
/// if (newReader != reader) {
///     ... // reader was reopened
///     reader.Close();
/// }
/// reader = newReader;
/// ...
/// </code>
/// <para>
/// Be sure to synchronize that code so that other threads, if present, can
/// never use reader after it has been closed and before it's switched to
/// newReader.
/// </para>
/// <para>
/// NOTE: If this reader is a near real-time reader (obtained from the
/// writer's GetReader()), Reopen() will simply call writer.GetReader() again
/// for you, though this may change in the future.
/// </para>
/// </remarks>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual IndexReader Reopen()
{
    const string unsupported = "This reader does not support reopen().";
    lock (this)
    {
        throw new NotSupportedException(unsupported);
    }
}
/// <summary>
/// Just like <see cref="Reopen()"/>, except you can change the readOnly of
/// the original reader. If the index is unchanged but readOnly is different
/// then a new reader will be returned. This base implementation always
/// throws <see cref="System.NotSupportedException"/>.
/// </summary>
/// <param name="openReadOnly">the readOnly setting requested for the returned reader</param>
public virtual IndexReader Reopen(bool openReadOnly)
{
    const string unsupported = "This reader does not support reopen().";
    lock (this)
    {
        throw new NotSupportedException(unsupported);
    }
}
/// <summary>
/// Expert: reopen this reader on a specific commit point. This always
/// returns a readOnly reader. If the specified commit point matches what
/// this reader is already on, and this reader is already readOnly, then this
/// same instance is returned; if it is not already readOnly, a readOnly
/// clone is returned. This base implementation always throws
/// <see cref="System.NotSupportedException"/>.
/// </summary>
/// <param name="commit">the commit point to reopen on</param>
public virtual IndexReader Reopen(IndexCommit commit)
{
    const string unsupported = "This reader does not support reopen(IndexCommit).";
    lock (this)
    {
        throw new NotSupportedException(unsupported);
    }
}
/// <summary>
/// Efficiently clones the IndexReader (sharing most internal state). This
/// base implementation always throws
/// <see cref="System.NotSupportedException"/>; the contract below applies to
/// subclasses that override it.
/// </summary>
/// <remarks>
/// <para>
/// On cloning a reader with pending changes (deletions, norms), the original
/// reader transfers its write lock to the cloned reader. This means only the
/// cloned reader may make further changes to the index, and commit the
/// changes to the index on close, but the old reader still reflects all
/// changes made up until it was cloned.
/// </para>
/// <para>
/// Like <see cref="Reopen()"/>, it's safe to make changes to either the
/// original or the cloned reader: all shared mutable state obeys "copy on
/// write" semantics to ensure the changes are not seen by other readers.
/// </para>
/// </remarks>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual System.Object Clone()
{
    const string unsupported = "This reader does not implement clone()";
    throw new NotSupportedException(unsupported);
}
/// <summary>
/// Clones the IndexReader and optionally changes readOnly. A readOnly reader
/// cannot open a writeable reader. This base implementation always throws
/// <see cref="System.NotSupportedException"/>.
/// </summary>
/// <param name="openReadOnly">the readOnly setting requested for the clone</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual IndexReader Clone(bool openReadOnly)
{
    const string unsupported = "This reader does not implement clone()";
    lock (this)
    {
        throw new NotSupportedException(unsupported);
    }
}
/// <summary>
/// Returns the directory associated with this index. This base
/// implementation checks that the reader is open and then always throws
/// <see cref="System.NotSupportedException"/>; subclasses that are backed by
/// a directory are expected to override it.
/// </summary>
/// <exception cref="AlreadyClosedException">if this reader is closed</exception>
/// <exception cref="System.NotSupportedException">if no directory is available</exception>
public virtual Directory Directory()
{
    const string unsupported = "This reader does not support this method.";
    EnsureOpen();
    throw new NotSupportedException(unsupported);
}
/// <summary>
/// Returns the time the index in the named directory was last modified.
/// Do not use this to check whether the reader is still up-to-date, use
/// <see cref="IsCurrent()"/> instead.
/// </summary>
/// <param name="directory2">the directory holding the index</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static long LastModified(Directory directory2)
{
    // The same directory is used both to locate the segments file and to
    // query its modification time.
    AnonymousClassFindSegmentsFile finder = new AnonymousClassFindSegmentsFile(directory2, directory2);
    System.Object boxedTime = finder.Run();
    return (long) ((System.Int64) boxedTime);
}
/// <summary>
/// Reads version number from segments files. The version number is
/// initialized with a timestamp and then increased by one for each change of
/// the index.
/// </summary>
/// <param name="directory">where the index resides.</param>
/// <returns>version number.</returns>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public static long GetCurrentVersion(Directory directory)
{
    long currentVersion = SegmentInfos.ReadCurrentVersion(directory);
    return currentVersion;
}
/// <summary>
/// Reads commitUserData, previously passed to the writer's commit call, from
/// the current index segments file. This will return null if no user data
/// has ever been committed for this index.
/// </summary>
/// <param name="directory">where the index resides.</param>
/// <returns>commit userData.</returns>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
// NOTE(review): the generic arguments were missing from the return type, and
// System.Collections.Generic has no non-generic IDictionary. They are restored
// here as <string, string> - confirm this matches SegmentInfos.ReadCurrentUserData.
public static System.Collections.Generic.IDictionary<string, string> GetCommitUserData(Directory directory)
{
    return SegmentInfos.ReadCurrentUserData(directory);
}
/// <summary>
/// Version number when this IndexReader was opened. Not implemented in the
/// IndexReader base class: this getter always throws
/// <see cref="System.NotSupportedException"/>.
/// </summary>
/// <remarks>
/// <para>
/// If this reader is based on a Directory (ie, was created by calling Open,
/// or Reopen on a reader based on a Directory), then this method returns the
/// version recorded in the commit that the reader opened. This version is
/// advanced every time a commit is made by the writer.
/// </para>
/// <para>
/// If instead this reader is a near real-time reader (ie, obtained from the
/// writer, or by calling Reopen on a near real-time reader), then this method
/// returns the version of the last commit done by the writer. Note that even
/// as further changes are made with the writer, the version will not change
/// until a commit is completed. Thus, you should not rely on this method to
/// determine when a near real-time reader should be opened. Use
/// <see cref="IsCurrent()"/> instead.
/// </para>
/// </remarks>
/// <exception cref="System.NotSupportedException">unless overridden in subclass</exception>
public virtual long Version
{
    get
    {
        const string unsupported = "This reader does not support this method.";
        throw new NotSupportedException(unsupported);
    }
}
/// <summary>
/// Retrieves the String userData optionally passed to the writer's commit
/// call. This will return null if no user data has ever been committed for
/// this index. This base implementation always throws
/// <see cref="System.NotSupportedException"/>.
/// </summary>
/// <exception cref="System.NotSupportedException">unless overridden in subclass</exception>
// NOTE(review): the generic arguments were missing from the property type, and
// a bare IDictionary does not resolve with only System.Collections.Generic
// imported. They are restored here as <string, string> - confirm against the
// overriding subclasses.
public virtual IDictionary<string, string> CommitUserData
{
    get { throw new System.NotSupportedException("This reader does not support this method."); }
}
/// <summary>
/// Check whether any new changes have occurred to the index since this
/// reader was opened. This base implementation always throws
/// <see cref="System.NotSupportedException"/>; the contract below applies to
/// subclasses that override it.
/// </summary>
/// <remarks>
/// <para>
/// If this reader is based on a Directory (ie, was created by calling
/// Open(Store.Directory), or Reopen on a reader based on a Directory), then
/// this method checks if any further commits have occurred in that directory.
/// </para>
/// <para>
/// If instead this reader is a near real-time reader (ie, obtained from the
/// writer, or by calling Reopen on a near real-time reader), then this method
/// checks if either a new commit has occurred, or any new uncommitted changes
/// have taken place via the writer. Note that even if the writer has only
/// performed merging, this method will still return false.
/// </para>
/// <para>
/// In any event, if this returns false, you should call
/// <see cref="Reopen()"/> to get a new reader that sees the changes.
/// </para>
/// </remarks>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
/// <exception cref="System.NotSupportedException">unless overridden in subclass</exception>
public virtual bool IsCurrent()
{
    const string unsupported = "This reader does not support this method.";
    throw new NotSupportedException(unsupported);
}
/// <summary>
/// Checks if the index is optimized (if it has a single segment and no
/// deletions). Not implemented in the IndexReader base class.
/// </summary>
/// <returns><c>true</c> if the index is optimized; <c>false</c> otherwise</returns>
/// <exception cref="System.NotSupportedException">unless overridden in subclass</exception>
public virtual bool IsOptimized()
{
    const string unsupported = "This reader does not support this method.";
    throw new NotSupportedException(unsupported);
}
/// <summary>
/// Return an array of term frequency vectors for the specified document.
/// The array contains a vector for each vectorized field in the document.
/// Each vector contains terms and frequencies for all terms in a given
/// vectorized field. If no such fields existed, the method returns null.
/// Per the original documentation, the returned vectors may be of a richer,
/// position-aware vector type if positions or offsets have been stored.
/// </summary>
/// <param name="docNumber">document for which term frequency vectors are returned</param>
/// <returns>
/// array of term frequency vectors. May be null if no term vectors have been
/// stored for the specified document.
/// </returns>
/// <exception cref="System.IO.IOException">if index cannot be accessed</exception>
public abstract ITermFreqVector[] GetTermFreqVectors(int docNumber);
/// <summary>
/// Return a term frequency vector for the specified document and field. The
/// returned vector contains terms and frequencies for the terms in the
/// specified field of this document, if the field had the storeTermVector
/// flag set. Per the original documentation, a position-aware vector type is
/// returned if termvectors had been stored with positions or offsets.
/// </summary>
/// <param name="docNumber">document for which the term frequency vector is returned</param>
/// <param name="field">field for which the term frequency vector is returned.</param>
/// <returns>
/// term frequency vector. May be null if field does not exist in the
/// specified document or term vector was not stored.
/// </returns>
/// <exception cref="System.IO.IOException">if index cannot be accessed</exception>
public abstract ITermFreqVector GetTermFreqVector(int docNumber, string field);
/// <summary>
/// Load the Term Vector into a user-defined data structure instead of
/// relying on the parallel arrays of the <see cref="ITermFreqVector"/>.
/// </summary>
/// <param name="docNumber">The number of the document to load the vector for</param>
/// <param name="field">The name of the field to load</param>
/// <param name="mapper">The <see cref="TermVectorMapper"/> to process the vector. Must not be null</param>
/// <exception cref="System.IO.IOException">
/// if term vectors cannot be accessed or if they do not exist on the field
/// and doc. specified.
/// </exception>
public abstract void GetTermFreqVector(int docNumber, string field, TermVectorMapper mapper);
/// <summary>Map all the term vectors for all fields in a Document.</summary>
/// <param name="docNumber">The number of the document to load the vector for</param>
/// <param name="mapper">The <see cref="TermVectorMapper"/> to process the vector. Must not be null</param>
/// <exception cref="System.IO.IOException">
/// if term vectors cannot be accessed or if they do not exist on the field
/// and doc. specified.
/// </exception>
public abstract void GetTermFreqVector(int docNumber, TermVectorMapper mapper);
/// <summary>
/// Returns true if an index exists at the specified directory. Returns false
/// if the directory does not exist or if there is no index in it.
/// </summary>
/// <param name="directory">the directory to check for an index</param>
/// <returns>true if an index exists; false otherwise</returns>
/// <exception cref="System.IO.IOException">if there is a problem with accessing the index</exception>
public static bool IndexExists(Directory directory)
{
    // A current segment generation of -1 is treated as "no index present".
    bool noIndex = SegmentInfos.GetCurrentSegmentGeneration(directory) == -1;
    return !noIndex;
}
/// <summary>Returns the number of documents in this index.</summary>
/// <remarks>NumDeletedDocs subtracts this value from MaxDoc.</remarks>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
public abstract int NumDocs();
/// <summary>
/// Returns one greater than the largest possible document number.
/// This may be used to, e.g., determine how big to allocate an array which
/// will have an element for every document number in an index.
/// </summary>
public abstract int MaxDoc { get; }
/// <summary>
/// Returns the number of deleted documents, computed as
/// <see cref="MaxDoc"/> minus <see cref="NumDocs"/>.
/// </summary>
public virtual int NumDeletedDocs
{
    get
    {
        int upperBound = MaxDoc;
        int liveDocs = NumDocs();
        return upperBound - liveDocs;
    }
}
/// <summary>
/// Returns the stored fields of the n-th Document in this index. Delegates
/// to <see cref="Document(int, FieldSelector)"/> with a null selector.
/// </summary>
/// <remarks>
/// NOTE: for performance reasons, this method does not check if the
/// requested document is deleted, and therefore asking for a deleted document
/// may yield unspecified results. Usually this is not required, however you
/// can call <see cref="IsDeleted(int)"/> with the requested document ID to
/// verify the document is not deleted.
/// </remarks>
/// <param name="n">the document number</param>
/// <exception cref="AlreadyClosedException">if this reader is closed</exception>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual Document Document(int n)
{
    EnsureOpen();
    FieldSelector noSelector = null;
    return Document(n, noSelector);
}
/// <summary>
/// Returns the stored fields of the document numbered <paramref name="doc"/>
/// in this index; equivalent to calling <see cref="Document(int)"/>.
/// </summary>
/// <remarks>
/// NOTE: for performance reasons, this method does not check if the
/// requested document is deleted, and therefore asking for a deleted document
/// may yield unspecified results. Usually this is not required, however you
/// can call <see cref="IsDeleted(int)"/> with the requested document ID to
/// verify the document is not deleted.
/// </remarks>
/// <param name="doc">the document number</param>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public Document this[int doc]
{
get { return Document(doc); }
}
/// <summary>
/// Get the Document at the n-th position. The <see cref="FieldSelector"/>
/// may be used to determine what fields to load and how they should be
/// loaded.
/// </summary>
/// <remarks>
/// <para>
/// NOTE: If this Reader (more specifically, the underlying FieldsReader) is
/// closed before the lazy field is loaded an exception may be thrown. If you
/// want the value of a lazy field to be available after closing you must
/// explicitly load it or fetch the Document again with a new loader.
/// </para>
/// <para>
/// NOTE: for performance reasons, this method does not check if the
/// requested document is deleted, and therefore asking for a deleted document
/// may yield unspecified results. Usually this is not required, however you
/// can call <see cref="IsDeleted(int)"/> with the requested document ID to
/// verify the document is not deleted.
/// </para>
/// </remarks>
/// <param name="n">Get the document at the n-th position</param>
/// <param name="fieldSelector">
/// The <see cref="FieldSelector"/> to use to determine what Fields should be
/// loaded on the Document. May be null, in which case all Fields will be
/// loaded.
/// </param>
/// <returns>The stored fields of the Document at the n-th position</returns>
/// <exception cref="CorruptIndexException">if the index is corrupt</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
// NOTE(review): a stale TODO was kept here from the Java original:
// "TODO (1.5): When we convert to JDK 1.5 make this Set"
public abstract Document Document(int n, FieldSelector fieldSelector);
/// <summary>Returns true if document n has been deleted.</summary>
/// <param name="n">the document number to check</param>
public abstract bool IsDeleted(int n);
/// <summary>Returns true if any documents have been deleted.</summary>
public abstract bool HasDeletions { get; }
/// <summary>Returns true if there are norms stored for this field.</summary>
/// <remarks>
/// Backward compatible implementation: it loads the norms through
/// <see cref="Norms(System.String)"/> and tests the result for null. The
/// original comment notes that SegmentReader has an efficient implementation.
/// </remarks>
/// <param name="field">the field name</param>
/// <exception cref="AlreadyClosedException">if this reader is closed</exception>
public virtual bool HasNorms(System.String field)
{
    EnsureOpen();
    byte[] fieldNorms = Norms(field);
    return fieldNorms != null;
}
/// <summary>
/// Returns the byte-encoded normalization factor for the named field of
/// every document. This is used by the search code to score documents.
/// </summary>
/// <param name="field">the field name</param>
/// <returns>the encoded norms; HasNorms treats a null result as "no norms stored"</returns>
public abstract byte[] Norms(System.String field);
/// <summary>
/// Reads the byte-encoded normalization factor for the named field of every
/// document. This is used by the search code to score documents.
/// </summary>
/// <param name="field">the field name</param>
/// <param name="bytes">destination buffer for the encoded norms</param>
/// <param name="offset">presumably the index in <paramref name="bytes"/> at which writing starts - confirm against implementations</param>
public abstract void Norms(System.String field, byte[] bytes, int offset);
/// <summary>
/// Expert: Resets the normalization factor for the named field of the named
/// document. The norm represents the product of the field's boost and its
/// length normalization. Thus, to preserve the length normalization values
/// when resetting this, one should base the new value upon the old.
/// </summary>
/// <remarks>
/// <para>
/// NOTE: If this field does not store norms, then this method call will
/// silently do nothing.
/// </para>
/// <para>
/// Per the original documentation this can fail when: the index has changed
/// since this reader was opened; the index is corrupt; another writer has
/// this index open (write.lock could not be obtained); or there is a
/// low-level IO error.
/// </para>
/// </remarks>
/// <param name="doc">the document number</param>
/// <param name="field">the field name</param>
/// <param name="value">the byte-encoded norm to store</param>
public virtual void SetNorm(int doc, String field, byte value)
{
    lock (this)
    {
        EnsureOpen();
        // The write lock is acquired before the reader is marked as changed.
        AcquireWriteLock();
        hasChanges = true;
        DoSetNorm(doc, field, value);
    }
}
/// <summary>
/// Implements SetNorm in subclass. Called by
/// <see cref="SetNorm(int, String, byte)"/> while the lock on this reader is
/// held, after the write lock has been acquired and hasChanges has been set.
/// </summary>
/// <param name="doc">the document number</param>
/// <param name="field">the field name</param>
/// <param name="value_Renamed">the byte-encoded norm to store</param>
protected internal abstract void DoSetNorm(int doc, System.String field, byte value_Renamed);
/// <summary>
/// Expert: Resets the normalization factor for the named field of the named
/// document. The float is encoded with <c>Similarity.EncodeNorm</c> and
/// passed on to <see cref="SetNorm(int, String, byte)"/>.
/// </summary>
/// <remarks>
/// Per the original documentation this can fail when: the index has changed
/// since this reader was opened; the index is corrupt; another writer has
/// this index open (write.lock could not be obtained); or there is a
/// low-level IO error.
/// </remarks>
/// <param name="doc">the document number</param>
/// <param name="field">the field name</param>
/// <param name="value">the norm as a float, encoded to a byte before storing</param>
public virtual void SetNorm(int doc, System.String field, float value)
{
    EnsureOpen();
    byte encodedNorm = Similarity.EncodeNorm(value);
    SetNorm(doc, field, encodedNorm);
}
/// <summary>
/// Returns an enumeration of all the terms in the index. The enumeration is
/// ordered by Term.CompareTo(). Each term is greater than all that precede
/// it in the enumeration. Note that after calling Terms(), the enumeration
/// must be advanced (its Next method called) before calling other methods
/// on it.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract TermEnum Terms();
/// <summary>
/// Returns an enumeration of all terms starting at a given term. If the
/// given term does not exist, the enumeration is positioned at the first
/// term greater than the supplied term. The enumeration is ordered by
/// Term.CompareTo(). Each term is greater than all that precede it in the
/// enumeration.
/// </summary>
/// <param name="t">the term to start the enumeration at</param>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract TermEnum Terms(Term t);
/// <summary>Returns the number of documents containing the term <paramref name="t"/>.</summary>
/// <param name="t">the term to count documents for</param>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract int DocFreq(Term t);
/// <summary>
/// Returns an enumeration of all the documents which contain
/// <paramref name="term"/>. For each document, the document number and the
/// frequency of the term in that document are provided, for use in search
/// scoring. If term is null, then all non-deleted docs are returned with
/// freq=1. Thus, this method implements the mapping:
/// <c>Term =&gt; &lt;docNum, freq&gt;*</c>
/// </summary>
/// <remarks>
/// The enumeration is ordered by document number. Each document number is
/// greater than all that precede it in the enumeration. This implementation
/// obtains an unpositioned enumerator from <see cref="TermDocs()"/> and seeks
/// it to the term.
/// </remarks>
/// <param name="term">the term to seek to</param>
/// <exception cref="AlreadyClosedException">if this reader is closed</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual TermDocs TermDocs(Term term)
{
    EnsureOpen();

    TermDocs positioned = TermDocs();
    positioned.Seek(term);

    return positioned;
}
/// <summary>
/// Returns an unpositioned <see cref="Lucene.Net.Index.TermDocs"/>
/// enumerator; <see cref="TermDocs(Term)"/> seeks the returned instance to a
/// term.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract TermDocs TermDocs();
/// <summary>
/// Returns an enumeration of all the documents which contain
/// <paramref name="term"/>. For each document, in addition to the document
/// number and frequency of the term in that document, a list of all of the
/// ordinal positions of the term in the document is available. Thus, this
/// method implements the mapping:
/// <c>Term =&gt; &lt;docNum, freq, &lt;pos1, pos2, ... pos(freq-1)&gt;&gt;*</c>
/// </summary>
/// <remarks>
/// This positional information facilitates phrase and proximity searching.
/// The enumeration is ordered by document number. Each document number is
/// greater than all that precede it in the enumeration. This implementation
/// obtains an unpositioned enumerator from <see cref="TermPositions()"/> and
/// seeks it to the term.
/// </remarks>
/// <param name="term">the term to seek to</param>
/// <exception cref="AlreadyClosedException">if this reader is closed</exception>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public virtual TermPositions TermPositions(Term term)
{
    EnsureOpen();

    TermPositions positioned = TermPositions();
    positioned.Seek(term);

    return positioned;
}
/// <summary>
/// Returns an unpositioned <see cref="Lucene.Net.Index.TermPositions"/>
/// enumerator; <see cref="TermPositions(Term)"/> seeks the returned instance
/// to a term.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public abstract TermPositions TermPositions();
/// <summary>
/// Deletes the document numbered <paramref name="docNum"/>. Once a document
/// is deleted it will not appear in TermDocs or TermPositions enumerations.
/// Attempts to read its fields with the <see cref="Document(int)"/> method
/// will result in an error. The presence of this document may still be
/// reflected in the document-frequency statistic, though this will be
/// corrected eventually as the index is further modified.
/// </summary>
/// <remarks>
/// Per the original documentation this can fail when: the index has changed
/// since this reader was opened; the index is corrupt; another writer has
/// this index open (write.lock could not be obtained); or there is a
/// low-level IO error.
/// </remarks>
/// <param name="docNum">the number of the document to delete</param>
public virtual void DeleteDocument(int docNum)
{
    lock (this)
    {
        EnsureOpen();
        // The write lock is acquired before the reader is marked as changed.
        AcquireWriteLock();
        hasChanges = true;
        DoDelete(docNum);
    }
}
/// <summary>
/// Implements deletion of the document numbered <paramref name="docNum"/>.
/// Applications should call <see cref="DeleteDocument(int)"/> or
/// <see cref="DeleteDocuments(Term)"/> instead.
/// </summary>
/// <param name="docNum">Number of the document to delete.</param>
protected internal abstract void DoDelete(int docNum);
/// <summary>
/// Deletes all documents that have a given term indexed.
/// This is useful if one uses a document field to hold a unique ID string for
/// the document. Then to delete such a document, one merely constructs a
/// term with the appropriate field and the unique ID string as its text and
/// passes it to this method.
/// See <see cref="DeleteDocument(int)"/> for information about when this deletion will
/// become effective.
/// </summary>
/// <remarks>
/// The original notes list these failure conditions: the index has changed since
/// this reader was opened; the index is corrupt; another writer has this index
/// open (write.lock could not be obtained); a low-level IO error.
/// </remarks>
/// <param name="term">The term identifying the documents to delete.</param>
/// <returns>The number of documents deleted</returns>
public virtual int DeleteDocuments(Term term)
{
    EnsureOpen();

    TermDocs matches = TermDocs(term);
    if (matches == null)
    {
        return 0;
    }

    int deletedCount = 0;
    try
    {
        // Each successful Next() yields one matching document; delete it, then count it.
        for (; matches.Next(); deletedCount++)
        {
            DeleteDocument(matches.Doc);
        }
    }
    finally
    {
        // The enumerator is closed whether or not a delete failed part-way.
        matches.Close();
    }

    return deletedCount;
}
/// <summary>
/// Undeletes all documents currently marked as deleted in this index.
/// </summary>
/// <remarks>
/// Runs under the reader's monitor: verifies the reader is open, acquires the
/// write lock, flags the reader as having changes and then delegates to
/// <see cref="DoUndeleteAll()"/>.
/// The original notes list these failure conditions: the index has changed since
/// this reader was opened; the index is corrupt; another writer has this index
/// open (write.lock could not be obtained); a low-level IO error.
/// </remarks>
public virtual void UndeleteAll()
{
    lock (this)
    {
        this.EnsureOpen();
        this.AcquireWriteLock();

        // Mark the reader dirty before undeleting, as the original does.
        this.hasChanges = true;
        this.DoUndeleteAll();
    }
}
/// <summary>
/// Implements the actual undelete-all operation in a subclass.
/// Invoked by <see cref="UndeleteAll()"/> while it holds the reader's monitor.
/// </summary>
protected internal abstract void DoUndeleteAll();
/// <summary>
/// Does nothing by default. Subclasses that require a write lock for
/// index modifications must implement this method.
/// </summary>
/// <remarks>
/// Called by <see cref="DeleteDocument(int)"/> and <see cref="UndeleteAll()"/>
/// before they modify the index.
/// </remarks>
protected internal virtual void AcquireWriteLock()
{
    lock (this)
    {
        // Intentionally empty: the base reader has no write lock to obtain.
    }
}
/// <summary>
/// Verifies that this reader is still open and then commits any pending
/// changes by calling <see cref="Commit()"/>, all under the reader's monitor.
/// </summary>
public void Flush()
{
    lock (this)
    {
        this.EnsureOpen();
        this.Commit();
    }
}
/// <summary>
/// Verifies that this reader is still open and then commits any pending
/// changes together with the supplied user data, all under the reader's monitor.
/// </summary>
/// <param name="commitUserData">Opaque Map (String -> String)
/// that's recorded into the segments file in the index,
/// and can be retrieved later.
/// </param>
public void Flush(IDictionary commitUserData)
{
    lock (this)
    {
        this.EnsureOpen();
        this.Commit(commitUserData);
    }
}
/// <summary>
/// Commit changes resulting from delete, undeleteAll, or
/// setNorm operations, without any commit user data.
/// </summary>
/// <remarks>
/// If an exception is hit, then either no changes or all
/// changes will have been committed to the index
/// (transactional semantics).
/// </remarks>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public /*protected internal*/ void Commit()
{
    lock (this)
    {
        // Forward to the overload that takes user data, passing none.
        this.Commit(null);
    }
}
/// <summary>
/// Commit changes resulting from delete, undeleteAll, or
/// setNorm operations.
/// </summary>
/// <remarks>
/// If an exception is hit, then either no changes or all
/// changes will have been committed to the index
/// (transactional semantics).
/// </remarks>
/// <param name="commitUserData">User data handed through to <c>DoCommit</c>.</param>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public void Commit(IDictionary commitUserData)
{
    lock (this)
    {
        bool pendingChanges = hasChanges;
        if (pendingChanges)
        {
            DoCommit(commitUserData);
        }

        // Only reached when DoCommit did not throw (or was not needed).
        hasChanges = false;
    }
}
/// <summary>
/// Implements commit. Invoked by <c>Commit(IDictionary)</c> only when the
/// reader has pending changes.
/// </summary>
/// <param name="commitUserData">The user data passed to <c>Commit</c>; may be null.</param>
protected internal abstract void DoCommit(IDictionary commitUserData);
/// <summary>
/// Obsolete alias that simply forwards to <see cref="Dispose()"/>.
/// </summary>
[Obsolete("Use Dispose() instead")]
public void Close()
{
    this.Dispose();
}
/// <summary>
/// Closes files associated with this index.
/// Also saves any new deletions to disk.
/// No other methods should be called after this has been called.
/// </summary>
/// <exception cref="System.IO.IOException">If there is a low-level IO error</exception>
public void Dispose()
{
    // Explicit disposal: take the "disposing" path of Dispose(bool).
    const bool disposing = true;
    this.Dispose(disposing);
}
/// <summary>
/// Performs the disposal work. When <paramref name="disposing"/> is true and the
/// reader has not been closed yet, calls <c>DecRef()</c> and then marks the reader
/// as closed, under the reader's monitor. Does nothing otherwise.
/// </summary>
/// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    lock (this)
    {
        if (closed)
        {
            // Already closed earlier; nothing left to do.
            return;
        }

        DecRef();
        closed = true;
    }
}
/// <summary>
/// Implements close. Implemented by subclasses to perform the actual close work.
/// </summary>
protected internal abstract void DoClose();
/// <summary>
/// Get a list of unique field names that exist in this index and have the specified
/// field option information.
/// </summary>
/// <param name="fldOption">specifies which field option should be available for the returned fields</param>
/// <returns>Collection of Strings indicating the names of the fields.</returns>
public abstract ICollection GetFieldNames(FieldOption fldOption);
/// <summary>
/// Expert: return the IndexCommit that this reader has
/// opened. This method is only implemented by those
/// readers that correspond to a Directory with its own
/// segments_N file.
/// </summary>
/// <remarks>
/// WARNING: this API is new and experimental and
/// may suddenly change.
/// </remarks>
/// <exception cref="System.NotSupportedException">Always thrown by this default implementation.</exception>
public virtual IndexCommit IndexCommit
{
    get
    {
        const string message = "This reader does not support this method.";
        throw new NotSupportedException(message);
    }
}
/// <summary>
/// Prints the filename and size of each file within a given compound file.
/// Add the -extract flag to extract files to the current working directory.
/// In order to make the extracted version of the index work, you have to copy
/// the segments file from the compound index into the directory where the extracted files are stored.
/// </summary>
/// <param name="args">Usage: Lucene.Net.Index.IndexReader [-extract] &lt;cfsfile&gt;</param>
[STAThread]
public static void Main(String[] args)
{
    System.String filename = null;
    bool extract = false;

    // A null argument array is treated like an empty one so the usage text is shown.
    foreach (string arg in args ?? new String[0])
    {
        if (arg.Equals("-extract"))
        {
            extract = true;
        }
        else if (filename == null)
        {
            // Only the first non-flag argument is taken as the compound file name.
            filename = arg;
        }
    }

    if (filename == null)
    {
        System.Console.Out.WriteLine("Usage: Lucene.Net.Index.IndexReader [-extract] <cfsfile>");
        return;
    }

    Directory dir = null;
    CompoundFileReader cfr = null;
    try
    {
        var file = new System.IO.FileInfo(filename);
        System.String dirname = file.DirectoryName;
        filename = file.Name;
        dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirname));
        cfr = new CompoundFileReader(dir, filename);

        System.String[] files = cfr.ListAll();
        System.Array.Sort(files); // sort the array of filenames so that the output is more readable

        // Copy buffer shared by all extracted files; reading in chunks is more
        // effective than reading byte by byte.
        var buffer = new byte[1024];

        foreach (string entry in files)
        {
            long len = cfr.FileLength(entry);
            if (!extract)
            {
                System.Console.Out.WriteLine(entry + ": " + len + " bytes");
                continue;
            }

            System.Console.Out.WriteLine("extract " + entry + " with " + len + " bytes to local directory...");

            // NOTE(review): the entry name is used as the output path as-is, so the
            // file lands wherever that name resolves relative to the working directory.
            IndexInput input = cfr.OpenInput(entry);
            try
            {
                // 'using' guarantees the output file is closed even when a read or write fails.
                using (var output = new System.IO.FileStream(entry, System.IO.FileMode.Create))
                {
                    while (len > 0)
                    {
                        var bufLen = (int) System.Math.Min(buffer.Length, len);
                        input.ReadBytes(buffer, 0, bufLen);
                        output.Write(buffer, 0, bufLen);
                        len -= bufLen;
                    }
                }
            }
            finally
            {
                input.Close();
            }
        }
    }
    catch (System.IO.IOException ioe)
    {
        // Print the whole exception (type, message and stack trace), not just the trace.
        System.Console.Error.WriteLine(ioe);
    }
    finally
    {
        // Close the compound reader before the directory it was opened on, and close
        // each one independently so a failure in the first does not leak the second.
        try
        {
            if (cfr != null)
                cfr.Close();
        }
        catch (System.IO.IOException ioe)
        {
            System.Console.Error.WriteLine(ioe);
        }

        try
        {
            if (dir != null)
                dir.Close();
        }
        catch (System.IO.IOException ioe)
        {
            System.Console.Error.WriteLine(ioe);
        }
    }
}
/// <summary>
/// Returns all commit points that exist in the Directory.
/// </summary>
/// <remarks>
/// Normally, with the default configuration, there would be only
/// one commit point. But with a custom configuration
/// there could be many commits.
/// Once you have a given commit, you can open a reader on it.
/// There must be at least one commit in
/// the Directory, else this method throws an exception.
/// Note that if a commit is in
/// progress while this method is running, that commit
/// may or may not be included in the returned collection.
/// NOTE(review): the return type looks like it lost its generic type argument
/// (there is no non-generic ICollection in System.Collections.Generic) — confirm
/// against the declaration of DirectoryReader.ListCommits.
/// </remarks>
/// <param name="dir">The directory whose commit points are listed.</param>
/// <returns>Whatever <c>DirectoryReader.ListCommits</c> returns for <paramref name="dir"/>.</returns>
public static System.Collections.Generic.ICollection ListCommits(Directory dir)
{
    // Pure delegation; DirectoryReader does the actual listing.
    var commits = DirectoryReader.ListCommits(dir);
    return commits;
}
/// <summary>
/// Expert: returns the sequential sub readers that this
/// reader is logically composed of. For example,
/// IndexSearcher uses this API to drive searching by one
/// sub reader at a time. If this reader is not composed
/// of sequential child readers, it should return null.
/// If this method returns an empty array, that means this
/// reader is a null reader (for example a MultiReader
/// that has no sub readers).
/// </summary>
/// <remarks>
/// NOTE: You should not try using sub-readers returned by
/// this method to make any changes (setNorm, deleteDocument,
/// etc.). While this might succeed for one composite reader
/// (like MultiReader), it will most likely lead to index
/// corruption for other readers (like a DirectoryReader obtained
/// through one of the static open methods). Use the parent reader directly.
/// </remarks>
/// <returns>Null in this default implementation.</returns>
public virtual IndexReader[] GetSequentialSubReaders()
{
    // The base reader is not composed of sequential child readers.
    IndexReader[] noSubReaders = null;
    return noSubReaders;
}
/// <summary>
/// Expert. The default implementation returns this reader instance.
/// </summary>
public virtual object FieldCacheKey
{
    get
    {
        object key = this;
        return key;
    }
}
/// <summary>
/// Expert. The default implementation returns this reader instance.
/// </summary>
/// <remarks>
/// The original note on this member warns that the value is null if the reader has
/// no deletions. That is not what this default implementation does, so the warning
/// presumably describes overriding readers — verify against the subclasses.
/// </remarks>
public virtual object DeletesCacheKey
{
    get
    {
        object key = this;
        return key;
    }
}
/// <summary>
/// Returns the number of unique terms (across all fields)
/// in this reader.
/// </summary>
/// <remarks>
/// This property is a long, even though internally
/// Lucene cannot handle more than 2^31 unique terms, for
/// a possible future when this limitation is removed.
/// </remarks>
/// <exception cref="System.NotSupportedException">
/// If this count cannot be easily determined (eg Multi*Readers).
/// Instead, you should call <see cref="GetSequentialSubReaders()"/>
/// and ask each sub reader for its unique term count.
/// This default implementation always throws.
/// </exception>
public virtual long UniqueTermCount
{
    get
    {
        const string message = "this reader does not implement getUniqueTermCount()";
        throw new System.NotSupportedException(message);
    }
}
/// <summary>
/// For IndexReader implementations that use
/// TermInfosReader to read terms, this returns the
/// current indexDivisor as specified when the reader was
/// opened.
/// </summary>
/// <exception cref="System.NotSupportedException">Always thrown by this default implementation.</exception>
public virtual int TermInfosIndexDivisor
{
    get
    {
        const string message = "This reader does not support this method.";
        throw new NotSupportedException(message);
    }
}
}
}