/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Util;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using AttributeSource = Lucene.Net.Util.AttributeSource;
namespace Lucene.Net.Analysis
{
/// <summary>
/// A <c>TokenStream</c> enumerates the sequence of tokens, either from
/// <see cref="Field"/>s of a <see cref="Document"/> or from query text.
/// <para>
/// This is an abstract class. Concrete subclasses are:
/// <list type="bullet">
/// <item><description><c>Tokenizer</c>, a <c>TokenStream</c> whose input is a <c>Reader</c>; and</description></item>
/// <item><description><c>TokenFilter</c>, a <c>TokenStream</c> whose input is another <c>TokenStream</c>.</description></item>
/// </list>
/// </para>
/// <para>
/// A new <c>TokenStream</c> API has been introduced with Lucene 2.9. This API
/// has moved from being <c>Token</c>-based to <see cref="IAttribute"/>-based. While
/// <c>Token</c> still exists in 2.9 as a convenience class, the preferred way
/// to store the information of a token is to use attributes.
/// </para>
/// <para>
/// <c>TokenStream</c> now extends <see cref="AttributeSource"/>, which provides
/// access to all of the token attributes for the <c>TokenStream</c>.
/// Note that only one instance per attribute type is created and reused
/// for every token. This approach reduces object creation and allows local
/// caching of references to the attributes.
/// </para>
/// <para>
/// The workflow of the new <c>TokenStream</c> API is as follows:
/// <list type="number">
/// <item><description>Instantiation of <c>TokenStream</c>/<c>TokenFilter</c>s which add/get
/// attributes to/from the <see cref="AttributeSource"/>.</description></item>
/// <item><description>The consumer calls <see cref="Reset()"/>.</description></item>
/// <item><description>The consumer retrieves attributes from the stream and stores local
/// references to all attributes it wants to access.</description></item>
/// <item><description>The consumer calls <see cref="IncrementToken()"/> until it returns
/// <c>false</c> and consumes the attributes after each call.</description></item>
/// <item><description>The consumer calls <see cref="End()"/> so that any end-of-stream
/// operations can be performed.</description></item>
/// <item><description>The consumer calls <see cref="Dispose()"/> to release any resource
/// when finished using the <c>TokenStream</c>.</description></item>
/// </list>
/// </para>
/// <para>
/// To make sure that filters and consumers know which attributes are available,
/// the attributes must be added during instantiation. Filters and consumers are
/// not required to check for availability of attributes in <see cref="IncrementToken()"/>.
/// </para>
/// <para>
/// Sometimes it is desirable to capture the current state of a <c>TokenStream</c>,
/// e.g. for buffering purposes. For this use case
/// <see cref="AttributeSource.CaptureState()"/> and
/// <see cref="AttributeSource.RestoreState(AttributeSource.State)"/> can be used.
/// </para>
/// </summary>
public abstract class TokenStream : AttributeSource, IDisposable
{
    /// <summary>
    /// Creates a <c>TokenStream</c> using the default attribute factory.
    /// </summary>
    protected internal TokenStream()
    { }

    /// <summary>
    /// Creates a <c>TokenStream</c> that uses the same attributes as the supplied one.
    /// </summary>
    /// <param name="input">the <see cref="AttributeSource"/> whose attribute instances this stream shares</param>
    protected internal TokenStream(AttributeSource input)
        : base(input)
    { }

    /// <summary>
    /// Creates a <c>TokenStream</c> using the supplied <c>AttributeFactory</c>
    /// for creating new attribute instances.
    /// </summary>
    /// <param name="factory">the factory used to create attribute instances</param>
    protected internal TokenStream(AttributeFactory factory)
        : base(factory)
    { }

    /// <summary>
    /// Consumers (i.e., <see cref="IndexWriter"/>) use this method to advance the stream to
    /// the next token. Implementing classes must implement this method and update
    /// the appropriate attributes with the attributes of the next token.
    /// <para>
    /// The producer must make no assumptions about the attributes after the
    /// method has returned: the caller may arbitrarily change them. If the
    /// producer needs to preserve the state for subsequent calls, it can use
    /// <see cref="AttributeSource.CaptureState()"/> to create a copy of the
    /// current attribute state.
    /// </para>
    /// <para>
    /// This method is called for every token of a document, so an efficient
    /// implementation is crucial for good performance. To avoid repeated
    /// attribute lookups, references to all attributes that this stream uses
    /// should be retrieved during instantiation.
    /// </para>
    /// <para>
    /// To ensure that filters and consumers know which attributes are available,
    /// the attributes must be added during instantiation. Filters and consumers
    /// are not required to check for availability of attributes in this method.
    /// </para>
    /// </summary>
    /// <returns><c>false</c> for end of stream; <c>true</c> otherwise</returns>
    public abstract bool IncrementToken();

    /// <summary>
    /// This method is called by the consumer after the last token has been
    /// consumed, i.e. after <see cref="IncrementToken()"/> returned <c>false</c>
    /// (using the new <c>TokenStream</c> API). Streams implementing the old API
    /// should upgrade to use this feature.
    /// <para>
    /// This method can be used to perform any end-of-stream operations, such as
    /// setting the final offset of a stream. The final offset of a stream might
    /// differ from the offset of the last token, e.g. in case one or more
    /// whitespace characters followed the last token.
    /// </para>
    /// </summary>
    /// <exception cref="System.IO.IOException">if an I/O error occurs</exception>
    public virtual void End()
    {
        // do nothing by default
    }

    /// <summary>
    /// Resets this stream to the beginning. This is an optional operation, so
    /// subclasses may or may not implement this method. <c>Reset()</c> is not
    /// needed for the standard indexing process. However, if the tokens of a
    /// <c>TokenStream</c> are intended to be consumed more than once, it is
    /// necessary to implement <c>Reset()</c>. Note that if your <c>TokenStream</c>
    /// caches tokens and feeds them back again after a reset, it is imperative
    /// that you clone the tokens when you store them away (on the first pass) as
    /// well as when you return them (on future passes after <c>Reset()</c>).
    /// </summary>
    public virtual void Reset()
    {
        // optional operation; no-op by default
    }

    /// <summary>Releases resources associated with this stream.</summary>
    [Obsolete("Use Dispose() instead")]
    public void Close()
    {
        Dispose();
    }

    /// <summary>Releases resources associated with this stream.</summary>
    public void Dispose()
    {
        Dispose(true);
        // Dispose(bool) is abstract, so derived classes may introduce finalizers;
        // suppress finalization after explicit disposal per the standard
        // .NET dispose pattern (CA1816).
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases resources used by this stream.
    /// </summary>
    /// <param name="disposing"><c>true</c> when called from <see cref="Dispose()"/>;
    /// <c>false</c> when called from a finalizer</param>
    protected abstract void Dispose(bool disposing);
}
}