/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using Lucene.Net.Util; using Document = Lucene.Net.Documents.Document; using Field = Lucene.Net.Documents.Field; using IndexWriter = Lucene.Net.Index.IndexWriter; using AttributeSource = Lucene.Net.Util.AttributeSource; namespace Lucene.Net.Analysis { /// A TokenStream enumerates the sequence of tokens, either from /// s of a or from query text. ///

/// <para>
/// This is an abstract class. Concrete subclasses are:
/// <list type="bullet">
/// <item><c>Tokenizer</c>, a TokenStream whose input is a Reader; and</item>
/// <item><c>TokenFilter</c>, a TokenStream whose input is another TokenStream.</item>
/// </list>
/// A new TokenStream API has been introduced with Lucene 2.9. This API
/// has moved from being <c>Token</c> based to <c>Attribute</c> based. While
/// <c>Token</c> still exists in 2.9 as a convenience class, the preferred way
/// to store the information of a token is to use Attributes.
/// </para>
/// <para>
/// TokenStream now extends <see cref="AttributeSource"/>, which provides
/// access to all of the token Attributes for the TokenStream.
/// Note that only one instance per Attribute type is created and reused
/// for every token. This approach reduces object creation and allows local
/// caching of references to the Attributes. See <see cref="IncrementToken"/>
/// for further details.
/// </para>
/// <para>
/// The workflow of the new TokenStream API is as follows:
/// <list type="number">
/// <item>Instantiation of TokenStream/TokenFilters which add/get
/// attributes to/from the <see cref="AttributeSource"/>.</item>
/// <item>The consumer calls <see cref="Reset"/>.</item>
/// <item>The consumer retrieves attributes from the stream and stores local
/// references to all attributes it wants to access.</item>
/// <item>The consumer calls <see cref="IncrementToken"/> until it returns false and
/// consumes the attributes after each call.</item>
/// <item>The consumer calls <see cref="End"/> so that any end-of-stream operations
/// can be performed.</item>
/// <item>The consumer calls <see cref="Dispose"/> to release any resource when finished
/// using the TokenStream.</item>
/// </list>
/// To make sure that filters and consumers know which attributes are available,
/// the attributes must be added during instantiation. Filters and consumers are
/// not required to check for availability of attributes in
/// <see cref="IncrementToken"/>.
/// </para>
/// <para>
/// You can find some example code for the new API in the analysis package level
/// Javadoc.
/// </para>
/// <para>
/// Sometimes it is desirable to capture a current state of a TokenStream,
/// e.g. for buffering purposes (see <c>CachingTokenFilter</c>,
/// <c>TeeSinkTokenFilter</c>). For this usecase
/// <see cref="AttributeSource.CaptureState"/> and <see cref="AttributeSource.RestoreState"/>
/// can be used.
/// </para>
public abstract class TokenStream : AttributeSource, IDisposable
{
    /// <summary>
    /// A TokenStream using the default attribute factory.
    /// </summary>
    protected internal TokenStream()
    {
    }

    /// <summary>
    /// A TokenStream that uses the same attributes as the supplied one.
    /// </summary>
    protected internal TokenStream(AttributeSource input)
        : base(input)
    {
    }

    /// <summary>
    /// A TokenStream using the supplied AttributeFactory for creating new
    /// attribute instances.
    /// </summary>
    protected internal TokenStream(AttributeFactory factory)
        : base(factory)
    {
    }

    /// <summary>
    /// Consumers (i.e., <see cref="IndexWriter"/>) use this method to advance the stream to
    /// the next token. Implementing classes must implement this method and update
    /// the appropriate attributes with the attributes of the next token.
    /// <para>
    /// The producer must make no assumptions about the attributes after the
    /// method has been returned: the caller may arbitrarily change it. If the
    /// producer needs to preserve the state for subsequent calls, it can use
    /// <see cref="AttributeSource.CaptureState"/> to create a copy of the current attribute state.
    /// </para>
    /// <para>
    /// This method is called for every token of a document, so an efficient
    /// implementation is crucial for good performance. To avoid calls to
    /// <c>AddAttribute</c> and <c>GetAttribute</c>,
    /// references to all attributes that this stream uses should be
    /// retrieved during instantiation.
    /// </para>
    /// <para>
    /// To ensure that filters and consumers know which attributes are available,
    /// the attributes must be added during instantiation. Filters and consumers
    /// are not required to check for availability of attributes in
    /// <see cref="IncrementToken"/>.
    /// </para>
    /// </summary>
    /// <returns>false for end of stream; true otherwise</returns>
    public abstract bool IncrementToken();

    /// <summary>
    /// This method is called by the consumer after the last token has been
    /// consumed, after <see cref="IncrementToken"/> returned <c>false</c>
    /// (using the new TokenStream API). Streams implementing the old API
    /// should upgrade to use this feature.
    /// <para>
    /// This method can be used to perform any end-of-stream operations, such as
    /// setting the final offset of a stream. The final offset of a stream might
    /// differ from the offset of the last token, e.g. in case one or more whitespaces
    /// followed after the last token, but a WhitespaceTokenizer was used.
    /// </para>
    /// </summary>
    /// <exception cref="System.IO.IOException"></exception>
    public virtual void End()
    {
        // do nothing by default
    }

    /// <summary>
    /// Resets this stream to the beginning. This is an optional operation, so
    /// subclasses may or may not implement this method. <see cref="Reset"/> is not needed for
    /// the standard indexing process. However, if the tokens of a
    /// TokenStream are intended to be consumed more than once, it is
    /// necessary to implement <see cref="Reset"/>. Note that if your TokenStream
    /// caches tokens and feeds them back again after a reset, it is imperative
    /// that you clone the tokens when you store them away (on the first pass) as
    /// well as when you return them (on future passes after <see cref="Reset"/>).
    /// </summary>
    public virtual void Reset()
    {
    }

    /// <summary>
    /// Releases resources associated with this stream.
    /// </summary>
    [Obsolete("Use Dispose() instead")]
    public void Close()
    {
        Dispose();
    }

    /// <summary>
    /// Releases resources associated with this stream.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        // CA1816: this class is unsealed and subclasses may add a finalizer;
        // suppress finalization once the stream has been deterministically disposed.
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases resources used by this stream. Called by both <see cref="Dispose()"/>
    /// and the obsolete <see cref="Close"/>.
    /// </summary>
    /// <param name="disposing">true when called from Dispose(); subclasses release
    /// managed resources only when this is true.</param>
    protected abstract void Dispose(bool disposing);
}
}