/* * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; namespace Lucene.Net.Analysis { /// An Analyzer builds TokenStreams, which analyze text. It thus represents a /// policy for extracting index terms from text. ///

/// Typical implementations first build a Tokenizer, which breaks the stream of characters from the TextReader into raw Tokens. One or more TokenFilters may then be applied to the output of the Tokenizer.

/// WARNING: You must override at least one of the TokenStream overloads defined by this class in your subclass. Their default implementations forward to each other, so an Analyzer that overrides neither will recurse endlessly.

public abstract class Analyzer
{
    /// <summary>
    /// Creates a TokenStream which tokenizes all the text in the provided reader.
    /// The default implementation ignores <paramref name="fieldName"/> and forwards to
    /// the single-argument <c>TokenStream(TextReader)</c> overload for compatibility
    /// with older versions. Override to let the Analyzer choose a strategy based on the
    /// document and/or field.
    /// </summary>
    /// <remarks>
    /// Implementations must be able to handle a null field name for backward
    /// compatibility. The default bodies of the two TokenStream overloads call each
    /// other, so a subclass has to override at least one of them.
    /// </remarks>
    /// <param name="fieldName">Name of the field being analyzed; may be null.</param>
    /// <param name="reader">Reader supplying the text to tokenize.</param>
    /// <returns>The stream produced by the single-argument overload for <paramref name="reader"/>.</returns>
    public virtual TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        // Backward compatibility: the field name is dropped and the legacy overload decides.
        return this.TokenStream(reader);
    }

    /// <summary>
    /// Creates a TokenStream which tokenizes all the text in the provided reader.
    /// Provided for backward compatibility only; use the
    /// <c>TokenStream(string, TextReader)</c> overload instead.
    /// </summary>
    /// <param name="reader">Reader supplying the text to tokenize.</param>
    /// <returns>The stream produced by the two-argument overload when called with a null field name.</returns>
    public virtual TokenStream TokenStream(System.IO.TextReader reader)
    {
        // No field name is available on this legacy entry point, so null is passed along.
        return this.TokenStream(null, reader);
    }

    /// <summary>
    /// Invoked before indexing a Field instance if terms have already been added to
    /// that field. This lets custom analyzers place an automatic position increment
    /// gap between Field instances that share the same field name.
    /// </summary>
    /// <remarks>
    /// The default gap is 0. Combined with the typical token position increment of 1,
    /// all terms in a field, including across Field instances, occupy successive
    /// positions, which allows exact PhraseQuery matches across Field instance
    /// boundaries.
    /// </remarks>
    /// <param name="fieldName">Name of the field being indexed.</param>
    /// <returns>
    /// The position increment gap, added to the next token emitted from
    /// <c>TokenStream(string, TextReader)</c>.
    /// </returns>
    public virtual int GetPositionIncrementGap(string fieldName)
    {
        // The base implementation returns the same gap for every field name.
        const int defaultGap = 0;
        return defaultGap;
    }
}
} // end namespace Lucene.Net.Analysis