/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using AttributeSource = Lucene.Net.Util.AttributeSource; namespace Lucene.Net.Analysis { /// A Tokenizer is a TokenStream whose input is a Reader. ///

/// This is an abstract class; subclasses must override <c>IncrementToken()</c>.

/// NOTE: Subclasses overriding <c>IncrementToken()</c> must call
/// <c>AttributeSource.ClearAttributes()</c> before setting attributes, and
/// <c>Token.Clear()</c> before setting Token attributes.

// Base class for tokenizers: holds the TextReader being tokenized and provides
// the shared close / reset / offset-correction helpers built on top of it.
public abstract class Tokenizer : TokenStream
{
    /// <summary>
    /// The text source for this Tokenizer. Remains null when the tokenizer was
    /// created through one of the reader-less constructors and
    /// <see cref="Reset(System.IO.TextReader)"/> has not been called yet.
    /// </summary>
    protected internal System.IO.TextReader input;

    /// <summary>Construct a tokenizer with null input.</summary>
    protected internal Tokenizer()
    {
    }

    /// <summary>Construct a token stream processing the given input.</summary>
    /// <param name="input">Reader to tokenize; it is passed through <c>CharReader.Get</c> before being stored.</param>
    protected internal Tokenizer(System.IO.TextReader input)
    {
        this.input = CharReader.Get(input);
    }

    /// <summary>Construct a tokenizer with null input using the given AttributeFactory.</summary>
    /// <param name="factory">Factory forwarded to the base <c>TokenStream</c> constructor.</param>
    protected internal Tokenizer(AttributeFactory factory)
        : base(factory)
    {
    }

    /// <summary>Construct a token stream processing the given input using the given AttributeFactory.</summary>
    /// <param name="factory">Factory forwarded to the base <c>TokenStream</c> constructor.</param>
    /// <param name="input">Reader to tokenize; it is passed through <c>CharReader.Get</c> before being stored.</param>
    protected internal Tokenizer(AttributeFactory factory, System.IO.TextReader input)
        : base(factory)
    {
        this.input = CharReader.Get(input);
    }

    /// <summary>Construct a tokenizer with null input using the given AttributeSource.</summary>
    /// <param name="source">Attribute source forwarded to the base <c>TokenStream</c> constructor.</param>
    protected internal Tokenizer(AttributeSource source)
        : base(source)
    {
    }

    /// <summary>Construct a token stream processing the given input using the given AttributeSource.</summary>
    /// <param name="source">Attribute source forwarded to the base <c>TokenStream</c> constructor.</param>
    /// <param name="input">Reader to tokenize; it is passed through <c>CharReader.Get</c> before being stored.</param>
    protected internal Tokenizer(AttributeSource source, System.IO.TextReader input)
        : base(source)
    {
        this.input = CharReader.Get(input);
    }

    /// <summary>
    /// By default, closes the input Reader. Does nothing when no reader has
    /// been set, so a tokenizer created with null input can be closed safely.
    /// </summary>
    public override void Close()
    {
        // input is null after the reader-less constructors; calling Close()
        // on it unguarded would throw a NullReferenceException.
        if (input != null)
        {
            input.Close();
        }
    }

    /// <summary>
    /// Return the corrected offset. If <see cref="input"/> is a <c>CharStream</c>
    /// subclass this method calls <c>CharStream.CorrectOffset</c>, else returns
    /// <paramref name="currentOff"/>.
    /// </summary>
    /// <param name="currentOff">Offset as seen in the output.</param>
    /// <returns>Corrected offset based on the input.</returns>
    protected internal int CorrectOffset(int currentOff)
    {
        // A single 'as' conversion replaces the former is-test + cast pair;
        // it yields null both for a null input and for a non-CharStream reader.
        CharStream charStream = input as CharStream;
        if (charStream == null)
        {
            return currentOff;
        }
        return charStream.CorrectOffset(currentOff);
    }

    /// <summary>
    /// Expert: Reset the tokenizer to a new reader. Typically, an analyzer
    /// (in its reusableTokenStream method) will use this to re-use a
    /// previously created tokenizer.
    /// </summary>
    /// <param name="input">
    /// The new reader. It is stored as-is: unlike the constructors, this
    /// method does not pass it through <c>CharReader.Get</c>.
    /// </param>
    public virtual void Reset(System.IO.TextReader input)
    {
        this.input = input;
    }
}
}