/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using AttributeSource = Lucene.Net.Util.AttributeSource; namespace Lucene.Net.Analysis { /// A Tokenizer is a TokenStream whose input is a Reader. ///

/// This is an abstract class; subclasses must override <see cref="TokenStream.IncrementToken()"/>.

/// NOTE: Subclasses overriding <see cref="TokenStream.IncrementToken()"/> must call <see cref="AttributeSource.ClearAttributes()"/> before setting attributes.

/// <remarks>
/// Keeps the reader being tokenized in the <c>input</c> field and closes it when the
/// tokenizer is disposed with <c>disposing == true</c>.
/// </remarks>
public abstract class Tokenizer : TokenStream
{
    /// <summary>The text source for this Tokenizer.</summary>
    protected internal System.IO.TextReader input;

    /// <summary>True once <c>Dispose(bool)</c> has completed; later calls then do nothing.</summary>
    private bool disposed;

    /// <summary>Construct a tokenizer with null input.</summary>
    protected internal Tokenizer()
    {
    }

    /// <summary>Construct a token stream processing the given input.</summary>
    /// <param name="input">reader to tokenize; stored after being passed through <c>CharReader.Get</c></param>
    protected internal Tokenizer(System.IO.TextReader input)
    {
        this.input = CharReader.Get(input);
    }

    /// <summary>Construct a tokenizer with null input using the given AttributeFactory.</summary>
    /// <param name="factory">attribute factory forwarded to the base constructor</param>
    protected internal Tokenizer(AttributeFactory factory)
        : base(factory)
    {
    }

    /// <summary>Construct a token stream processing the given input using the given AttributeFactory.</summary>
    /// <param name="factory">attribute factory forwarded to the base constructor</param>
    /// <param name="input">reader to tokenize; stored after being passed through <c>CharReader.Get</c></param>
    protected internal Tokenizer(AttributeFactory factory, System.IO.TextReader input)
        : base(factory)
    {
        this.input = CharReader.Get(input);
    }

    /// <summary>Construct a tokenizer with null input using the given AttributeSource.</summary>
    /// <param name="source">attribute source forwarded to the base constructor</param>
    protected internal Tokenizer(AttributeSource source)
        : base(source)
    {
    }

    /// <summary>Construct a token stream processing the given input using the given AttributeSource.</summary>
    /// <param name="source">attribute source forwarded to the base constructor</param>
    /// <param name="input">reader to tokenize; stored after being passed through <c>CharReader.Get</c></param>
    protected internal Tokenizer(AttributeSource source, System.IO.TextReader input)
        : base(source)
    {
        this.input = CharReader.Get(input);
    }

    /// <summary>
    /// Releases the input reader. Only the first call has any effect.
    /// </summary>
    /// <param name="disposing">
    /// when true and an input reader is present, the reader is closed before the
    /// reference is dropped; when false the reference is dropped without closing
    /// </param>
    protected override void Dispose(bool disposing)
    {
        if (!disposed)
        {
            if (disposing && input != null)
            {
                input.Close();
            }

            // LUCENE-2387: don't hold onto Reader after close, so
            // GC can reclaim
            input = null;
            disposed = true;
        }
    }

    /// <summary>
    /// Return the corrected offset. If <c>input</c> is a <c>CharStream</c> this method
    /// delegates to its <c>CorrectOffset</c>; otherwise <paramref name="currentOff"/> is
    /// returned unchanged (this includes the case where <c>input</c> is null).
    /// </summary>
    /// <param name="currentOff">offset as seen in the output</param>
    /// <returns>corrected offset based on the input</returns>
    protected internal int CorrectOffset(int currentOff)
    {
        CharStream charStream = input as CharStream;
        if (charStream == null)
        {
            return currentOff;
        }

        return charStream.CorrectOffset(currentOff);
    }

    /// <summary>
    /// Expert: Reset the tokenizer to a new reader. Typically, an
    /// analyzer (in its reusableTokenStream method) will use
    /// this to re-use a previously created tokenizer.
    /// </summary>
    /// <param name="input">
    /// the new reader; it is stored as given and, unlike in the constructors, is not
    /// passed through <c>CharReader.Get</c>
    /// </param>
    /// <remarks>
    /// Only the <c>input</c> field is reassigned; the flag set by <c>Dispose(bool)</c> is
    /// not cleared here.
    /// </remarks>
    public virtual void Reset(System.IO.TextReader input)
    {
        this.input = input;
    }
}
} // namespace Lucene.Net.Analysis