/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using Fieldable = Lucene.Net.Documents.Fieldable;
using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
using CloseableThreadLocal = Lucene.Net.Util.CloseableThreadLocal;

namespace Lucene.Net.Analysis
{
    /// <summary> An Analyzer builds TokenStreams, which analyze text.  It thus represents a
    /// policy for extracting index terms from text.
    /// <p/>
    /// Typical implementations first build a Tokenizer, which breaks the stream of
    /// characters from the Reader into raw Tokens.  One or more TokenFilters may
    /// then be applied to the output of the Tokenizer.
    /// </summary>
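    /// <example>
    /// A minimal sketch of a typical subclass (the class name is hypothetical; it assumes
    /// the WhitespaceTokenizer and LowerCaseFilter classes from Lucene.Net.Analysis):
    /// <code>
    /// public class SimpleWhitespaceAnalyzer : Analyzer
    /// {
    ///     public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
    ///     {
    ///         // Break the input into raw tokens on whitespace, then lower-case each token.
    ///         return new LowerCaseFilter(new WhitespaceTokenizer(reader));
    ///     }
    /// }
    /// </code>
    /// </example>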
    public abstract class Analyzer
    {
        /// <summary> Creates a TokenStream which tokenizes all the text in the provided
        /// Reader.  Must be able to handle a null field name for
        /// backward compatibility.
        /// </summary>
        public abstract TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader);

        /// <summary> Creates a TokenStream that is allowed to be re-used
        /// from the previous time that the same thread called
        /// this method.  Callers that do not need to use more
        /// than one TokenStream at the same time from this
        /// analyzer should use this method for better
        /// performance.
        /// </summary>
        public virtual TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            return TokenStream(fieldName, reader);
        }

        private CloseableThreadLocal tokenStreams = new CloseableThreadLocal();

        /// <summary> Used by Analyzers that implement ReusableTokenStream
        /// to retrieve previously saved TokenStreams for re-use
        /// by the same thread.
        /// </summary>
        protected internal virtual System.Object GetPreviousTokenStream()
        {
            try
            {
                return tokenStreams.Get();
            }
            catch (System.NullReferenceException npe)
            {
                // tokenStreams is set to null by Close(); report that instead of the NRE.
                if (tokenStreams == null)
                {
                    throw new AlreadyClosedException("this Analyzer is closed");
                }
                else
                {
                    throw npe;
                }
            }
        }

        /// <summary> Used by Analyzers that implement ReusableTokenStream
        /// to save a TokenStream for later re-use by the same
        /// thread.
        /// </summary>
        protected internal virtual void SetPreviousTokenStream(System.Object obj)
        {
            try
            {
                tokenStreams.Set(obj);
            }
            catch (System.NullReferenceException npe)
            {
                // As above, a null tokenStreams means the analyzer has been closed.
                if (tokenStreams == null)
                {
                    throw new AlreadyClosedException("this Analyzer is closed");
                }
                else
                {
                    throw npe;
                }
            }
        }

        protected internal bool overridesTokenStreamMethod;

        /// <summary> This is only present to preserve
        /// back-compat of classes that subclass a core analyzer
        /// and override TokenStream but not ReusableTokenStream.
        /// </summary>
        [Obsolete("This is only present to preserve back-compat of classes that subclass a core analyzer and override TokenStream but not ReusableTokenStream.")]
        protected internal virtual void SetOverridesTokenStreamMethod(System.Type baseClass)
        {
            System.Type[] parameterTypes = new System.Type[]{ typeof(System.String), typeof(System.IO.TextReader) };
            try
            {
                System.Reflection.MethodInfo m = this.GetType().GetMethod("TokenStream", parameterTypes);
                if (m != null)
                {
                    // The method is overridden if its most-derived declaration is not on baseClass.
                    overridesTokenStreamMethod = m.DeclaringType != baseClass;
                }
                else
                {
                    overridesTokenStreamMethod = false;
                }
            }
            catch (System.MethodAccessException)
            {
                overridesTokenStreamMethod = false;
            }
        }

        /// <summary> Invoked before indexing a Fieldable instance if
        /// terms have already been added to that field.  This allows custom
        /// analyzers to place an automatic position increment gap between
        /// Fieldable instances using the same field name.  The default
        /// position increment gap is 0.  With a 0 position increment gap and
        /// the typical default token position increment of 1, all terms in a field,
        /// including across Fieldable instances, are in successive positions, allowing
        /// exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
        /// </summary>
        /// <param name="fieldName">Fieldable name being indexed</param>
        /// <returns> position increment gap, added to the next token emitted from <see cref="TokenStream(System.String, System.IO.TextReader)"/>
        /// </returns>
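        /// <example>
        /// A hedged sketch (the subclass name is hypothetical; WhitespaceAnalyzer is a core
        /// analyzer from Lucene.Net.Analysis): returning a large gap keeps exact PhraseQuery
        /// matches from spanning Fieldable instances of the same field.
        /// <code>
        /// public class GapWhitespaceAnalyzer : WhitespaceAnalyzer
        /// {
        ///     public override int GetPositionIncrementGap(System.String fieldName)
        ///     {
        ///         // A gap larger than any realistic phrase slop separates the
        ///         // positions of successive Fieldable instances.
        ///         return 100;
        ///     }
        /// }
        /// </code>
        /// </example>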
        public virtual int GetPositionIncrementGap(System.String fieldName)
        {
            return 0;
        }

        /// <summary> Just like <see cref="GetPositionIncrementGap(System.String)"/>, except for
        /// Token offsets instead.  By default this returns 1 for tokenized fields,
        /// as if the fields were joined with an extra space character, and 0 for
        /// un-tokenized fields.  This method is only called if the field
        /// produced at least one token for indexing.
        /// </summary>
        /// <param name="field">the field just indexed</param>
        /// <returns> offset gap, added to the next token emitted from <see cref="TokenStream(System.String, System.IO.TextReader)"/>
        /// </returns>
        public virtual int GetOffsetGap(Fieldable field)
        {
            if (field.IsTokenized())
                return 1;
            else
                return 0;
        }

        /// <summary>Frees persistent resources used by this Analyzer.</summary>
        public virtual void Close()
        {
            tokenStreams.Close();
            tokenStreams = null;
        }
    }
}