using System;
using System.IO;
using System.Collections;

namespace Lucene.Net.Analysis.De
{
    /// <summary>
    /// Token filter that replaces each German term with its stem.
    /// Terms listed in an optional exclusion table are passed through
    /// unchanged, and the stemmer instance can be swapped after
    /// construction via <see cref="SetStemmer"/>.
    /// </summary>
    public sealed class GermanStemFilter : TokenFilter
    {
        /// <summary>
        /// The token most recently pulled from the wrapped input stream.
        /// </summary>
        private Token token = null;

        /// <summary>
        /// Stemmer applied to every term that is not excluded.
        /// </summary>
        private GermanStemmer stemmer = null;

        /// <summary>
        /// Terms that must not be stemmed; null means nothing is excluded.
        /// </summary>
        private Hashtable exclusions = null;

        /// <summary>
        /// Builds a GermanStemFilter with a default <see cref="GermanStemmer"/>
        /// and no exclusion table.
        /// </summary>
        /// <param name="_in">The token stream to read tokens from.</param>
        public GermanStemFilter(TokenStream _in)
            : base(_in)
        {
            stemmer = new GermanStemmer();
        }

        /// <summary>
        /// Builds a GermanStemFilter that uses an exclusion table.
        /// </summary>
        /// <param name="_in">The token stream to read tokens from.</param>
        /// <param name="exclusiontable">Table of terms that are returned unstemmed.</param>
        public GermanStemFilter(TokenStream _in, Hashtable exclusiontable)
            : this(_in)
        {
            exclusions = exclusiontable;
        }

        /// <summary>
        /// Reads the next token from the input and stems its term text
        /// unless the term is found in the exclusion table.
        /// </summary>
        /// <returns>
        /// The next token in the stream (stemmed where applicable),
        /// or null at end of stream.
        /// </returns>
        public override Token Next()
        {
            token = input.Next();

            // End of stream: nothing left to filter.
            if (token == null)
            {
                return null;
            }

            // Excluded terms bypass the stemmer entirely.
            if (exclusions != null && exclusions.Contains(token.TermText()))
            {
                return token;
            }

            string stemmed = stemmer.Stem(token.TermText());

            // Reuse the incoming token when stemming left the term untouched,
            // so no new Token is allocated needlessly.
            if (stemmed.Equals(token.TermText()))
            {
                return token;
            }

            // Otherwise emit a token with the stemmed text and the original
            // token's offsets and type.
            return new Token(stemmed, token.StartOffset(), token.EndOffset(), token.Type());
        }

        /// <summary>
        /// Sets an alternative/custom GermanStemmer for this filter.
        /// A null argument is ignored and the current stemmer is kept.
        /// </summary>
        /// <param name="stemmer">The stemmer to use from now on.</param>
        public void SetStemmer(GermanStemmer stemmer)
        {
            if (stemmer == null)
            {
                return;
            }

            this.stemmer = stemmer;
        }

        /// <summary>
        /// Sets an alternative exclusion table for this filter,
        /// replacing any table that was set before.
        /// </summary>
        /// <param name="exclusiontable">Table of terms that are returned unstemmed.</param>
        public void SetExclusionTable(Hashtable exclusiontable)
        {
            exclusions = exclusiontable;
        }
    }
}