/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Collections;
using Lucene.Net.Analysis.Tokenattributes;
namespace Lucene.Net.Analysis.De
{
/// <summary>
/// A filter that stems German words. It supports a table of words that should
/// not be stemmed at all. The stemmer used can be changed at runtime after the
/// filter object is created (as long as it is a <see cref="GermanStemmer"/>).
/// </summary>
public sealed class GermanStemFilter : TokenFilter
{
    /// <summary>
    /// The stemmer applied to every term that is not excluded. Never null after
    /// construction; it can be swapped through <see cref="SetStemmer"/>.
    /// </summary>
    private GermanStemmer stemmer;

    /// <summary>
    /// Terms that must pass through unstemmed. <c>null</c> means nothing is excluded.
    /// </summary>
    private ISet<string> exclusionSet;

    /// <summary>
    /// Term attribute registered on this stream; read and rewritten by
    /// <see cref="IncrementToken"/>.
    /// </summary>
    private readonly ITermAttribute termAtt;

    /// <summary>
    /// Builds a GermanStemFilter without an exclusion table that uses the
    /// default <see cref="GermanStemmer"/>.
    /// </summary>
    /// <param name="_in">The token stream to filter.</param>
    public GermanStemFilter(TokenStream _in)
        : this(_in, false)
    {
    }

    /// <summary>
    /// Builds a GermanStemFilter without an exclusion table.
    /// </summary>
    /// <param name="_in">The token stream to filter.</param>
    /// <param name="useDin2Stemmer">
    /// When true a <see cref="GermanDIN2Stemmer"/> is used instead of the plain
    /// <see cref="GermanStemmer"/>.
    /// </param>
    public GermanStemFilter(TokenStream _in, bool useDin2Stemmer)
        : this(_in, null, useDin2Stemmer)
    {
    }

    /// <summary>
    /// Builds a GermanStemFilter that uses an exclusiontable.
    /// </summary>
    /// <param name="_in">The token stream to filter.</param>
    /// <param name="exclusiontable">
    /// Terms that should not be stemmed; may be null to exclude nothing.
    /// </param>
    public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable)
        : this(_in, exclusiontable, false)
    {
    }

    /// <summary>
    /// Builds a GermanStemFilter that uses an exclusiontable.
    /// </summary>
    /// <param name="_in">The token stream to filter.</param>
    /// <param name="exclusiontable">
    /// Terms that should not be stemmed; may be null to exclude nothing.
    /// </param>
    /// <param name="normalizeDin2">
    /// Specifies if the DIN-2007-2 style stemmer should be used in addition to DIN1. This
    /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first
    /// converted to 'a', 'u', and 'o' respectively, before the DIN1 stemmer is invoked.
    /// </param>
    public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable, bool normalizeDin2)
        : base(_in)
    {
        exclusionSet = exclusiontable;

        if (normalizeDin2)
        {
            stemmer = new GermanDIN2Stemmer();
        }
        else
        {
            stemmer = new GermanStemmer();
        }

        // The attribute type must be given explicitly; it cannot be inferred
        // from the assignment target.
        termAtt = AddAttribute<ITermAttribute>();
    }

    /// <summary>
    /// Advances the wrapped stream by one token and, unless the term is in the
    /// exclusion table, replaces the term with its stem.
    /// </summary>
    /// <returns>True for the next token in the stream, or false at EOS.</returns>
    public override bool IncrementToken()
    {
        if (!input.IncrementToken())
        {
            return false;
        }

        string term = termAtt.Term;

        // Terms found in the exclusion table are passed through untouched.
        bool isExcluded = exclusionSet != null && exclusionSet.Contains(term);
        if (!isExcluded)
        {
            string stemmed = stemmer.Stem(term);

            // If not stemmed, don't waste the time adjusting the token.
            if (stemmed != null && !stemmed.Equals(term))
            {
                termAtt.SetTermBuffer(stemmed);
            }
        }

        return true;
    }

    /// <summary>
    /// Set an alternative/custom GermanStemmer for this filter.
    /// </summary>
    /// <param name="stemmer">
    /// The stemmer to use from now on. A null value is ignored and the current
    /// stemmer is kept.
    /// </param>
    public void SetStemmer(GermanStemmer stemmer)
    {
        if (stemmer != null)
        {
            this.stemmer = stemmer;
        }
    }

    /// <summary>
    /// Set an alternative exclusion list for this filter.
    /// </summary>
    /// <param name="exclusiontable">
    /// The new set of terms that should not be stemmed; null disables exclusion.
    /// </param>
    public void SetExclusionTable(ISet<string> exclusiontable)
    {
        exclusionSet = exclusiontable;
    }
}
}