/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
using System;
using System.IO;
using System.Collections;
using System.Collections.Generic;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis;
namespace Lucene.Net.Analysis.De
{
/// <summary>
/// Analyzer for German language. Supports an external list of stopwords (words that
/// will not be indexed at all) and an external list of exclusions (words that will
/// not be stemmed, but indexed).
/// A default set of stopwords is used unless an alternative list is specified; the
/// exclusion list is empty by default.
/// </summary>
public class GermanAnalyzer : Analyzer
{
    /// <summary>
    /// List of typical German stopwords, used by the parameterless constructor.
    /// </summary>
    // Static and readonly: the list is constant, so it is allocated once for the
    // type instead of once per analyzer instance.
    private static readonly String[] GERMAN_STOP_WORDS =
    {
        "einer", "eine", "eines", "einem", "einen",
        "der", "die", "das", "dass", "daß",
        "du", "er", "sie", "es",
        "was", "wer", "wie", "wir",
        "und", "oder", "ohne", "mit",
        "am", "im", "in", "aus", "auf",
        "ist", "sein", "war", "wird",
        "ihr", "ihre", "ihres",
        "als", "für", "von",
        "dich", "dir", "mich", "mir",
        "mein", "kein",
        "durch", "wegen"
    };

    /// <summary>
    /// Contains the stopwords used with the StopFilter.
    /// </summary>
    // Every constructor assigns this field, so it needs no default initializer.
    private readonly ICollection stoptable;

    /// <summary>
    /// Contains words that should be indexed but not stemmed. Starts out empty and is
    /// replaced by the SetStemExclusionTable overloads.
    /// </summary>
    private ICollection excltable = new List<string>();

    /// <summary>
    /// Builds an analyzer with the default German stop words.
    /// </summary>
    public GermanAnalyzer()
    {
        stoptable = StopFilter.MakeStopSet(GERMAN_STOP_WORDS);
    }

    /// <summary>
    /// Builds an analyzer with the given stop words.
    /// </summary>
    /// <param name="stopwords">Array of stop words, converted with StopFilter.MakeStopSet.</param>
    public GermanAnalyzer(String[] stopwords)
    {
        stoptable = StopFilter.MakeStopSet(stopwords);
    }

    /// <summary>
    /// Builds an analyzer with the given stop words.
    /// </summary>
    /// <param name="stopwords">Collection of stop words; stored as-is, not copied.</param>
    public GermanAnalyzer(ICollection stopwords)
    {
        stoptable = stopwords;
    }

    /// <summary>
    /// Builds an analyzer with the stop words contained in the given file.
    /// </summary>
    /// <param name="stopwords">File that is read with WordlistLoader.GetWordtable.</param>
    public GermanAnalyzer(FileInfo stopwords)
    {
        stoptable = WordlistLoader.GetWordtable(stopwords);
    }

    /// <summary>
    /// Builds an exclusionlist from an array of Strings.
    /// </summary>
    /// <param name="exclusionlist">Words that should be indexed but not stemmed.</param>
    public void SetStemExclusionTable(String[] exclusionlist)
    {
        excltable = StopFilter.MakeStopSet(exclusionlist);
    }

    /// <summary>
    /// Uses the given collection as the exclusionlist.
    /// </summary>
    /// <param name="exclusionlist">Words that should be indexed but not stemmed; stored as-is, not copied.</param>
    public void SetStemExclusionTable(ICollection exclusionlist)
    {
        excltable = exclusionlist;
    }

    /// <summary>
    /// Builds an exclusionlist from the words contained in the given file.
    /// </summary>
    /// <param name="exclusionlist">File that is read with WordlistLoader.GetWordtable.</param>
    public void SetStemExclusionTable(FileInfo exclusionlist)
    {
        excltable = WordlistLoader.GetWordtable(exclusionlist);
    }

    /// <summary>
    /// Creates a TokenStream which tokenizes all the text in the provided TextReader.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>
    /// A TokenStream built from a StandardTokenizer filtered with StandardFilter,
    /// LowerCaseFilter, StopFilter and GermanStemFilter, in that order.
    /// </returns>
    public override TokenStream TokenStream(String fieldName, TextReader reader)
    {
        TokenStream result = new StandardTokenizer(reader);
        result = new StandardFilter(result);
        result = new LowerCaseFilter(result);
        // Stop words are removed before stemming, using the configured stop table.
        result = new StopFilter(result, stoptable);
        // Words in the exclusion table are handed to the stem filter alongside the stream.
        result = new GermanStemFilter(result, excltable);
        return result;
    }
}
}