using System;
using System.IO;
using System.Collections;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis;
namespace Lucene.Net.Analysis.De
{
/// <summary>
/// Analyzer for German language. Supports an external list of stopwords (words that
/// will not be indexed at all) and an external list of exclusions (words that will
/// not be stemmed, but indexed).
/// A default set of stopwords is used unless an alternative list is specified; the
/// exclusion list is empty by default.
/// </summary>
public class GermanAnalyzer : Analyzer
{
    /// <summary>
    /// List of typical German stopwords, used by the parameterless constructor.
    /// </summary>
    /// <remarks>
    /// Declared <c>static readonly</c> so the list is allocated once and shared
    /// by all analyzer instances instead of being re-created per instance.
    /// </remarks>
    private static readonly String[] GERMAN_STOP_WORDS =
    {
        "einer", "eine", "eines", "einem", "einen",
        "der", "die", "das", "dass", "daß",
        "du", "er", "sie", "es",
        "was", "wer", "wie", "wir",
        "und", "oder", "ohne", "mit",
        "am", "im", "in", "aus", "auf",
        "ist", "sein", "war", "wird",
        "ihr", "ihre", "ihres",
        "als", "für", "von",
        "dich", "dir", "mich", "mir",
        "mein", "kein",
        "durch", "wegen"
    };

    /// <summary>
    /// Contains the stopwords used with the StopFilter.
    /// Assigned exactly once by whichever constructor is used.
    /// </summary>
    private readonly Hashtable stoptable;

    /// <summary>
    /// Contains words that should be indexed but not stemmed.
    /// Empty until one of the <c>SetStemExclusionTable</c> overloads is called.
    /// </summary>
    private Hashtable excltable = new Hashtable();

    /// <summary>
    /// Builds an analyzer that uses the default German stop words.
    /// </summary>
    public GermanAnalyzer()
    {
        stoptable = StopFilter.MakeStopSet( GERMAN_STOP_WORDS );
    }

    /// <summary>
    /// Builds an analyzer with the given stop words.
    /// </summary>
    /// <param name="stopwords">Stop words, converted to a table via <c>StopFilter.MakeStopSet</c>.</param>
    public GermanAnalyzer( String[] stopwords )
    {
        stoptable = StopFilter.MakeStopSet( stopwords );
    }

    /// <summary>
    /// Builds an analyzer with the given stop words.
    /// </summary>
    /// <param name="stopwords">
    /// Stop word table. The reference is stored as-is (no copy is made), so later
    /// changes to the table by the caller are visible to this analyzer.
    /// </param>
    public GermanAnalyzer( Hashtable stopwords )
    {
        stoptable = stopwords;
    }

    /// <summary>
    /// Builds an analyzer with the stop words loaded from the given file.
    /// </summary>
    /// <param name="stopwords">File handed to <c>WordlistLoader.GetWordtable</c> to obtain the stop word table.</param>
    public GermanAnalyzer( FileInfo stopwords )
    {
        stoptable = WordlistLoader.GetWordtable( stopwords );
    }

    /// <summary>
    /// Builds an exclusionlist from an array of Strings.
    /// Replaces any previously configured exclusion list.
    /// </summary>
    /// <param name="exclusionlist">Words that should be indexed but not stemmed.</param>
    public void SetStemExclusionTable( String[] exclusionlist )
    {
        excltable = StopFilter.MakeStopSet( exclusionlist );
    }

    /// <summary>
    /// Builds an exclusionlist from a Hashtable.
    /// Replaces any previously configured exclusion list.
    /// </summary>
    /// <param name="exclusionlist">
    /// Table of words that should be indexed but not stemmed. The reference is
    /// stored as-is (no copy is made).
    /// </param>
    public void SetStemExclusionTable( Hashtable exclusionlist )
    {
        excltable = exclusionlist;
    }

    /// <summary>
    /// Builds an exclusionlist from the words contained in the given file.
    /// Replaces any previously configured exclusion list.
    /// </summary>
    /// <param name="exclusionlist">File handed to <c>WordlistLoader.GetWordtable</c> to obtain the exclusion table.</param>
    public void SetStemExclusionTable( FileInfo exclusionlist )
    {
        excltable = WordlistLoader.GetWordtable( exclusionlist );
    }

    /// <summary>
    /// Creates a TokenStream which tokenizes all the text in the provided TextReader.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>
    /// A TokenStream built from a StandardTokenizer filtered with StandardFilter,
    /// LowerCaseFilter, StopFilter and GermanStemFilter, in that order.
    /// </returns>
    public override TokenStream TokenStream( String fieldName, TextReader reader )
    {
        TokenStream result = new StandardTokenizer( reader );
        result = new StandardFilter( result );
        // Lower-casing runs before stop-word removal and stemming, so both of the
        // following filters receive lower-cased tokens.
        result = new LowerCaseFilter( result );
        result = new StopFilter( result, stoptable );
        result = new GermanStemFilter( result, excltable );
        return result;
    }
}
}