using System;
using System.IO;
using System.Collections;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis;

namespace Lucene.Net.Analysis.De
{
    /// <summary>
    /// Analyzer for German language. Supports an external list of stopwords (words that
    /// will not be indexed at all) and an external list of exclusions (words that will
    /// not be stemmed, but indexed).
    /// A default set of stopwords is used unless an alternative list is specified; the
    /// exclusion list is empty by default.
    /// </summary>
    public class GermanAnalyzer : Analyzer
    {
        /// <summary>
        /// List of typical German stopwords. Shared by all instances; it is only read
        /// (by the default constructor) and never modified by this class.
        /// </summary>
        private static readonly String[] GERMAN_STOP_WORDS =
        {
            "einer", "eine", "eines", "einem", "einen",
            "der", "die", "das", "dass", "daß",
            "du", "er", "sie", "es",
            "was", "wer", "wie", "wir",
            "und", "oder", "ohne", "mit",
            "am", "im", "in", "aus", "auf",
            "ist", "sein", "war", "wird",
            "ihr", "ihre", "ihres",
            "als", "für", "von",
            "dich", "dir", "mich", "mir",
            "mein", "kein",
            "durch", "wegen"
        };

        /// <summary>
        /// Contains the stopwords used with the StopFilter.
        /// Assigned exactly once, by whichever constructor is used.
        /// </summary>
        private readonly Hashtable stoptable;

        /// <summary>
        /// Contains words that should be indexed but not stemmed.
        /// Empty until one of the SetStemExclusionTable overloads replaces it.
        /// </summary>
        private Hashtable excltable = new Hashtable();

        /// <summary>
        /// Builds an analyzer that uses the default German stopword list.
        /// </summary>
        public GermanAnalyzer()
        {
            stoptable = StopFilter.MakeStopSet( GERMAN_STOP_WORDS );
        }

        /// <summary>
        /// Builds an analyzer with the given stop words.
        /// </summary>
        /// <param name="stopwords">Stop words to use instead of the default list.</param>
        public GermanAnalyzer( String[] stopwords )
        {
            stoptable = StopFilter.MakeStopSet( stopwords );
        }

        /// <summary>
        /// Builds an analyzer with the given stop words.
        /// </summary>
        /// <param name="stopwords">
        /// Stop word table. The reference is stored as-is (no copy is made and no
        /// null check is performed here).
        /// </param>
        public GermanAnalyzer( Hashtable stopwords )
        {
            stoptable = stopwords;
        }

        /// <summary>
        /// Builds an analyzer with the stop words obtained from the given file.
        /// </summary>
        /// <param name="stopwords">
        /// File handed to WordlistLoader.GetWordtable; the expected file layout is
        /// defined by that loader and is not visible here.
        /// </param>
        public GermanAnalyzer( FileInfo stopwords )
        {
            stoptable = WordlistLoader.GetWordtable( stopwords );
        }

        /// <summary>
        /// Builds an exclusionlist from an array of Strings.
        /// Replaces any previously set exclusion table.
        /// </summary>
        /// <param name="exclusionlist">Words that should be indexed but not stemmed.</param>
        public void SetStemExclusionTable( String[] exclusionlist )
        {
            excltable = StopFilter.MakeStopSet( exclusionlist );
        }

        /// <summary>
        /// Builds an exclusionlist from a Hashtable.
        /// Replaces any previously set exclusion table.
        /// </summary>
        /// <param name="exclusionlist">
        /// Table of words that should be indexed but not stemmed. The reference is
        /// stored as-is (no copy is made and no null check is performed here).
        /// </param>
        public void SetStemExclusionTable( Hashtable exclusionlist )
        {
            excltable = exclusionlist;
        }

        /// <summary>
        /// Builds an exclusionlist from the words contained in the given file.
        /// Replaces any previously set exclusion table.
        /// </summary>
        /// <param name="exclusionlist">File handed to WordlistLoader.GetWordtable.</param>
        public void SetStemExclusionTable( FileInfo exclusionlist )
        {
            excltable = WordlistLoader.GetWordtable( exclusionlist );
        }

        /// <summary>
        /// Creates a TokenStream which tokenizes all the text in the provided TextReader.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>
        /// A TokenStream built from a StandardTokenizer filtered with StandardFilter,
        /// LowerCaseFilter, StopFilter and GermanStemFilter, in that order.
        /// </returns>
        public override TokenStream TokenStream( String fieldName, TextReader reader )
        {
            TokenStream result = new StandardTokenizer( reader );
            result = new StandardFilter( result );
            // Lower-casing happens before stop word removal; the default stop
            // words above are all lower case.
            result = new LowerCaseFilter( result );
            result = new StopFilter( result, stoptable );
            // The stem filter receives the exclusion table current at the time of this call.
            result = new GermanStemFilter( result, excltable );
            return result;
        }
    }
}