/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * */ using System; using System.IO; using System.Collections; using System.Collections.Generic; using Lucene.Net.Analysis.Standard; using Lucene.Net.Analysis; namespace Lucene.Net.Analysis.De { ///

/// <summary>
/// Analyzer for the German language. Supports an external list of stopwords (words that
/// will not be indexed at all) and an external list of exclusions (words that will
/// not be stemmed, but indexed).
/// A default set of stopwords is used unless an alternative list is specified; the
/// exclusion list is empty by default.
/// </summary>
public class GermanAnalyzer : Analyzer
{
    /// <summary>
    /// List of typical German stopwords.
    /// </summary>
    // Static and readonly: the list is identical for every analyzer, so there is no
    // reason to allocate a fresh array per instance or to allow the reference to change.
    private static readonly String[] GERMAN_STOP_WORDS =
    {
        "einer", "eine", "eines", "einem", "einen",
        "der", "die", "das", "dass", "daß",
        "du", "er", "sie", "es",
        "was", "wer", "wie", "wir",
        "und", "oder", "ohne", "mit",
        "am", "im", "in", "aus", "auf",
        "ist", "sein", "war", "wird",
        "ihr", "ihre", "ihres",
        "als", "für", "von",
        "dich", "dir", "mich", "mir",
        "mein", "kein",
        "durch", "wegen"
    };

    /// <summary>
    /// Contains the stopwords used with the StopFilter.
    /// </summary>
    // Every constructor assigns this field, so no placeholder collection is allocated here.
    private readonly ICollection stoptable;

    /// <summary>
    /// Contains words that should be indexed but not stemmed.
    /// </summary>
    // Empty until one of the SetStemExclusionTable overloads replaces it.
    private ICollection excltable = new List<string>();

    /// <summary>
    /// Builds an analyzer that uses the default German stopwords.
    /// </summary>
    public GermanAnalyzer()
    {
        stoptable = StopFilter.MakeStopSet(GERMAN_STOP_WORDS);
    }

    /// <summary>
    /// Builds an analyzer with the given stop words.
    /// </summary>
    /// <param name="stopwords">Stop words, passed through StopFilter.MakeStopSet.</param>
    public GermanAnalyzer(String[] stopwords)
    {
        stoptable = StopFilter.MakeStopSet(stopwords);
    }

    /// <summary>
    /// Builds an analyzer with the given stop words.
    /// </summary>
    /// <param name="stopwords">Collection of stop words; stored as-is, not copied.</param>
    public GermanAnalyzer(ICollection stopwords)
    {
        stoptable = stopwords;
    }

    /// <summary>
    /// Builds an analyzer with the stop words obtained from the given file.
    /// </summary>
    /// <param name="stopwords">File handed to WordlistLoader.GetWordtable.</param>
    public GermanAnalyzer(FileInfo stopwords)
    {
        stoptable = WordlistLoader.GetWordtable(stopwords);
    }

    /// <summary>
    /// Builds an exclusion list from an array of strings.
    /// </summary>
    /// <param name="exclusionlist">Words to index without stemming, passed through StopFilter.MakeStopSet.</param>
    public void SetStemExclusionTable(String[] exclusionlist)
    {
        excltable = StopFilter.MakeStopSet(exclusionlist);
    }

    /// <summary>
    /// Uses the given collection as the exclusion list.
    /// </summary>
    /// <param name="exclusionlist">Words to index without stemming; stored as-is, not copied.</param>
    public void SetStemExclusionTable(ICollection exclusionlist)
    {
        excltable = exclusionlist;
    }

    /// <summary>
    /// Builds an exclusion list from the words contained in the given file.
    /// </summary>
    /// <param name="exclusionlist">File handed to WordlistLoader.GetWordtable.</param>
    public void SetStemExclusionTable(FileInfo exclusionlist)
    {
        excltable = WordlistLoader.GetWordtable(exclusionlist);
    }

    /// <summary>
    /// Creates a TokenStream which tokenizes all the text in the provided TextReader.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed; not used by this analyzer.</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>
    /// A TokenStream built from a StandardTokenizer filtered with StandardFilter,
    /// LowerCaseFilter, StopFilter and GermanStemFilter, in that order.
    /// </returns>
    public override TokenStream TokenStream(String fieldName, TextReader reader)
    {
        TokenStream result = new StandardTokenizer(reader);
        result = new StandardFilter(result);
        result = new LowerCaseFilter(result);
        result = new StopFilter(result, stoptable);
        result = new GermanStemFilter(result, excltable);
        return result;
    }
}
} // namespace Lucene.Net.Analysis.De