/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
*/

using System;
using System.IO;
using System.Collections;
using System.Collections.Generic;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis;

namespace Lucene.Net.Analysis.De
{
    /// <summary>
    /// Analyzer for German language. Supports an external list of stopwords (words that
    /// will not be indexed at all) and an external list of exclusions (words that will
    /// not be stemmed, but indexed).
    /// A default set of stopwords is used unless an alternative list is specified; the
    /// exclusion list is empty by default.
    /// </summary>
    public class GermanAnalyzer : Analyzer
    {
        /// <summary>
        /// List of typical German stopwords. Shared by all instances; used by the
        /// parameterless constructor.
        /// </summary>
        private static readonly string[] GERMAN_STOP_WORDS =
        {
            "einer", "eine", "eines", "einem", "einen",
            "der", "die", "das", "dass", "daß",
            "du", "er", "sie", "es",
            "was", "wer", "wie", "wir",
            "und", "oder", "ohne", "mit",
            "am", "im", "in", "aus", "auf",
            "ist", "sein", "war", "wird",
            "ihr", "ihre", "ihres",
            "als", "für", "von",
            "dich", "dir", "mich", "mir",
            "mein", "kein",
            "durch", "wegen"
        };

        /// <summary>
        /// Contains the stopwords used with the StopFilter.
        /// Assigned exactly once, by whichever constructor is invoked.
        /// </summary>
        private readonly ICollection<string> stoptable;

        /// <summary>
        /// Contains words that should be indexed but not stemmed.
        /// Empty until one of the SetStemExclusionTable overloads is called.
        /// </summary>
        private ICollection<string> excltable = new List<string>();

        /// <summary>
        /// Builds an analyzer that uses the default German stop words.
        /// </summary>
        public GermanAnalyzer()
        {
            stoptable = StopFilter.MakeStopSet(GERMAN_STOP_WORDS);
        }

        /// <summary>
        /// Builds an analyzer with the given stop words.
        /// </summary>
        /// <param name="stopwords">Stop words, converted with StopFilter.MakeStopSet.</param>
        public GermanAnalyzer(String[] stopwords)
        {
            stoptable = StopFilter.MakeStopSet(stopwords);
        }

        /// <summary>
        /// Builds an analyzer with the given stop words.
        /// </summary>
        /// <param name="stopwords">Collection of stop words; stored by reference, not copied.</param>
        public GermanAnalyzer(ICollection<string> stopwords)
        {
            stoptable = stopwords;
        }

        /// <summary>
        /// Builds an analyzer with the stop words loaded from the given file.
        /// </summary>
        /// <param name="stopwords">File handed to WordlistLoader.GetWordtable.</param>
        public GermanAnalyzer(FileInfo stopwords)
        {
            stoptable = WordlistLoader.GetWordtable(stopwords);
        }

        /// <summary>
        /// Builds an exclusion list from an array of strings.
        /// The new list is picked up by subsequent calls to TokenStream.
        /// </summary>
        /// <param name="exclusionlist">Words that must not be stemmed.</param>
        public void SetStemExclusionTable(String[] exclusionlist)
        {
            excltable = StopFilter.MakeStopSet(exclusionlist);
        }

        /// <summary>
        /// Uses the given collection as the exclusion list.
        /// The new list is picked up by subsequent calls to TokenStream.
        /// </summary>
        /// <param name="exclusionlist">Words that must not be stemmed; stored by reference, not copied.</param>
        public void SetStemExclusionTable(ICollection<string> exclusionlist)
        {
            excltable = exclusionlist;
        }

        /// <summary>
        /// Builds an exclusion list from the words contained in the given file.
        /// The new list is picked up by subsequent calls to TokenStream.
        /// </summary>
        /// <param name="exclusionlist">File handed to WordlistLoader.GetWordtable.</param>
        public void SetStemExclusionTable(FileInfo exclusionlist)
        {
            excltable = WordlistLoader.GetWordtable(exclusionlist);
        }

        /// <summary>
        /// Creates a TokenStream which tokenizes all the text in the provided TextReader.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this analyzer.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>
        /// A TokenStream built from a StandardTokenizer filtered with StandardFilter,
        /// LowerCaseFilter, StopFilter and GermanStemFilter, in that order.
        /// </returns>
        public override TokenStream TokenStream(String fieldName, TextReader reader)
        {
            TokenStream result = new StandardTokenizer(reader);
            result = new StandardFilter(result);
            // Lower-casing happens before stop-word removal and stemming.
            result = new LowerCaseFilter(result);
            result = new StopFilter(result, stoptable);
            result = new GermanStemFilter(result, excltable);
            return result;
        }
    }
}