/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Lucene.Net.Analysis;

namespace Lucene.Net.Analysis.Standard
{
    /// <summary>
    /// Normalizes tokens extracted with <see cref="StandardTokenizer"/>.
    /// </summary>
    public sealed class StandardFilter : TokenFilter
    {
        /// <summary>
        /// Constructs a filter that reads its tokens from <paramref name="in_Renamed"/>.
        /// </summary>
        /// <param name="in_Renamed">The token stream to filter.</param>
        public StandardFilter(TokenStream in_Renamed) : base(in_Renamed)
        {
        }

        // Type labels looked up once from the tokenizer's token image table.
        private static readonly System.String APOSTROPHE_TYPE = StandardTokenizerConstants.tokenImage[StandardTokenizerConstants.APOSTROPHE];
        private static readonly System.String ACRONYM_TYPE = StandardTokenizerConstants.tokenImage[StandardTokenizerConstants.ACRONYM];

        /// <summary>
        /// Returns the next token in the stream, or <c>null</c> at end of stream.
        /// <para>Removes a trailing <c>'s</c> or <c>'S</c> from tokens of the apostrophe type.</para>
        /// <para>Removes every dot from tokens of the acronym type.</para>
        /// </summary>
        /// <returns>
        /// A new token carrying the normalized text together with the original
        /// start offset, end offset and type; the unmodified token when no rule
        /// applies; or <c>null</c> when the wrapped stream returns <c>null</c>.
        /// </returns>
        public override Lucene.Net.Analysis.Token Next()
        {
            // The token type stays fully qualified on purpose: shortening it could
            // change which Token type is resolved inside this namespace.
            Lucene.Net.Analysis.Token t = input.Next();
            if (t == null)
            {
                return null;
            }

            System.String text = t.TermText();
            System.String type = t.Type();

            if (type == APOSTROPHE_TYPE && EndsWithPossessive(text))
            {
                // Drop the two trailing characters (apostrophe + s/S).
                return new Lucene.Net.Analysis.Token(text.Substring(0, text.Length - 2), t.StartOffset(), t.EndOffset(), type);
            }

            if (type == ACRONYM_TYPE)
            {
                // String.Replace(string, string) is an ordinal replacement, so this
                // strips exactly the '.' characters and nothing else.
                return new Lucene.Net.Analysis.Token(text.Replace(".", ""), t.StartOffset(), t.EndOffset(), type);
            }

            return t;
        }

        /// <summary>
        /// Reports whether the last two characters of <paramref name="text"/> are
        /// an apostrophe followed by <c>s</c> or <c>S</c>.
        /// </summary>
        /// <remarks>
        /// The characters are compared directly instead of calling
        /// <c>String.EndsWith(string)</c>: that overload is culture-sensitive and
        /// may report a match whose length differs from two characters, which
        /// would make the caller's fixed two-character trim cut the wrong text.
        /// </remarks>
        private static bool EndsWithPossessive(System.String text)
        {
            int length = text.Length;
            if (length < 2 || text[length - 2] != '\'')
            {
                return false;
            }

            char last = text[length - 1];
            return last == 's' || last == 'S';
        }
    }
}