/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using PositionIncrementAttribute = Lucene.Net.Analysis.Tokenattributes.PositionIncrementAttribute;
using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute;
using QueryParser = Lucene.Net.QueryParsers.QueryParser;
using Version = Lucene.Net.Util.Version;
namespace Lucene.Net.Analysis
{
/// <summary>Removes stop words from a token stream.</summary>
public sealed class StopFilter : TokenFilter
{
    // Deprecated process-wide default for enablePositionIncrements. It is only
    // consulted by the obsolete constructors that take no explicit flag and by
    // GetEnablePositionIncrementsVersionDefault for versions before 2.9.
    [Obsolete]
    private static bool ENABLE_POSITION_INCREMENTS_DEFAULT = false;

    // Set of terms to drop from the stream.
    private CharArraySet stopWords;

    // When true, the position increments of removed tokens are added to the
    // increment of the next token that is kept.
    private bool enablePositionIncrements = ENABLE_POSITION_INCREMENTS_DEFAULT;

    // Attributes of the current token, registered in Init().
    private TermAttribute termAtt;
    private PositionIncrementAttribute posIncrAtt;

    /// <summary>Construct a token stream filtering the given input.</summary>
    /// <param name="input">input TokenStream</param>
    /// <param name="stopWords">array of stop words</param>
    /// <deprecated>Use <c>StopFilter(bool, TokenStream, String[])</c> instead.</deprecated>
    [Obsolete("Use StopFilter(bool, TokenStream, String[]) instead")]
    public StopFilter(TokenStream input, System.String[] stopWords) : this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, false)
    {
    }

    /// <summary>Construct a token stream filtering the given input.</summary>
    /// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
    /// <param name="input">input TokenStream</param>
    /// <param name="stopWords">array of stop words</param>
    /// <deprecated>Use <c>StopFilter(bool, TokenStream, Hashtable)</c> instead.</deprecated>
    [Obsolete("Use StopFilter(bool, TokenStream, Hashtable) instead.")]
    public StopFilter(bool enablePositionIncrements, TokenStream input, System.String[] stopWords) : this(enablePositionIncrements, input, stopWords, false)
    {
    }

    /// <summary>
    /// Constructs a filter which removes words from the input
    /// TokenStream that are named in the array of words.
    /// </summary>
    /// <param name="in_Renamed">input TokenStream</param>
    /// <param name="stopWords">array of stop words</param>
    /// <param name="ignoreCase">true if case is ignored</param>
    /// <deprecated>Use <c>StopFilter(bool, TokenStream, String[], bool)</c> instead.</deprecated>
    [Obsolete("Use StopFilter(bool, TokenStream, String[], bool) instead")]
    public StopFilter(TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase) : this(ENABLE_POSITION_INCREMENTS_DEFAULT, in_Renamed, stopWords, ignoreCase)
    {
    }

    /// <summary>
    /// Constructs a filter which removes words from the input
    /// TokenStream that are named in the array of words.
    /// </summary>
    /// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
    /// <param name="in_Renamed">input TokenStream</param>
    /// <param name="stopWords">array of stop words</param>
    /// <param name="ignoreCase">true if case is ignored</param>
    /// <exception cref="ArgumentNullException">if <paramref name="stopWords"/> is null</exception>
    /// <deprecated>Use <c>StopFilter(bool, TokenStream, Hashtable, bool)</c> instead.</deprecated>
    [Obsolete("Use StopFilter(bool, TokenStream, Hashtable, bool) instead.")]
    public StopFilter(bool enablePositionIncrements, TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase) : base(in_Renamed)
    {
        // MakeStopSet rejects a null array and always returns a CharArraySet,
        // so the downcast below is safe.
        this.stopWords = (CharArraySet) MakeStopSet(stopWords, ignoreCase);
        this.enablePositionIncrements = enablePositionIncrements;
        Init();
    }

    /// <summary>
    /// Construct a token stream filtering the given input.
    /// If <c>stopWords</c> is an instance of <see cref="CharArraySet"/> (true if
    /// <c>MakeStopSet()</c> was used to construct the set) it will be directly used
    /// and <c>ignoreCase</c> will be ignored since <c>CharArraySet</c>
    /// directly controls case sensitivity.
    /// <para>
    /// If <c>stopWords</c> is not an instance of <see cref="CharArraySet"/>,
    /// a new CharArraySet will be constructed and <c>ignoreCase</c> will be
    /// used to specify the case sensitivity of that set.
    /// </para>
    /// </summary>
    /// <param name="input">Input TokenStream</param>
    /// <param name="stopWords">The set of Stop Words.</param>
    /// <param name="ignoreCase">Ignore case when stopping.</param>
    /// <deprecated>Use <c>StopFilter(bool, TokenStream, Hashtable, bool)</c> instead.</deprecated>
    [Obsolete("Use StopFilter(bool, TokenStream, Hashtable, bool) instead")]
    public StopFilter(TokenStream input, System.Collections.Hashtable stopWords, bool ignoreCase) : this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, ignoreCase)
    {
    }

    /// <summary>
    /// Construct a token stream filtering the given input.
    /// If <c>stopWords</c> is an instance of <see cref="CharArraySet"/> (true if
    /// <c>MakeStopSet()</c> was used to construct the set) it will be directly used
    /// and <c>ignoreCase</c> will be ignored since <c>CharArraySet</c>
    /// directly controls case sensitivity.
    /// <para>
    /// If <c>stopWords</c> is not an instance of <see cref="CharArraySet"/>,
    /// a new CharArraySet will be constructed and <c>ignoreCase</c> will be
    /// used to specify the case sensitivity of that set.
    /// </para>
    /// </summary>
    /// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
    /// <param name="input">Input TokenStream</param>
    /// <param name="stopWords">The set of Stop Words.</param>
    /// <param name="ignoreCase">Ignore case when stopping.</param>
    /// <exception cref="ArgumentNullException">if <paramref name="stopWords"/> is null</exception>
    public StopFilter(bool enablePositionIncrements, TokenStream input, System.Collections.Hashtable stopWords, bool ignoreCase) : base(input)
    {
        if (stopWords == null)
        {
            throw new ArgumentNullException("stopWords");
        }

        // A single 'as' replaces the former 'is' test followed by a cast.
        CharArraySet existingSet = stopWords as CharArraySet;
        if (existingSet != null)
        {
            // Reuse the caller's set as-is; ignoreCase is not applied here.
            this.stopWords = existingSet;
        }
        else
        {
            this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
            // NOTE(review): assumes CharArraySet.Add copies the entries of a
            // Hashtable argument rather than adding the table itself as one
            // element -- confirm against CharArraySet.
            this.stopWords.Add(stopWords);
        }

        this.enablePositionIncrements = enablePositionIncrements;
        Init();
    }

    /// <summary>
    /// Constructs a filter which removes words from the input
    /// TokenStream that are named in the Set.
    /// </summary>
    /// <param name="in_Renamed">Input stream</param>
    /// <param name="stopWords">The set of Stop Words.</param>
    /// <deprecated>Use <c>StopFilter(bool, TokenStream, Hashtable)</c> instead.</deprecated>
    [Obsolete("Use StopFilter(bool, TokenStream, Hashtable) instead")]
    public StopFilter(TokenStream in_Renamed, System.Collections.Hashtable stopWords) : this(ENABLE_POSITION_INCREMENTS_DEFAULT, in_Renamed, stopWords, false)
    {
    }

    /// <summary>
    /// Constructs a filter which removes words from the input
    /// TokenStream that are named in the Set.
    /// </summary>
    /// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
    /// <param name="in_Renamed">Input stream</param>
    /// <param name="stopWords">The set of Stop Words.</param>
    public StopFilter(bool enablePositionIncrements, TokenStream in_Renamed, System.Collections.Hashtable stopWords) : this(enablePositionIncrements, in_Renamed, stopWords, false)
    {
    }

    /// <summary>
    /// Obtains the term and position-increment attributes through
    /// <c>AddAttribute</c> and stores them for use by <see cref="IncrementToken"/>.
    /// Called by the constructors.
    /// </summary>
    public void Init()
    {
        termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
        posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof(PositionIncrementAttribute));
    }

    /// <summary>
    /// Builds a Set from an array of stop words,
    /// appropriate for passing into the StopFilter constructor.
    /// This permits this stopWords construction to be cached once when
    /// an Analyzer is constructed.
    /// Equivalent to <c>MakeStopSet(stopWords, false)</c>.
    /// </summary>
    /// <param name="stopWords">An array of stopwords</param>
    /// <returns>a Set containing the words</returns>
    public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords)
    {
        return MakeStopSet(stopWords, false);
    }

    /// <summary>
    /// Builds a Set from a list of stop words,
    /// appropriate for passing into the StopFilter constructor.
    /// This permits this stopWords construction to be cached once when
    /// an Analyzer is constructed.
    /// Equivalent to <c>MakeStopSet(stopWords, false)</c>.
    /// </summary>
    /// <param name="stopWords">A List of Strings representing the stopwords</param>
    /// <returns>A Set containing the words</returns>
    public static System.Collections.Hashtable MakeStopSet(System.Collections.IList stopWords)
    {
        return MakeStopSet(stopWords, false);
    }

    /// <summary>Builds a Set from an array of stop words.</summary>
    /// <param name="stopWords">An array of stopwords</param>
    /// <param name="ignoreCase">If true, all words are lower cased first.</param>
    /// <returns>a Set containing the words (a <see cref="CharArraySet"/> instance)</returns>
    /// <exception cref="ArgumentNullException">if <paramref name="stopWords"/> is null</exception>
    public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords, bool ignoreCase)
    {
        // string[] already implements IList, so hand it to the IList overload
        // instead of copying it into a temporary ArrayList. The explicit cast
        // selects that overload; it also performs the null check.
        return MakeStopSet((System.Collections.IList) stopWords, ignoreCase);
    }

    /// <summary>Builds a Set from a list of stop words.</summary>
    /// <param name="stopWords">A List of Strings representing the stopwords</param>
    /// <param name="ignoreCase">if true, all words are lower cased first</param>
    /// <returns>A Set containing the words (a <see cref="CharArraySet"/> instance)</returns>
    /// <exception cref="ArgumentNullException">if <paramref name="stopWords"/> is null</exception>
    public static System.Collections.Hashtable MakeStopSet(System.Collections.IList stopWords, bool ignoreCase)
    {
        if (stopWords == null)
        {
            throw new ArgumentNullException("stopWords");
        }

        CharArraySet stopSet = new CharArraySet(stopWords.Count, ignoreCase);
        stopSet.AddAll(stopWords);
        return stopSet;
    }

    /// <summary>
    /// Advances to the next input token whose term is not a stop word.
    /// When position increments are enabled, the increments of the skipped
    /// stop words are added to the increment of the token that is kept.
    /// </summary>
    /// <returns>true if a non-stop token is available; false once the input is exhausted</returns>
    public override bool IncrementToken()
    {
        // Sum of the position increments of the stop words skipped so far.
        int skippedPositions = 0;
        while (input.IncrementToken())
        {
            if (!stopWords.Contains(termAtt.TermBuffer(), 0, termAtt.TermLength()))
            {
                if (enablePositionIncrements)
                {
                    posIncrAtt.SetPositionIncrement(posIncrAtt.GetPositionIncrement() + skippedPositions);
                }
                return true;
            }
            skippedPositions += posIncrAtt.GetPositionIncrement();
        }
        // Reached end of stream without finding another non-stop token.
        return false;
    }

    /// <summary>Returns the current static default for enablePositionIncrements.</summary>
    /// <deprecated>Please specify this when you create the StopFilter.</deprecated>
    [Obsolete("Please specify this when you create the StopFilter")]
    public static bool GetEnablePositionIncrementsDefault()
    {
        return ENABLE_POSITION_INCREMENTS_DEFAULT;
    }

    /// <summary>
    /// Returns version-dependent default for enablePositionIncrements. Analyzers
    /// that embed StopFilter use this method when creating the StopFilter. Prior
    /// to 2.9, this returns the value of <c>GetEnablePositionIncrementsDefault()</c>.
    /// On 2.9 or later, it returns true.
    /// </summary>
    /// <param name="matchVersion">the version whose default behavior is requested</param>
    public static bool GetEnablePositionIncrementsVersionDefault(Version matchVersion)
    {
        if (matchVersion.OnOrAfter(Version.LUCENE_29))
        {
            return true;
        }
        else
        {
            return ENABLE_POSITION_INCREMENTS_DEFAULT;
        }
    }

    /// <summary>
    /// Set the default position increments behavior of every StopFilter created
    /// from now on.
    /// <para>
    /// Note: behavior of a single StopFilter instance can be modified with
    /// <see cref="SetEnablePositionIncrements(bool)"/>. This static method allows
    /// control over behavior of classes using StopFilters internally, for
    /// example <c>Lucene.Net.Analysis.Standard.StandardAnalyzer</c>
    /// if used with the no-arg ctor.
    /// </para>
    /// <para>Default : false.</para>
    /// </summary>
    /// <param name="defaultValue">the new static default</param>
    /// <deprecated>Please specify this when you create the StopFilter.</deprecated>
    [Obsolete("Please specify this when you create the StopFilter")]
    public static void SetEnablePositionIncrementsDefault(bool defaultValue)
    {
        ENABLE_POSITION_INCREMENTS_DEFAULT = defaultValue;
    }

    /// <summary>Returns whether this filter preserves the positions of removed stop words.</summary>
    /// <seealso cref="SetEnablePositionIncrements(bool)"/>
    public bool GetEnablePositionIncrements()
    {
        return enablePositionIncrements;
    }

    /// <summary>
    /// If <c>true</c>, this StopFilter will preserve
    /// positions of the incoming tokens (ie, accumulate and
    /// set position increments of the removed stop tokens).
    /// Generally, <c>true</c> is best as it does not
    /// lose information (positions of the original tokens)
    /// during indexing.
    /// <para>
    /// When set, when a token is stopped
    /// (omitted), the position increment of the following
    /// token is incremented.
    /// </para>
    /// <para>
    /// NOTE: be sure to also
    /// set <c>QueryParser.SetEnablePositionIncrements</c> if
    /// you use QueryParser to create queries.
    /// </para>
    /// </summary>
    /// <param name="enable">true to preserve positions of removed stop words</param>
    public void SetEnablePositionIncrements(bool enable)
    {
        this.enablePositionIncrements = enable;
    }
}
}