/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using NUnit.Framework;
using English = Lucene.Net.Util.English;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
namespace Lucene.Net.Analysis
{
/// <summary>
/// Tests for <see cref="StopFilter"/>: exact-case and ignore-case stop word
/// matching, stop sets built with <c>StopFilter.MakeStopSet</c>, and the position
/// increments reported with the enable-position-increments option on and off.
/// </summary>
/// <remarks>Author: yonik</remarks>
[TestFixture]
public class TestStopFilter : LuceneTestCase
{
    // Flip to true to print debug output. Declared static readonly rather than
    // const so the body of Log is not flagged as unreachable code (CS0162).
    private static readonly bool VERBOSE = false;

    // Input shared by the simple stop word tests.
    private const string SampleText = "Now is The Time";

    // The position tests tokenize the English words for 0..TokenCount-1 and turn
    // every word whose index is not a multiple of KeptTokenStride into a stop word.
    private const int TokenCount = 20;
    private const int KeptTokenStride = 3;

    // other StopFilter functionality is already tested by TestStopAnalyzer

    /// <summary>
    /// With case-sensitive matching, only "is" and "Time" match the stop words,
    /// so "Now" and "The" are expected to come through.
    /// </summary>
    [Test]
    public virtual void TestExactCase()
    {
        string[] stopWords = new string[] { "is", "the", "Time" };
        TokenStream stream = new StopFilter(NewTokenizer(SampleText), stopWords);
        AssertStreamTerms(stream, "Now", "The");
    }

    /// <summary>
    /// With the ignore-case flag set, "The" is filtered as well and only "Now"
    /// is expected to come through.
    /// </summary>
    [Test]
    public virtual void TestIgnoreCase()
    {
        string[] stopWords = new string[] { "is", "the", "Time" };
        TokenStream stream = new StopFilter(NewTokenizer(SampleText), stopWords, true);
        AssertStreamTerms(stream, "Now");
    }

    /// <summary>
    /// Same expectation as <see cref="TestExactCase"/>, but the filter is given a
    /// stop set built by <c>StopFilter.MakeStopSet</c> instead of a string array.
    /// </summary>
    [Test]
    public virtual void TestStopFilt()
    {
        string[] stopWords = new string[] { "is", "the", "Time" };
        System.Collections.Hashtable stopSet = StopFilter.MakeStopSet(stopWords);
        TokenStream stream = new StopFilter(NewTokenizer(SampleText), stopSet);
        AssertStreamTerms(stream, "Now", "The");
    }

    /// <summary>
    /// Test position increments applied by StopFilter with and without enabling
    /// this option, and with two stop filters chained one after the other.
    /// </summary>
    [Test]
    public virtual void TestStopPositons()
    {
        System.Text.StringBuilder sb = new System.Text.StringBuilder();
        System.Collections.Generic.List<string> stopList = new System.Collections.Generic.List<string>();
        for (int i = 0; i < TokenCount; i++)
        {
            string w = English.IntToEnglish(i).Trim();
            sb.Append(w).Append(" ");
            if (i % KeptTokenStride != 0)
            {
                stopList.Add(w);
            }
        }
        string text = sb.ToString();
        Log(text);
        string[] stopWords = stopList.ToArray();
        LogAll("Stop: ", stopWords);
        System.Collections.Hashtable stopSet = StopFilter.MakeStopSet(stopWords);

        // with increments
        StopFilter stpf = new StopFilter(NewTokenizer(text), stopSet);
        DoTestStopPositons(stpf, true);

        // without increments
        stpf = new StopFilter(NewTokenizer(text), stopSet);
        DoTestStopPositons(stpf, false);

        // with increments, concatenating two stop filters:
        // stop words at even list positions go to the first set, odd ones to the second
        System.Collections.Generic.List<string> evenStops = new System.Collections.Generic.List<string>();
        System.Collections.Generic.List<string> oddStops = new System.Collections.Generic.List<string>();
        for (int i = 0; i < stopList.Count; i++)
        {
            if (i % 2 == 0)
            {
                evenStops.Add(stopList[i]);
            }
            else
            {
                oddStops.Add(stopList[i]);
            }
        }
        string[] stopWords0 = evenStops.ToArray();
        LogAll("Stop0: ", stopWords0);
        string[] stopWords1 = oddStops.ToArray();
        LogAll("Stop1: ", stopWords1);
        System.Collections.Hashtable stopSet0 = StopFilter.MakeStopSet(stopWords0);
        System.Collections.Hashtable stopSet1 = StopFilter.MakeStopSet(stopWords1);
        StopFilter stpf0 = new StopFilter(NewTokenizer(text), stopSet0); // first part of the set
        stpf0.SetEnablePositionIncrements(true);
        StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated!
        DoTestStopPositons(stpf01, true);
    }

    /// <summary>
    /// Drains <paramref name="stpf"/> and checks that it yields exactly the English
    /// words for 0, 3, 6, ... below <c>TokenCount</c>, each with the expected
    /// position increment, and then ends.
    /// </summary>
    /// <param name="stpf">Filter under test; its position-increment option is set here.</param>
    /// <param name="enableIncrements">
    /// When true, every token after the first is expected to report an increment of
    /// <c>KeptTokenStride</c>; otherwise every token is expected to report 1.
    /// </param>
    private void DoTestStopPositons(StopFilter stpf, bool enableIncrements)
    {
        Log("---> test with enable-increments-" + (enableIncrements ? "enabled" : "disabled"));
        stpf.SetEnablePositionIncrements(enableIncrements);
        Token reusableToken = new Token();
        for (int i = 0; i < TokenCount; i += KeptTokenStride)
        {
            Token nextToken = stpf.Next(reusableToken);
            Log("Token " + i + ": " + nextToken);
            string w = English.IntToEnglish(i).Trim();
            // Fail with a readable message instead of a NullReferenceException
            // if the stream runs dry early.
            Assert.IsNotNull(nextToken, "expecting token " + i + " to be " + w + " but the stream ended");
            Assert.AreEqual(w, nextToken.Term(), "expecting token " + i + " to be " + w);
            int expectedIncrement = (enableIncrements && i != 0) ? KeptTokenStride : 1;
            Assert.AreEqual(expectedIncrement, nextToken.GetPositionIncrement(), "token " + i + " must have position increment of " + expectedIncrement);
        }
        Assert.IsNull(stpf.Next(reusableToken));
    }

    /// <summary>
    /// Asserts that <paramref name="stream"/> produces exactly
    /// <paramref name="expectedTerms"/>, in order, and then returns null.
    /// </summary>
    private static void AssertStreamTerms(TokenStream stream, params string[] expectedTerms)
    {
        Token reusableToken = new Token();
        for (int i = 0; i < expectedTerms.Length; i++)
        {
            Token nextToken = stream.Next(reusableToken);
            Assert.IsNotNull(nextToken, "stream ended before term '" + expectedTerms[i] + "'");
            Assert.AreEqual(expectedTerms[i], nextToken.Term());
        }
        Assert.IsNull(stream.Next(reusableToken));
    }

    /// <summary>Wraps <paramref name="text"/> in a fresh reader and whitespace tokenizer.</summary>
    private static WhitespaceTokenizer NewTokenizer(string text)
    {
        return new WhitespaceTokenizer(new System.IO.StringReader(text));
    }

    /// <summary>Logs every entry of <paramref name="words"/> with the given prefix.</summary>
    private static void LogAll(string prefix, string[] words)
    {
        for (int i = 0; i < words.Length; i++)
        {
            Log(prefix + words[i]);
        }
    }

    // print debug info depending on VERBOSE
    private static void Log(string s)
    {
        if (VERBOSE)
        {
            Console.Out.WriteLine(s);
        }
    }
}
}