/*
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using NUnit.Framework;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using LowerCaseTokenizer = Lucene.Net.Analysis.LowerCaseTokenizer;
using SimpleAnalyzer = Lucene.Net.Analysis.SimpleAnalyzer;
using Token = Lucene.Net.Analysis.Token;
using TokenFilter = Lucene.Net.Analysis.TokenFilter;
using TokenStream = Lucene.Net.Analysis.TokenStream;
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
using DateField = Lucene.Net.Documents.DateField;
using BooleanQuery = Lucene.Net.Search.BooleanQuery;
using FuzzyQuery = Lucene.Net.Search.FuzzyQuery;
using PhraseQuery = Lucene.Net.Search.PhraseQuery;
using PrefixQuery = Lucene.Net.Search.PrefixQuery;
using Query = Lucene.Net.Search.Query;
using RangeQuery = Lucene.Net.Search.RangeQuery;
using TermQuery = Lucene.Net.Search.TermQuery;
using WildcardQuery = Lucene.Net.Search.WildcardQuery;
namespace Lucene.Net.QueryParser
{
/// Tests QueryParser.
[TestFixture]
public class TestQueryParser
{
/// <summary>Shared QPTestAnalyzer instance; TestQPA passes it to AssertQueryEquals and GetQuery.</summary>
public static Analyzer qpAnalyzer = new QPTestAnalyzer();
/// <summary>
/// Filter which discards the token 'stop' and which expands the
/// token 'phrase' into 'phrase1 phrase2'.
/// </summary>
public class QPTestFilter : TokenFilter
{
    // True when 'phrase1' has just been emitted and 'phrase2' is still owed.
    internal bool inPhrase = false;
    // Offsets of the original 'phrase' token, reused for both replacement tokens.
    internal int savedStart = 0;
    internal int savedEnd = 0;

    /// <summary>Wraps the given token stream.</summary>
    public QPTestFilter(TokenStream in_Renamed) : base(in_Renamed)
    {
    }

    /// <summary>
    /// Returns the next token of the filtered stream, or null once the
    /// wrapped stream returns null.
    /// </summary>
    public override Token Next()
    {
        // Second half of an expanded 'phrase' is pending: emit it first.
        if (inPhrase)
        {
            inPhrase = false;
            return new Token("phrase2", savedStart, savedEnd);
        }

        Token current;
        while ((current = input.Next()) != null)
        {
            System.String text = current.TermText();
            if (text.Equals("stop"))
            {
                // Swallow the token and keep reading.
                continue;
            }
            if (!text.Equals("phrase"))
            {
                return current;
            }

            // Remember the offsets so the following 'phrase2' token shares them.
            inPhrase = true;
            savedStart = current.StartOffset();
            savedEnd = current.EndOffset();
            return new Token("phrase1", savedStart, savedEnd);
        }
        return null;
    }
}
/// <summary>Analyzer whose token stream is a LowerCaseTokenizer wrapped in a QPTestFilter.</summary>
public class QPTestAnalyzer : Analyzer
{
    /// <summary>
    /// Builds a LowerCaseTokenizer over <paramref name="reader"/> and wraps it in a
    /// QPTestFilter. The <paramref name="fieldName"/> argument is not used.
    /// </summary>
    public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
    {
        LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(reader);
        return new QPTestFilter(tokenizer);
    }
}
/// <summary>
/// Query parser subclass whose fuzzy and wildcard query factory methods always throw.
/// </summary>
/// <remarks>
/// NOTE(review): the exception thrown is Lucene.Net.Analysis.Standard.ParseException,
/// not Lucene.Net.QueryParsers.ParseException; the tests in this file catch that same
/// type. Confirm this is the intended exception class.
/// </remarks>
public class QPTestParser : QueryParsers.QueryParser
{
    /// <summary>Passes the field name and analyzer straight to the base parser.</summary>
    public QPTestParser(System.String f, Analyzer a) : base(f, a)
    {
    }

    /// <summary>Always throws; no fuzzy query is ever built.</summary>
    protected override Query GetFuzzyQuery(System.String field, System.String termStr)
    {
        System.String message = "Fuzzy queries not allowed";
        throw new Lucene.Net.Analysis.Standard.ParseException(message);
    }

    /// <summary>Always throws; no wildcard query is ever built.</summary>
    protected override Query GetWildcardQuery(System.String field, System.String termStr)
    {
        System.String message = "Wildcard queries not allowed";
        throw new Lucene.Net.Analysis.Standard.ParseException(message);
    }
}
/// <summary>Value of BooleanQuery.GetMaxClauseCount() captured by SetUp.</summary>
private int originalMaxClauses;

/// <summary>
/// Fixture-level set-up: records the current BooleanQuery max clause count
/// so it can be put back later.
/// </summary>
[TestFixtureSetUp]
public virtual void SetUp()
{
    int currentLimit = BooleanQuery.GetMaxClauseCount();
    this.originalMaxClauses = currentLimit;
}
/// <summary>
/// Creates a query parser on the default field "Field" with the default
/// operator set to OR.
/// </summary>
/// <param name="a">Analyzer to use; a new SimpleAnalyzer is used when null.</param>
/// <returns>The configured parser.</returns>
public virtual QueryParsers.QueryParser GetParser(Analyzer a)
{
    Analyzer analyzer = (a != null) ? a : new SimpleAnalyzer();
    QueryParsers.QueryParser parser = new QueryParsers.QueryParser("Field", analyzer);
    parser.SetOperator(QueryParsers.QueryParser.DEFAULT_OPERATOR_OR);
    return parser;
}
/// <summary>Parses <paramref name="query"/> with the parser returned by GetParser.</summary>
/// <param name="query">Query text to parse.</param>
/// <param name="a">Analyzer handed to GetParser (may be null).</param>
/// <returns>The parsed query.</returns>
public virtual Query GetQuery(System.String query, Analyzer a)
{
    QueryParsers.QueryParser parser = GetParser(a);
    return parser.Parse(query);
}
/// <summary>
/// Parses <paramref name="query"/> via GetQuery and fails the test unless its
/// ToString("Field") form equals <paramref name="result"/>.
/// </summary>
/// <param name="query">Query text to parse.</param>
/// <param name="a">Analyzer to parse with (may be null).</param>
/// <param name="result">Expected string form of the parsed query.</param>
public virtual void AssertQueryEquals(System.String query, Analyzer a, System.String result)
{
    System.String actual = GetQuery(query, a).ToString("Field");
    if (actual.Equals(result))
    {
        return;
    }
    Assert.Fail("Query /" + query + "/ yielded /" + actual + "/, expecting /" + result + "/");
}
/// <summary>
/// Parses <paramref name="query"/> with a parser from GetParser(null) whose
/// lowercase-wildcard-terms setting is <paramref name="lowercase"/>, and fails
/// the test unless the ToString("Field") form equals <paramref name="result"/>.
/// </summary>
/// <param name="query">Query text to parse.</param>
/// <param name="lowercase">Value passed to SetLowercaseWildcardTerms.</param>
/// <param name="result">Expected string form of the parsed query.</param>
public virtual void AssertWildcardQueryEquals(System.String query, bool lowercase, System.String result)
{
    QueryParsers.QueryParser parser = GetParser(null);
    parser.SetLowercaseWildcardTerms(lowercase);
    System.String actual = parser.Parse(query).ToString("Field");
    if (actual.Equals(result))
    {
        return;
    }
    Assert.Fail("WildcardQuery /" + query + "/ yielded /" + actual + "/, expecting /" + result + "/");
}
/// <summary>
/// Parses <paramref name="query"/> on the default field "Field" with the
/// default operator set to AND.
/// </summary>
/// <param name="query">Query text to parse.</param>
/// <param name="a">Analyzer to use; a new SimpleAnalyzer is used when null.</param>
/// <returns>The parsed query.</returns>
public virtual Query GetQueryDOA(System.String query, Analyzer a)
{
    Analyzer analyzer = (a == null) ? new SimpleAnalyzer() : a;
    QueryParsers.QueryParser parser = new QueryParsers.QueryParser("Field", analyzer);
    parser.SetOperator(QueryParsers.QueryParser.DEFAULT_OPERATOR_AND);
    return parser.Parse(query);
}
/// <summary>
/// Parses <paramref name="query"/> via GetQueryDOA and fails the test unless
/// its ToString("Field") form equals <paramref name="result"/>.
/// </summary>
/// <param name="query">Query text to parse.</param>
/// <param name="a">Analyzer to parse with (may be null).</param>
/// <param name="result">Expected string form of the parsed query.</param>
public virtual void AssertQueryEqualsDOA(System.String query, Analyzer a, System.String result)
{
    System.String actual = GetQueryDOA(query, a).ToString("Field");
    if (actual.Equals(result))
    {
        return;
    }
    Assert.Fail("Query /" + query + "/ yielded /" + actual + "/, expecting /" + result + "/");
}
/// <summary>
/// Checks the string form of parsed term, boolean, phrase, boosted and grouped
/// queries using the default (null) analyzer, plus the query class produced
/// for three basic inputs.
/// </summary>
[Test]
public virtual void TestSimple()
{
    // Flat list of (query, expected string form) pairs, checked in order.
    System.String[] operatorCases = new System.String[] {
        "term term term", "term term term",
        "türm term term", "türm term term",
        "ümlaut", "ümlaut",
        "a AND b", "+a +b",
        "(a AND b)", "+a +b",
        "c OR (a AND b)", "c (+a +b)",
        "a AND NOT b", "+a -b",
        "a AND -b", "+a -b",
        "a AND !b", "+a -b",
        "a && b", "+a +b",
        "a && ! b", "+a -b",
        "a OR b", "a b",
        "a || b", "a b",
        "a OR !b", "a -b",
        "a OR ! b", "a -b",
        "a OR -b", "a -b",
        "+term -term term", "+term -term term",
        "foo:term AND Field:anotherTerm", "+foo:term +anotherterm",
        "term AND \"phrase phrase\"", "+term +\"phrase phrase\"",
        "\"hello there\"", "\"hello there\""
    };
    for (int i = 0; i < operatorCases.Length; i += 2)
    {
        AssertQueryEquals(operatorCases[i], null, operatorCases[i + 1]);
    }

    // The concrete query class depends on the shape of the input.
    Query conjunction = GetQuery("a AND b", null);
    Assert.IsTrue(conjunction is BooleanQuery);
    Query singleTerm = GetQuery("hello", null);
    Assert.IsTrue(singleTerm is TermQuery);
    Query phrase = GetQuery("\"hello there\"", null);
    Assert.IsTrue(phrase is PhraseQuery);

    // Boosts and grouping, again as (query, expected) pairs.
    System.String[] boostAndGroupCases = new System.String[] {
        "germ term^2.0", "germ term^2.0",
        "(term)^2.0", "term^2.0",
        "(germ term)^2.0", "(germ term)^2.0",
        "term^2.0", "term^2.0",
        "term^2", "term^2.0",
        "\"germ term\"^2.0", "\"germ term\"^2.0",
        "\"term germ\"^2", "\"term germ\"^2.0",
        "(foo OR bar) AND (baz OR boo)", "+(foo bar) +(baz boo)",
        "((a OR b) AND NOT c) OR d", "(+(a b) -c) d",
        "+(apple \"steve jobs\") -(foo bar baz)", "+(apple \"steve jobs\") -(foo bar baz)",
        "+title:(dog OR cat) -author:\"bob dole\"", "+(title:dog title:cat) -author:\"bob dole\""
    };
    for (int i = 0; i < boostAndGroupCases.Length; i += 2)
    {
        AssertQueryEquals(boostAndGroupCases[i], null, boostAndGroupCases[i + 1]);
    }
}
/// <summary>
/// Checks that terms containing punctuation come back unchanged when parsed
/// with a WhitespaceAnalyzer.
/// </summary>
[Test]
public virtual void TestPunct()
{
    Analyzer a = new WhitespaceAnalyzer();
    // Each query is expected to print exactly as it was written.
    System.String[] unchanged = new System.String[] { "a&b", "a&&b", ".NET" };
    foreach (System.String text in unchanged)
    {
        AssertQueryEquals(text, a, text);
    }
}
/// <summary>
/// Checks the string form of phrase queries written with a "~n" suffix,
/// using the default (null) analyzer.
/// </summary>
[Test]
public virtual void TestSlop()
{
    // Flat list of (query, expected string form) pairs.
    System.String[] slopCases = new System.String[] {
        "\"term germ\"~2", "\"term germ\"~2",
        "\"term germ\"~2 flork", "\"term germ\"~2 flork",
        "\"term\"~2", "term",
        "\" \"~2 germ", "germ",
        "\"term germ\"~2^2", "\"term germ\"~2^2.0"
    };
    for (int i = 0; i < slopCases.Length; i += 2)
    {
        AssertQueryEquals(slopCases[i], null, slopCases[i + 1]);
    }
}
/// <summary>
/// Parses the same numeric queries with the default (null) analyzer and with a
/// StandardAnalyzer and checks the expected string form for each.
/// </summary>
[Test]
public virtual void TestNumber()
{
    System.String[] queries = new System.String[] { "3", "term 1.0 1 2", "term term1 term2" };

    // The numbers go away because SimpleAnalyzer ignores them
    System.String[] withoutNumbers = new System.String[] { "", "term", "term term term" };
    for (int i = 0; i < queries.Length; i++)
    {
        AssertQueryEquals(queries[i], null, withoutNumbers[i]);
    }

    // With StandardAnalyzer every query is expected to print as written.
    Analyzer a = new StandardAnalyzer();
    foreach (System.String text in queries)
    {
        AssertQueryEquals(text, a, text);
    }
}
/// <summary>
/// Checks string forms and query classes for prefix, wildcard and fuzzy
/// queries, the fuzzy query's minimum similarity and prefix length, rejection
/// of "term~1.1", and the effect of the lowercase-wildcard-terms setting.
/// </summary>
[Test]
public virtual void TestWildcard()
{
    // Flat list of (query, expected string form) pairs.
    System.String[] printedForms = new System.String[] {
        "term*", "term*",
        "term*^2", "term*^2.0",
        "term~", "term~0.5",
        "term~0.7", "term~0.7",
        "term~^2", "term^2.0~0.5",
        "term^2~", "term^2.0~0.5",
        "term*germ", "term*germ",
        "term*germ^3", "term*germ^3.0"
    };
    for (int i = 0; i < printedForms.Length; i += 2)
    {
        AssertQueryEquals(printedForms[i], null, printedForms[i + 1]);
    }

    Query parsed = GetQuery("term*", null);
    Assert.IsTrue(parsed is PrefixQuery);
    parsed = GetQuery("term*^2", null);
    Assert.IsTrue(parsed is PrefixQuery);
    parsed = GetQuery("term~", null);
    Assert.IsTrue(parsed is FuzzyQuery);
    parsed = GetQuery("term~0.7", null);
    Assert.IsTrue(parsed is FuzzyQuery);

    FuzzyQuery fuzzy = (FuzzyQuery) GetQuery("term~0.7", null);
    Assert.AreEqual(0.7f, fuzzy.GetMinSimilarity(), 0.1f);
    Assert.AreEqual(0, fuzzy.GetPrefixLength());
    fuzzy = (FuzzyQuery) GetQuery("term~", null);
    Assert.AreEqual(0.5f, fuzzy.GetMinSimilarity(), 0.1f);
    Assert.AreEqual(0, fuzzy.GetPrefixLength());

    // value > 1, throws exception
    bool rejected = false;
    try
    {
        GetQuery("term~1.1", null);
    }
    catch (Lucene.Net.QueryParsers.ParseException)
    {
        // expected exception
        rejected = true;
    }
    if (!rejected)
    {
        Assert.Fail();
    }

    parsed = GetQuery("term*germ", null);
    Assert.IsTrue(parsed is WildcardQuery);

    /* Tests to see that wild card terms are (or are not) properly
    * lower-cased with proper parser configuration
    */
    // First prefix queries:
    System.String[] prefixInputs = new System.String[] { "term*", "Term*", "TERM*" };
    foreach (System.String text in prefixInputs)
    {
        AssertWildcardQueryEquals(text, true, "term*");
    }
    foreach (System.String text in prefixInputs)
    {
        // With lower-casing off the query is expected to print as written.
        AssertWildcardQueryEquals(text, false, text);
    }

    // Then 'full' wildcard queries:
    System.String[] wildcardInputs = new System.String[] { "te?m", "Te?m", "TE?M", "Te?m*gerM" };
    System.String[] wildcardLowered = new System.String[] { "te?m", "te?m", "te?m", "te?m*germ" };
    for (int i = 0; i < wildcardInputs.Length; i++)
    {
        AssertWildcardQueryEquals(wildcardInputs[i], true, wildcardLowered[i]);
    }
    foreach (System.String text in wildcardInputs)
    {
        AssertWildcardQueryEquals(text, false, text);
    }
}
/// <summary>
/// Parses queries with qpAnalyzer, whose filter drops 'stop' and expands
/// 'phrase' into 'phrase1 phrase2', and checks the resulting string forms and
/// query classes.
/// </summary>
[Test]
public virtual void TestQPA()
{
    // Flat list of (query, expected string form) pairs.
    System.String[] filteredCases = new System.String[] {
        "term term term", "term term term",
        "term +stop term", "term term",
        "term -stop term", "term term",
        "drop AND stop AND roll", "+drop +roll",
        "term phrase term", "term \"phrase1 phrase2\" term",
        "term AND NOT phrase term", "+term -\"phrase1 phrase2\" term",
        "stop", ""
    };
    for (int i = 0; i < filteredCases.Length; i += 2)
    {
        AssertQueryEquals(filteredCases[i], qpAnalyzer, filteredCases[i + 1]);
    }

    Query threeTerms = GetQuery("term term term", qpAnalyzer);
    Assert.IsTrue(threeTerms is BooleanQuery);
    Query termAndStop = GetQuery("term +stop", qpAnalyzer);
    Assert.IsTrue(termAndStop is TermQuery);
}
/// <summary>
/// Checks the string form of range queries written with square and curly
/// brackets, alone and combined with other clauses, and that "[ a TO z]"
/// parses to a RangeQuery.
/// </summary>
[Test]
public virtual void TestRange()
{
    AssertQueryEquals("[ a TO z]", null, "[a TO z]");
    Query range = GetQuery("[ a TO z]", null);
    Assert.IsTrue(range is RangeQuery);

    // Flat list of (query, expected string form) pairs.
    System.String[] rangeCases = new System.String[] {
        "[ a TO z ]", "[a TO z]",
        "{ a TO z}", "{a TO z}",
        "{ a TO z }", "{a TO z}",
        "{ a TO z }^2.0", "{a TO z}^2.0",
        "[ a TO z] OR bar", "[a TO z] bar",
        "[ a TO z] AND bar", "+[a TO z] +bar",
        "( bar blar { a TO z}) ", "bar blar {a TO z}",
        "gack ( bar blar { a TO z}) ", "gack (bar blar {a TO z})"
    };
    for (int i = 0; i < rangeCases.Length; i += 2)
    {
        AssertQueryEquals(rangeCases[i], null, rangeCases[i + 1]);
    }
}
/// <summary>
/// Parses <paramref name="s"/> with DateTime.Parse and returns the result of
/// DateField.DateToString for that date.
/// </summary>
/// <param name="s">Date text accepted by DateTime.Parse.</param>
/// <returns>The DateField string for the parsed date.</returns>
public virtual System.String GetDate(System.String s)
{
    DateTime parsed = DateTime.Parse(s);
    return DateField.DateToString(parsed);
}
/// <summary>
/// Builds a DateTime from the given year, month and day and returns its
/// ToShortDateString() text.
/// </summary>
/// <param name="year">Year component.</param>
/// <param name="month">Month component.</param>
/// <param name="day">Day component.</param>
/// <returns>The short date string for that date.</returns>
public virtual System.String GetLocalizedDate(int year, int month, int day)
{
    DateTime date = new DateTime(year, month, day);
    return date.ToShortDateString();
}
/// <summary>
/// Checks that range queries whose bounds are short date strings print with
/// the bounds converted by GetDate, for both bracket styles.
/// </summary>
[Test]
public virtual void TestDateRange()
{
    System.String startDate = GetLocalizedDate(2002, 1, 1);
    System.String endDate = GetLocalizedDate(2002, 1, 4);

    // Square brackets, with an explicit TO between the bounds.
    System.String bracketQuery = "[ " + startDate + " TO " + endDate + "]";
    System.String bracketExpected = "[" + GetDate(startDate) + " TO " + GetDate(endDate) + "]";
    AssertQueryEquals(bracketQuery, null, bracketExpected);

    // Curly braces; this query text has no TO between the bounds.
    System.String braceQuery = "{ " + startDate + " " + endDate + " }";
    System.String braceExpected = "{" + GetDate(startDate) + " TO " + GetDate(endDate) + "}";
    AssertQueryEquals(braceQuery, null, braceExpected);
}
/// <summary>
/// Checks the string form of queries containing backslash-escaped special
/// characters in field names, terms, prefix/wildcard/fuzzy terms (with a
/// WhitespaceAnalyzer) and range bounds (with the default analyzer).
/// </summary>
[Test]
public virtual void TestEscaped()
{
    Analyzer a = new WhitespaceAnalyzer();
    /*AssertQueryEquals("\\[brackets", a, "\\[brackets");
    AssertQueryEquals("\\[brackets", null, "brackets");
    AssertQueryEquals("\\\\", a, "\\\\");
    AssertQueryEquals("\\+blah", a, "\\+blah");
    AssertQueryEquals("\\(blah", a, "\\(blah");
    AssertQueryEquals("\\-blah", a, "\\-blah");
    AssertQueryEquals("\\!blah", a, "\\!blah");
    AssertQueryEquals("\\{blah", a, "\\{blah");
    AssertQueryEquals("\\}blah", a, "\\}blah");
    AssertQueryEquals("\\:blah", a, "\\:blah");
    AssertQueryEquals("\\^blah", a, "\\^blah");
    AssertQueryEquals("\\[blah", a, "\\[blah");
    AssertQueryEquals("\\]blah", a, "\\]blah");
    AssertQueryEquals("\\\"blah", a, "\\\"blah");
    AssertQueryEquals("\\(blah", a, "\\(blah");
    AssertQueryEquals("\\)blah", a, "\\)blah");
    AssertQueryEquals("\\~blah", a, "\\~blah");
    AssertQueryEquals("\\*blah", a, "\\*blah");
    AssertQueryEquals("\\?blah", a, "\\?blah");
    //AssertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar");
    //AssertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
    //AssertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/

    // Flat list of (query, expected string form) pairs, parsed with the WhitespaceAnalyzer.
    System.String[] whitespaceCases = new System.String[] {
        "a\\-b:c", "a-b:c",
        "a\\+b:c", "a+b:c",
        "a\\:b:c", "a:b:c",
        "a\\\\b:c", "a\\b:c",
        "a:b\\-c", "a:b-c",
        "a:b\\+c", "a:b+c",
        "a:b\\:c", "a:b:c",
        "a:b\\\\c", "a:b\\c",
        "a:b\\-c*", "a:b-c*",
        "a:b\\+c*", "a:b+c*",
        "a:b\\:c*", "a:b:c*",
        "a:b\\\\c*", "a:b\\c*",
        "a:b\\-?c", "a:b-?c",
        "a:b\\+?c", "a:b+?c",
        "a:b\\:?c", "a:b:?c",
        "a:b\\\\?c", "a:b\\?c",
        "a:b\\-c~", "a:b-c~0.5",
        "a:b\\+c~", "a:b+c~0.5",
        "a:b\\:c~", "a:b:c~0.5",
        "a:b\\\\c~", "a:b\\c~0.5"
    };
    for (int i = 0; i < whitespaceCases.Length; i += 2)
    {
        AssertQueryEquals(whitespaceCases[i], a, whitespaceCases[i + 1]);
    }

    // Escapes inside range bounds, parsed with the default (null) analyzer.
    System.String[] rangeCases = new System.String[] {
        "[ a\\- TO a\\+ ]", "[a- TO a+]",
        "[ a\\: TO a\\~ ]", "[a: TO a~]",
        "[ a\\\\ TO a\\* ]", "[a\\ TO a*]"
    };
    for (int i = 0; i < rangeCases.Length; i += 2)
    {
        AssertQueryEquals(rangeCases[i], null, rangeCases[i + 1]);
    }
}
/// <summary>
/// Checks that, with AND as the default operator, queries whose two terms are
/// separated by spaces, newlines, carriage returns or tabs all print as
/// "+weltbank +worlbank".
/// </summary>
[Test]
public virtual void TestTabNewlineCarriageReturn()
{
    System.String[] separatorVariants = new System.String[] {
        "+weltbank +worlbank",
        "+weltbank\n+worlbank",
        "weltbank \n+worlbank",
        "weltbank \n +worlbank",
        "+weltbank\r+worlbank",
        "weltbank \r+worlbank",
        "weltbank \r +worlbank",
        "+weltbank\r\n+worlbank",
        "weltbank \r\n+worlbank",
        "weltbank \r\n +worlbank",
        "weltbank \r \n +worlbank",
        "+weltbank\t+worlbank",
        "weltbank \t+worlbank",
        "weltbank \t +worlbank"
    };
    foreach (System.String text in separatorVariants)
    {
        AssertQueryEqualsDOA(text, null, "+weltbank +worlbank");
    }
}
/// <summary>
/// Checks the string form of simple term queries when AND is the default
/// operator.
/// </summary>
[Test]
public virtual void TestSimpleDAO()
{
    // Flat list of (query, expected string form) pairs.
    System.String[] andDefaultCases = new System.String[] {
        "term term term", "+term +term +term",
        "term +term term", "+term +term +term",
        "term term +term", "+term +term +term",
        "term +term +term", "+term +term +term",
        "-term term term", "-term +term +term"
    };
    for (int i = 0; i < andDefaultCases.Length; i += 2)
    {
        AssertQueryEqualsDOA(andDefaultCases[i], null, andDefaultCases[i + 1]);
    }
}
/// <summary>
/// Checks that boosted queries parse to a non-null query and that a "^2.0"
/// boost is reported by GetBoost(), using a StandardAnalyzer constructed with
/// the single word "on" (presumably its stop-word list; confirm against the
/// StandardAnalyzer constructor).
/// </summary>
[Test]
public virtual void TestBoost()
{
    StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(new System.String[]{"on"});
    QueryParsers.QueryParser qp = new QueryParsers.QueryParser("Field", oneStopAnalyzer);
    Query q = qp.Parse("on^1.0");
    Assert.IsNotNull(q);
    q = qp.Parse("\"hello\"^2.0");
    Assert.IsNotNull(q);
    // NUnit's argument order is (expected, actual, delta); keeping to it makes
    // a failure message report the two values the right way round.
    Assert.AreEqual((float) 2.0, q.GetBoost(), (float) 0.5);
    q = qp.Parse("hello^2.0");
    Assert.IsNotNull(q);
    Assert.AreEqual((float) 2.0, q.GetBoost(), (float) 0.5);
    q = qp.Parse("\"on\"^1.0");
    Assert.IsNotNull(q);
    q = QueryParsers.QueryParser.Parse("the^3", "Field", new StandardAnalyzer());
    Assert.IsNotNull(q);
}
/// <summary>
/// Checks that parsing a query with an unterminated quote throws
/// Lucene.Net.QueryParsers.ParseException.
/// </summary>
[Test]
public virtual void TestException()
{
    bool parseFailed = false;
    try
    {
        AssertQueryEquals("\"some phrase", null, "abc");
    }
    catch (Lucene.Net.QueryParsers.ParseException)
    {
        parseFailed = true;
    }
    if (!parseFailed)
    {
        Assert.Fail("ParseException expected, not thrown");
    }
}
/// <summary>
/// Checks that QPTestParser rejects the wildcard query "a?t" by throwing
/// Lucene.Net.Analysis.Standard.ParseException.
/// </summary>
[Test]
public virtual void TestCustomQueryParserWildcard()
{
    bool rejected = false;
    try
    {
        QPTestParser parser = new QPTestParser("contents", new WhitespaceAnalyzer());
        parser.Parse("a?t");
    }
    catch (Lucene.Net.Analysis.Standard.ParseException)
    {
        rejected = true;
    }
    if (!rejected)
    {
        Assert.Fail("Wildcard queries should not be allowed");
    }
}
/// <summary>
/// Checks that QPTestParser rejects the fuzzy query "xunit~" by throwing
/// Lucene.Net.Analysis.Standard.ParseException.
/// </summary>
[Test]
public virtual void TestCustomQueryParserFuzzy()
{
    bool rejected = false;
    try
    {
        QPTestParser parser = new QPTestParser("contents", new WhitespaceAnalyzer());
        parser.Parse("xunit~");
    }
    catch (Lucene.Net.Analysis.Standard.ParseException)
    {
        rejected = true;
    }
    if (!rejected)
    {
        Assert.Fail("Fuzzy queries should not be allowed");
    }
}
/// <summary>
/// Lowers the BooleanQuery max clause count to 2 and checks that parsing a
/// three-term query throws Lucene.Net.QueryParsers.ParseException.
/// </summary>
[Test]
public virtual void TestBooleanQuery()
{
    BooleanQuery.SetMaxClauseCount(2);
    try
    {
        QueryParsers.QueryParser.Parse("one two three", "Field", new WhitespaceAnalyzer());
        Assert.Fail("ParseException expected due to too many boolean clauses");
    }
    catch (Lucene.Net.QueryParsers.ParseException)
    {
        // too many boolean clauses, so ParseException is expected
    }
    finally
    {
        // The clause limit is static state shared by every test. Restore it on
        // the failure path too, so a failed assertion or unexpected exception
        // here cannot leave the limit at 2 for the tests that run afterwards.
        BooleanQuery.SetMaxClauseCount(originalMaxClauses);
    }
}
/// <summary>
/// Fixture-level tear-down: sets the BooleanQuery max clause count back to
/// the value held in originalMaxClauses.
/// </summary>
[TestFixtureTearDown]
public virtual void TearDown()
{
    int savedLimit = this.originalMaxClauses;
    BooleanQuery.SetMaxClauseCount(savedLimit);
}
}
}