/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using NUnit.Framework;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using Term = Lucene.Net.Index.Term;
using Directory = Lucene.Net.Store.Directory;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
namespace Lucene.Net.Search
{
/// <summary>
/// Tests <see cref="PhraseQuery"/>.
/// </summary>
/// <remarks>
/// Author: Erik Hatcher
/// </remarks>
[TestFixture]
public class TestPhraseQuery
{
/// <summary>
/// Analyzer used to build the fixture index. It tokenizes every field with a
/// <c>WhitespaceTokenizer</c> and returns 100 from
/// <c>GetPositionIncrementGap</c> regardless of the field name.
/// </summary>
private class AnonymousClassAnalyzer : Analyzer
{
    // The fixture that created this analyzer; only surfaced through Enclosing_Instance.
    private readonly TestPhraseQuery owner;

    /// <summary>Creates the analyzer and remembers the fixture that owns it.</summary>
    /// <param name="enclosingInstance">The test fixture constructing this analyzer.</param>
    public AnonymousClassAnalyzer(TestPhraseQuery enclosingInstance)
    {
        owner = enclosingInstance;
    }

    /// <summary>The fixture instance passed to the constructor.</summary>
    public TestPhraseQuery Enclosing_Instance
    {
        get { return owner; }
    }

    /// <summary>Wraps <paramref name="reader"/> in a whitespace tokenizer; the field name is ignored.</summary>
    public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
    {
        return new WhitespaceTokenizer(reader);
    }

    /// <summary>Returns a fixed gap of 100 for every field.</summary>
    public override int GetPositionIncrementGap(System.String fieldName)
    {
        const int gap = 100;
        return gap;
    }
}
// Searcher over the fixture index; created in SetUp and closed in TearDown.
private IndexSearcher searcher;
// Empty phrase query created in SetUp; each test adds its own terms and slop.
private PhraseQuery query;
// In-memory directory holding the fixture index; created in SetUp and closed in TearDown.
private RAMDirectory directory;
/// <summary>
/// Builds a one-document in-memory index and opens a searcher on it.
/// The document has a "field" value of "one two three four five" and two
/// separate values for the "repeated" field. An empty <c>PhraseQuery</c> is
/// also created for the test to fill in.
/// </summary>
[SetUp]
public virtual void SetUp()
{
    directory = new RAMDirectory();

    Lucene.Net.Documents.Document fixtureDoc = new Lucene.Net.Documents.Document();
    fixtureDoc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED));
    // "repeated" is deliberately added twice so the document carries two values for that field.
    fixtureDoc.Add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.TOKENIZED));
    fixtureDoc.Add(new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED));

    IndexWriter indexWriter = new IndexWriter(directory, new AnonymousClassAnalyzer(this), true);
    indexWriter.AddDocument(fixtureDoc);
    indexWriter.Optimize();
    indexWriter.Close();

    searcher = new IndexSearcher(directory);
    query = new PhraseQuery();
}
/// <summary>
/// Closes the fixture searcher and then the in-memory directory created by SetUp.
/// </summary>
[TearDown]
public virtual void TearDown()
{
    try
    {
        searcher.Close();
    }
    finally
    {
        // Release the directory even when closing the searcher throws.
        directory.Close();
    }
}
/// <summary>
/// With a slop of 2, the phrase "one five" is expected to find nothing in
/// the fixture text "one two three four five".
/// </summary>
[Test]
public virtual void TestNotCloseEnough()
{
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "five"));
    query.SetSlop(2);

    Hits matches = searcher.Search(query);

    Assert.AreEqual(0, matches.Length());
    QueryUtils.Check(query, searcher);
}
/// <summary>
/// With a slop of 3, the phrase "one five" is expected to match the single
/// fixture document.
/// </summary>
[Test]
public virtual void TestBarelyCloseEnough()
{
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "five"));
    query.SetSlop(3);

    Hits matches = searcher.Search(query);

    Assert.AreEqual(1, matches.Length());
    QueryUtils.Check(query, searcher);
}
/// <summary>
/// Ensures a slop of 0 works for exact matches, but not for reversed terms.
/// </summary>
[Test]
public virtual void TestExact()
{
    // No SetSlop call here: slop is zero by default.
    Term four = new Term("field", "four");
    Term five = new Term("field", "five");
    query.Add(four);
    query.Add(five);
    Assert.AreEqual(1, searcher.Search(query).Length(), "exact match");
    QueryUtils.Check(query, searcher);

    // Same zero slop, but the terms are in the opposite order to the indexed text.
    query = new PhraseQuery();
    Term two = new Term("field", "two");
    Term one = new Term("field", "one");
    query.Add(two);
    query.Add(one);
    Assert.AreEqual(0, searcher.Search(query).Length(), "reverse not exact");
    QueryUtils.Check(query, searcher);
}
/// <summary>
/// Checks a slop of 1 against both an in-order and a reversed two-term phrase.
/// </summary>
[Test]
public virtual void TestSlop1()
{
    // In order: a slop of 1 is expected to match.
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "two"));
    query.SetSlop(1);
    Hits inOrder = searcher.Search(query);
    Assert.AreEqual(1, inOrder.Length(), "in order");
    QueryUtils.Check(query, searcher);

    // Out of order: a slop of 1 is not enough; it must be at least 2.
    query = new PhraseQuery();
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "one"));
    query.SetSlop(1);
    Hits reversed = searcher.Search(query);
    Assert.AreEqual(0, reversed.Length(), "reversed, slop not 2 or more");
    QueryUtils.Check(query, searcher);
}
/// <summary>
/// As long as slop is at least 2, adjacent terms can be given in reverse order.
/// </summary>
[Test]
public virtual void TestOrderDoesntMatter()
{
    // Adjacent terms reversed: slop must be at least two for the reverse-order match.
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "one"));
    query.SetSlop(2);
    Assert.AreEqual(1, searcher.Search(query).Length(), "just sloppy enough");
    QueryUtils.Check(query, searcher);

    // "three one" reversed with the same slop is expected to find nothing.
    query = new PhraseQuery();
    query.Add(new Term("field", "three"));
    query.Add(new Term("field", "one"));
    query.SetSlop(2);
    Assert.AreEqual(0, searcher.Search(query).Length(), "not sloppy enough");
    QueryUtils.Check(query, searcher);
}
/// <summary>
/// Slop is the total number of positional moves allowed to line up a phrase.
/// </summary>
[Test]
public virtual void TestMulipleTerms()
{
    const string fieldName = "field";

    // Three terms in indexed order.
    query.Add(new Term(fieldName, "one"));
    query.Add(new Term(fieldName, "three"));
    query.Add(new Term(fieldName, "five"));
    query.SetSlop(2);
    Hits matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "two total moves");
    QueryUtils.Check(query, searcher);

    // The same three terms reversed: it takes six moves to match this phrase.
    query = new PhraseQuery();
    query.Add(new Term(fieldName, "five"));
    query.Add(new Term(fieldName, "three"));
    query.Add(new Term(fieldName, "one"));
    query.SetSlop(5);
    matches = searcher.Search(query);
    Assert.AreEqual(0, matches.Length(), "slop of 5 not close enough");
    QueryUtils.Check(query, searcher);

    // Raise the slop on the same query object and search again.
    query.SetSlop(6);
    matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "slop of 6 just right");
    QueryUtils.Check(query, searcher);
}
/// <summary>
/// Indexes "the stop words are here" with a <c>StopAnalyzer</c> into its own
/// in-memory directory and runs two exact (zero-slop) phrase queries against it.
/// </summary>
/// <remarks>
/// This test builds a private index, so it owns and releases its own
/// directory and searcher; the locals are named so they do not hide the
/// fixture fields of the same purpose.
/// </remarks>
[Test]
public virtual void TestPhraseQueryWithStopAnalyzer()
{
    RAMDirectory stopDirectory = new RAMDirectory();
    try
    {
        StopAnalyzer stopAnalyzer = new StopAnalyzer();
        IndexWriter writer = new IndexWriter(stopDirectory, stopAnalyzer, true);
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
        writer.Close();

        IndexSearcher stopSearcher = new IndexSearcher(stopDirectory);
        try
        {
            // valid exact phrase query
            PhraseQuery phrase = new PhraseQuery();
            phrase.Add(new Term("field", "stop"));
            phrase.Add(new Term("field", "words"));
            Hits hits = stopSearcher.Search(phrase);
            Assert.AreEqual(1, hits.Length());
            QueryUtils.Check(phrase, stopSearcher);

            // currently StopAnalyzer does not leave "holes", so this matches.
            phrase = new PhraseQuery();
            phrase.Add(new Term("field", "words"));
            phrase.Add(new Term("field", "here"));
            hits = stopSearcher.Search(phrase);
            Assert.AreEqual(1, hits.Length());
            QueryUtils.Check(phrase, stopSearcher);
        }
        finally
        {
            // Close the searcher even when an assertion above fails.
            stopSearcher.Close();
        }
    }
    finally
    {
        // The original never closed this directory.
        stopDirectory.Close();
    }
}
/// <summary>
/// Exercises a <c>PhraseQuery</c> combined with a <c>TermQuery</c> as MUST
/// clauses of a <c>BooleanQuery</c>, over two successive private indexes built
/// in the same in-memory directory.
/// </summary>
/// <remarks>
/// Each searcher is closed in a <c>finally</c> block, and the directory is
/// closed at the end, so a failing assertion does not leave them open.
/// </remarks>
[Test]
public virtual void TestPhraseQueryInConjunctionScorer()
{
    RAMDirectory conjunctionDirectory = new RAMDirectory();
    try
    {
        // ---- First index: two documents sharing the same "source" text. ----
        IndexWriter writer = new IndexWriter(conjunctionDirectory, new WhitespaceAnalyzer(), true);
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
        doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.TOKENIZED));
        doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
        writer.Optimize();
        writer.Close();

        IndexSearcher firstSearcher = new IndexSearcher(conjunctionDirectory);
        try
        {
            PhraseQuery phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term("source", "marketing"));
            phraseQuery.Add(new Term("source", "info"));
            Hits hits = firstSearcher.Search(phraseQuery);
            Assert.AreEqual(2, hits.Length());
            QueryUtils.Check(phraseQuery, firstSearcher);

            TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
            booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
            hits = firstSearcher.Search(booleanQuery);
            Assert.AreEqual(1, hits.Length());
            QueryUtils.Check(termQuery, firstSearcher);
        }
        finally
        {
            // The searcher must be closed before the index is recreated below.
            firstSearcher.Close();
        }

        // ---- Second index: recreated (create == true) in the same directory. ----
        writer = new IndexWriter(conjunctionDirectory, new WhitespaceAnalyzer(), true);
        doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
        doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
        doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
        writer.Optimize();
        writer.Close();

        IndexSearcher secondSearcher = new IndexSearcher(conjunctionDirectory);
        try
        {
            TermQuery termQuery = new TermQuery(new Term("contents", "woo"));
            PhraseQuery phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term("contents", "map"));
            phraseQuery.Add(new Term("contents", "entry"));

            Hits hits = secondSearcher.Search(termQuery);
            Assert.AreEqual(3, hits.Length());
            hits = secondSearcher.Search(phraseQuery);
            Assert.AreEqual(2, hits.Length());

            // Term clause first, phrase clause second.
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
            booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
            hits = secondSearcher.Search(booleanQuery);
            Assert.AreEqual(2, hits.Length());

            // Same clauses in the opposite order.
            booleanQuery = new BooleanQuery();
            booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
            booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
            hits = secondSearcher.Search(booleanQuery);
            Assert.AreEqual(2, hits.Length());
            QueryUtils.Check(booleanQuery, secondSearcher);
        }
        finally
        {
            secondSearcher.Close();
        }
    }
    finally
    {
        // Close the directory on every path, including assertion failures.
        conjunctionDirectory.Close();
    }
}
/// <summary>
/// Indexes three documents in which "firstname" and "lastname" are separated
/// by zero, one and two other tokens. With the maximum slop all three must
/// match, and the expected scores decrease as the separation grows.
/// </summary>
/// <remarks>
/// The private searcher and directory are closed in <c>finally</c> blocks;
/// the original left both open.
/// </remarks>
[Test]
public virtual void TestSlopScoring()
{
    Directory scoringDirectory = new RAMDirectory();
    try
    {
        IndexWriter writer = new IndexWriter(scoringDirectory, new WhitespaceAnalyzer(), true);
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
        Lucene.Net.Documents.Document doc2 = new Lucene.Net.Documents.Document();
        doc2.Add(new Field("field", "foo firstname xxx lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc2);
        Lucene.Net.Documents.Document doc3 = new Lucene.Net.Documents.Document();
        doc3.Add(new Field("field", "foo firstname xxx yyy lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc3);
        writer.Optimize();
        writer.Close();

        Searcher scoringSearcher = new IndexSearcher(scoringDirectory);
        try
        {
            PhraseQuery sloppyQuery = new PhraseQuery();
            sloppyQuery.Add(new Term("field", "firstname"));
            sloppyQuery.Add(new Term("field", "lastname"));
            sloppyQuery.SetSlop(System.Int32.MaxValue);
            Hits hits = scoringSearcher.Search(sloppyQuery);
            Assert.AreEqual(3, hits.Length());
            // Make sure that those matches where the terms appear closer to
            // each other get a higher score:
            Assert.AreEqual(0.71, hits.Score(0), 0.01);
            Assert.AreEqual(0, hits.Id(0));
            Assert.AreEqual(0.44, hits.Score(1), 0.01);
            Assert.AreEqual(1, hits.Id(1));
            Assert.AreEqual(0.31, hits.Score(2), 0.01);
            Assert.AreEqual(2, hits.Id(2));
            QueryUtils.Check(sloppyQuery, scoringSearcher);
        }
        finally
        {
            // Close the searcher even when an assertion above fails.
            scoringSearcher.Close();
        }
    }
    finally
    {
        scoringDirectory.Close();
    }
}
/// <summary>
/// Queries "first part second part" on the "repeated" field with a slop of 99
/// and expects no hits.
/// </summary>
/// <remarks>
/// The phrase starts at the end of the first "repeated" value and continues
/// at the start of the second. The fixture analyzer returns a position
/// increment gap of 100, so a slop of 99 is presumably meant to be just too
/// small to bridge the two values.
/// </remarks>
[Test]
public virtual void TestWrappedPhrase()
{
    const string fieldName = "repeated";

    query.SetSlop(99);
    query.Add(new Term(fieldName, "first"));
    query.Add(new Term(fieldName, "part"));
    query.Add(new Term(fieldName, "second"));
    query.Add(new Term(fieldName, "part"));

    Hits matches = searcher.Search(query);

    Assert.AreEqual(0, matches.Length());
    QueryUtils.Check(query, searcher);
}
}
}