/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using NUnit.Framework; using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer; using Document = Lucene.Net.Documents.Document; using Field = Lucene.Net.Documents.Field; using IndexWriter = Lucene.Net.Index.IndexWriter; using Term = Lucene.Net.Index.Term; using RAMDirectory = Lucene.Net.Store.RAMDirectory; namespace Lucene.Net.Search { /// Tests {@link FuzzyQuery}. 
/// <remarks>
/// Each test builds a small in-memory index whose documents hold a single
/// term in the "field" field, then runs fuzzy queries with varying minimum
/// similarity and prefix length and checks the number (and, where relevant,
/// the order) of the hits.
/// Original author: Daniel Naber.
/// </remarks>
[TestFixture]
public class TestFuzzyQuery
{
    /// <summary>Name of the only field written by <see cref="AddDoc"/>.</summary>
    private const System.String FieldName = "field";

    /// <summary>
    /// Fuzzy matching against five-character terms: checks how the hit list
    /// changes as the required common prefix grows, and that dissimilar terms
    /// or a different field produce no hits.
    /// </summary>
    [Test]
    public virtual void TestFuzziness()
    {
        RAMDirectory directory = new RAMDirectory();
        try
        {
            BuildIndex(directory, "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd");

            IndexSearcher searcher = new IndexSearcher(directory);
            try
            {
                AssertHitCount(3, searcher, "aaaaa", 0);

                // same with prefix
                AssertHitCount(3, searcher, "aaaaa", 1);
                AssertHitCount(3, searcher, "aaaaa", 2);
                AssertHitCount(3, searcher, "aaaaa", 3);
                AssertHitCount(2, searcher, "aaaaa", 4);
                AssertHitCount(1, searcher, "aaaaa", 5);
                AssertHitCount(1, searcher, "aaaaa", 6);

                // not similar enough:
                AssertHitCount(0, searcher, "xxxxx", 0);
                AssertHitCount(0, searcher, "aaccc", 0); // edit distance to "aaaaa" = 3

                // query identical to a word in the index
                // (default allows for up to two edits):
                AssertHitTerms(searcher, "aaaaa", 0, "aaaaa", "aaaab", "aaabb");

                // query similar to a word in the index:
                AssertHitTerms(searcher, "aaaac", 0, "aaaaa", "aaaab", "aaabb");

                // now with prefix
                AssertHitTerms(searcher, "aaaac", 1, "aaaaa", "aaaab", "aaabb");
                AssertHitTerms(searcher, "aaaac", 2, "aaaaa", "aaaab", "aaabb");
                AssertHitTerms(searcher, "aaaac", 3, "aaaaa", "aaaab", "aaabb");
                AssertHitTerms(searcher, "aaaac", 4, "aaaaa", "aaaab");
                AssertHitCount(0, searcher, "aaaac", 5);

                AssertHitTerms(searcher, "ddddX", 0, "ddddd");

                // now with prefix
                AssertHitTerms(searcher, "ddddX", 1, "ddddd");
                AssertHitTerms(searcher, "ddddX", 2, "ddddd");
                AssertHitTerms(searcher, "ddddX", 3, "ddddd");
                AssertHitTerms(searcher, "ddddX", 4, "ddddd");
                AssertHitCount(0, searcher, "ddddX", 5);

                // different field = no match:
                Hits otherFieldHits = FuzzySearch(searcher, "anotherfield", "ddddX", FuzzyQuery.defaultMinSimilarity, 0);
                Assert.AreEqual(0, otherFieldHits.Length());
            }
            finally
            {
                // Close even when an assertion above fails.
                searcher.Close();
            }
        }
        finally
        {
            directory.Close();
        }
    }

    /// <summary>
    /// Fuzzy matching against seven-character terms, a raised minimum
    /// similarity, and rejection of minimum-similarity values 1.1 and -0.1.
    /// </summary>
    [Test]
    public virtual void TestFuzzinessLong()
    {
        RAMDirectory directory = new RAMDirectory();
        try
        {
            BuildIndex(directory, "aaaaaaa", "segment");

            IndexSearcher searcher = new IndexSearcher(directory);
            try
            {
                // not similar enough:
                AssertHitCount(0, searcher, "xxxxx", 0);

                // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
                // in TestFuzziness so a bigger difference is allowed:
                AssertHitTerms(searcher, "aaaaccc", 0, "aaaaaaa");

                // now with prefix
                AssertHitTerms(searcher, "aaaaccc", 1, "aaaaaaa");
                AssertHitTerms(searcher, "aaaaccc", 4, "aaaaaaa");
                AssertHitCount(0, searcher, "aaaaccc", 5);

                // no match, more than half of the characters is wrong:
                AssertHitCount(0, searcher, "aaacccc", 0);

                // now with prefix
                AssertHitCount(0, searcher, "aaacccc", 2);

                // "student" and "stellent" are indeed similar to "segment" by default:
                AssertHitCount(1, searcher, "student", 0);
                AssertHitCount(1, searcher, "stellent", 0);

                // now with prefix
                AssertHitCount(1, searcher, "student", 1);
                AssertHitCount(1, searcher, "stellent", 1);
                AssertHitCount(0, searcher, "student", 2);
                AssertHitCount(0, searcher, "stellent", 2);

                // "student" doesn't match anymore thanks to increased minimum similarity:
                Hits stricterHits = FuzzySearch(searcher, FieldName, "student", 0.6f, 0);
                Assert.AreEqual(0, stricterHits.Length());

                // minimum similarity values rejected by the constructor:
                AssertMinSimilarityRejected(1.1f);
                AssertMinSimilarityRejected(-0.1f);
            }
            finally
            {
                // Close even when an assertion above fails.
                searcher.Close();
            }
        }
        finally
        {
            directory.Close();
        }
    }

    /// <summary>
    /// Creates a fresh index in <paramref name="directory"/> with one document
    /// per entry of <paramref name="terms"/>, optimizes it and closes the writer.
    /// </summary>
    /// <param name="directory">Directory that receives the index.</param>
    /// <param name="terms">Text of each document, added in the given order.</param>
    private void BuildIndex(RAMDirectory directory, params System.String[] terms)
    {
        IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
        try
        {
            foreach (System.String term in terms)
            {
                AddDoc(term, writer);
            }
            writer.Optimize();
        }
        finally
        {
            writer.Close();
        }
    }

    /// <summary>
    /// Runs a <see cref="FuzzyQuery"/> built from the given arguments and
    /// returns the hits.
    /// </summary>
    /// <param name="searcher">Searcher to run the query against.</param>
    /// <param name="fieldName">Field of the query term.</param>
    /// <param name="text">Text of the query term.</param>
    /// <param name="minSimilarity">Minimum similarity passed to the query.</param>
    /// <param name="prefixLength">Prefix length passed to the query.</param>
    /// <returns>The hits returned by <paramref name="searcher"/>.</returns>
    private static Hits FuzzySearch(IndexSearcher searcher, System.String fieldName, System.String text, float minSimilarity, int prefixLength)
    {
        FuzzyQuery query = new FuzzyQuery(new Term(fieldName, text), minSimilarity, prefixLength);
        return searcher.Search(query);
    }

    /// <summary>
    /// Asserts the number of hits of a default-similarity fuzzy query on
    /// <see cref="FieldName"/>.
    /// </summary>
    /// <param name="expectedCount">Expected number of hits.</param>
    /// <param name="searcher">Searcher to run the query against.</param>
    /// <param name="text">Text of the query term.</param>
    /// <param name="prefixLength">Prefix length passed to the query.</param>
    private static void AssertHitCount(int expectedCount, IndexSearcher searcher, System.String text, int prefixLength)
    {
        Hits hits = FuzzySearch(searcher, FieldName, text, FuzzyQuery.defaultMinSimilarity, prefixLength);
        Assert.AreEqual(expectedCount, hits.Length(), DescribeQuery(text, prefixLength));
    }

    /// <summary>
    /// Asserts that a default-similarity fuzzy query on <see cref="FieldName"/>
    /// returns exactly <paramref name="expectedTerms"/>, in that order.
    /// </summary>
    /// <param name="searcher">Searcher to run the query against.</param>
    /// <param name="text">Text of the query term.</param>
    /// <param name="prefixLength">Prefix length passed to the query.</param>
    /// <param name="expectedTerms">Expected stored field value of each hit, in hit order.</param>
    private static void AssertHitTerms(IndexSearcher searcher, System.String text, int prefixLength, params System.String[] expectedTerms)
    {
        Hits hits = FuzzySearch(searcher, FieldName, text, FuzzyQuery.defaultMinSimilarity, prefixLength);
        System.String description = DescribeQuery(text, prefixLength);

        Assert.AreEqual(expectedTerms.Length, hits.Length(), description);
        for (int i = 0; i < expectedTerms.Length; i++)
        {
            // NUnit order is (expected, actual); keeping it makes failure output read correctly.
            Assert.AreEqual(expectedTerms[i], hits.Doc(i).Get(FieldName), description + ", hit " + i);
        }
    }

    /// <summary>
    /// Asserts that constructing a <see cref="FuzzyQuery"/> with the given
    /// minimum similarity throws <see cref="System.ArgumentException"/>.
    /// </summary>
    /// <param name="minSimilarity">Value expected to be rejected.</param>
    private static void AssertMinSimilarityRejected(float minSimilarity)
    {
        try
        {
            new FuzzyQuery(new Term(FieldName, "student"), minSimilarity);
            Assert.Fail("Expected IllegalArgumentException");
        }
        catch (System.ArgumentException)
        {
            // expecting exception
        }
    }

    /// <summary>Builds the failure-message text identifying a query.</summary>
    /// <param name="text">Text of the query term.</param>
    /// <param name="prefixLength">Prefix length passed to the query.</param>
    /// <returns>A short description of the query for assertion messages.</returns>
    private static System.String DescribeQuery(System.String text, int prefixLength)
    {
        return "fuzzy query \"" + text + "\" with prefix length " + prefixLength;
    }

    /// <summary>
    /// Adds one document to <paramref name="writer"/> whose "field" field
    /// holds <paramref name="text"/> (stored and tokenized).
    /// </summary>
    /// <param name="text">Field content of the new document.</param>
    /// <param name="writer">Open writer that receives the document.</param>
    private void AddDoc(System.String text, IndexWriter writer)
    {
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field(FieldName, text, Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
    }
}
} // namespace Lucene.Net.Search