/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using Lucene.Net.Analysis.Ext;
using Lucene.Net.Store;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;

using NUnit.Framework;

namespace Lucene.Net.Test.Analysis.Ext
{
    /// <summary>
    /// Tests for the analyzers in <c>Lucene.Net.Analysis.Ext</c>
    /// (<see cref="SingleCharTokenAnalyzer"/> and <see cref="UnaccentedWordAnalyzer"/>).
    /// Each test indexes one document into an in-memory directory and checks
    /// the hit counts returned for a series of parsed queries.
    /// </summary>
    [TestFixture]
    class TestAnalysisExt
    {
        /// <summary>
        /// Per-test setup hook; currently performs no work.
        /// </summary>
        [SetUp]
        public void Setup()
        {
        }

        /// <summary>
        /// Builds a fresh <see cref="RAMDirectory"/> index containing a single
        /// document whose only field, named "field", holds <paramref name="data"/>.
        /// The field is stored, analyzed with <paramref name="analyzer"/>, and
        /// indexed with term vectors including positions and offsets.
        /// </summary>
        /// <param name="data">Text to index as the document's "field" value.</param>
        /// <param name="analyzer">Analyzer handed to the <see cref="IndexWriter"/>.</param>
        /// <returns>A searcher over a reader opened on the new index.</returns>
        IndexSearcher CreateIndex(string data, Analyzer analyzer)
        {
            RAMDirectory dir = new RAMDirectory();
            IndexWriter wr = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            try
            {
                Document doc = new Document();
                doc.Add(new Field("field", data, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                wr.AddDocument(doc);
            }
            finally
            {
                // Close the writer even when adding the document fails, so the
                // writer is not left open on the directory.
                wr.Close();
            }

            return new IndexSearcher(IndexReader.Open(dir, true));
        }

        /// <summary>
        /// Indexes "someuser@gmail.com 1234567890 abcdefgh" with
        /// <see cref="SingleCharTokenAnalyzer"/> and checks, with AND as the
        /// default operator and position increments enabled, which queries
        /// match the single document.
        /// </summary>
        [Test]
        public void TestSingleCharTokenAnalyzer()
        {
            Analyzer analyzer = new SingleCharTokenAnalyzer();
            IndexSearcher src = CreateIndex("someuser@gmail.com 1234567890 abcdefgh", analyzer);

            var p = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "field", analyzer)
            {
                DefaultOperator = QueryParser.Operator.AND,
                EnablePositionIncrements = true
            };

            // Character runs that are not contiguous in the indexed text
            // (the separator is missing) are expected not to match.
            TopDocs td = src.Search(p.Parse("usergmail"), 10);
            Assert.AreEqual(0, td.TotalHits);

            td = src.Search(p.Parse("gmailcom"), 10);
            Assert.AreEqual(0, td.TotalHits);

            // Substrings of a single word are expected to match.
            td = src.Search(p.Parse("678"), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("someuser"), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("omeuse"), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("omeuse 6789"), 10);
            Assert.AreEqual(1, td.TotalHits);

            // Fragments on either side of a separator character.
            td = src.Search(p.Parse("user gmail"), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("\"user gmail\""), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("user@gmail"), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("gmail.com"), 10);
            Assert.AreEqual(1, td.TotalHits);

            // Quoted phrases: adjacent fragments match, non-adjacent ones do not.
            td = src.Search(p.Parse("\"gmail.com 1234\""), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("\"gmail.com defg\""), 10);
            Assert.AreEqual(0, td.TotalHits);

            // Unquoted, the same fragments are combined by the AND default
            // operator and are expected to match.
            td = src.Search(p.Parse("gmail.com defg"), 10);
            Assert.AreEqual(1, td.TotalHits);
        }

        //[Test]
        //public void TestSingleCharTokenAnalyzerHighlight()
        //{
        //    Analyzer analyzer = new SingleCharTokenAnalyzer();
        //    IndexSearcher src = CreateIndex("someuser@gmail.com 1234567890 abcdefgh", analyzer);

        //    QueryParser p = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "field", analyzer);
        //    p.SetDefaultOperator(QueryParser.Operator.AND);
        //    p.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);

        //    Lucene.Net.Search.Vectorhighlight.FastVectorHighlighter fvh = new Lucene.Net.Search.Vectorhighlight.FastVectorHighlighter(true, true);
        //    Query q = null;
        //    string[] fragments = null;

        //    q = p.Parse("cde");
        //    fragments = fvh.GetBestFragments(fvh.GetFieldQuery(q), src.GetIndexReader(), 0, "field", 256, 10);
        //    Assert.IsTrue(fragments != null && fragments.Length > 0 && fragments[0].IndexOf("cde") >= 0);

        //    q = p.Parse("2345");
        //    fragments = fvh.GetBestFragments(fvh.GetFieldQuery(q), src.GetIndexReader(), 0, "field", 256, 10);
        //    Assert.IsTrue(fragments != null && fragments.Length > 0 && fragments[0].IndexOf("2345") >= 0);

        //    q = p.Parse("gmail 1234");
        //    fragments = fvh.GetBestFragments(fvh.GetFieldQuery(q), src.GetIndexReader(), 0, "field", 256, 10);
        //    Assert.IsTrue(fragments != null && fragments.Length > 0 && fragments[0].IndexOf("gmail.com 1234") >= 0);

        //    /*
        //    q = p.Parse("gmail.com");
        //    fragments = fvh.GetBestFragments(fvh.GetFieldQuery(q), src.GetIndexReader(), 0, "field", 256, 10);
        //    Assert.IsTrue(fragments != null && fragments.Length > 0 && fragments[0].IndexOf("??????????") >= 0);
        //    System.Diagnostics.Debug.WriteLine(fragments[0]);
        //    */
        //}

        /// <summary>
        /// Runs <see cref="UnaccentedWordAnalyzer"/> over a mixed Latin, Turkish,
        /// Greek and Cyrillic sample, checks the emitted terms against the
        /// expected token list, then indexes the same text and checks the hit
        /// counts for a series of queries.
        /// </summary>
        [Test]
        public void TestUnaccentedWordAnalyzer()
        {
            string text = "Name.Surname@gmail.com 123.456 ğüşıöç%ĞÜŞİÖÇ$ΑΒΓΔΕΖ#АБВГДЕ SSß";
            string[] expectedTokens = new string[] { "name", "surname", "gmail", "com", "123", "456", "gusioc", "gusioc", "αβγδεζ", "абвгде", "ssss" };

            UnaccentedWordAnalyzer analyzer = new UnaccentedWordAnalyzer();
            TokenStream ts = analyzer.TokenStream("", new System.IO.StringReader(text));

            int i = 0;
            // The attribute type must be given explicitly; it cannot be
            // inferred from the assignment target.
            ITermAttribute termAttribute = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
            {
                // Fail with a readable message rather than an index-out-of-range
                // exception if the analyzer emits more tokens than expected.
                Assert.IsTrue(i < expectedTokens.Length, "Analyzer produced more tokens than expected");
                Assert.AreEqual(expectedTokens[i++], termAttribute.Term);
                System.Diagnostics.Debug.WriteLine(termAttribute.Term);
            }

            // Without this check a stream that ends early would pass unnoticed.
            Assert.AreEqual(expectedTokens.Length, i, "Analyzer produced fewer tokens than expected");

            QueryParser p = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "field", analyzer);
            IndexSearcher src = CreateIndex(text, analyzer);

            TopDocs td = src.Search(p.Parse("ĞÜŞıöç"), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("name"), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("surname"), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("NAME.surname"), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("surname@gmail"), 10);
            Assert.AreEqual(1, td.TotalHits);

            // "name" and "gmail" are not adjacent in the indexed text, so this
            // query is expected not to match.
            td = src.Search(p.Parse("name@gmail"), 10);
            Assert.AreEqual(0, td.TotalHits);

            td = src.Search(p.Parse("456"), 10);
            Assert.AreEqual(1, td.TotalHits);

            td = src.Search(p.Parse("123.456"), 10);
            Assert.AreEqual(1, td.TotalHits);
        }
    }
}