/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using Lucene.Net.Search;
using NUnit.Framework;

using Analyzer = Lucene.Net.Analysis.Analyzer;
using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using Directory = Lucene.Net.Store.Directory;
using MockRAMDirectory = Lucene.Net.Store.MockRAMDirectory;
using BooleanQuery = Lucene.Net.Search.BooleanQuery;
using Collector = Lucene.Net.Search.Collector;
using IndexSearcher = Lucene.Net.Search.IndexSearcher;
using Scorer = Lucene.Net.Search.Scorer;
using Searcher = Lucene.Net.Search.Searcher;
using Similarity = Lucene.Net.Search.Similarity;
using TermQuery = Lucene.Net.Search.TermQuery;
using Occur = Lucene.Net.Search.Occur;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using _TestUtil = Lucene.Net.Util._TestUtil;

namespace Lucene.Net.Index
{
    [TestFixture]
    public class TestOmitTf : LuceneTestCase
    {
        // Collector for q1 ("noTf":"term") in TestBasic: term freqs are omitted,
        // so every hit must score exactly 1.0 under SimpleSimilarity.
        private class AnonymousClassCountingHitCollector : CountingHitCollector
        {
            public AnonymousClassCountingHitCollector(TestOmitTf enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }
            private void InitBlock(TestOmitTf enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private TestOmitTf enclosingInstance;
            public TestOmitTf Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            private Scorer scorer;
            public override void SetScorer(Scorer scorer)
            {
                this.scorer = scorer;
            }
            public override void Collect(int doc)
            {
                //System.out.println("Q1: Doc=" + doc + " score=" + score);
                float score = scorer.Score();
                Assert.IsTrue(score == 1.0f);
                base.Collect(doc);
            }
        }

        // Collector for q2 ("tf":"term") in TestBasic: term freqs are kept and doc n
        // contains the term n + 1 times, so the score must equal 1.0 + doc.
        private class AnonymousClassCountingHitCollector1 : CountingHitCollector
        {
            public AnonymousClassCountingHitCollector1(TestOmitTf enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }
            private void InitBlock(TestOmitTf enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private TestOmitTf enclosingInstance;
            public TestOmitTf Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            private Scorer scorer;
            public override void SetScorer(Scorer scorer)
            {
                this.scorer = scorer;
            }
            public override void Collect(int doc)
            {
                //System.out.println("Q2: Doc=" + doc + " score=" + score);
                float score = scorer.Score();
                Assert.IsTrue(score == 1.0f + doc);
                base.Collect(doc);
            }
        }

        // Collector for q3 ("noTf":"notf") in TestBasic: "notf" only occurs in
        // odd-numbered docs, and the omit-tf field always scores 1.0.
        private class AnonymousClassCountingHitCollector2 : CountingHitCollector
        {
            public AnonymousClassCountingHitCollector2(TestOmitTf enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }
            private void InitBlock(TestOmitTf enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private TestOmitTf enclosingInstance;
            public TestOmitTf Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            private Scorer scorer;
            public override void SetScorer(Scorer scorer)
            {
                this.scorer = scorer;
            }
            public override void Collect(int doc)
            {
                //System.out.println("Q1: Doc=" + doc + " score=" + score);
                float score = scorer.Score();
                Assert.IsTrue(score == 1.0f);
                Assert.IsFalse(doc % 2 == 0);
                base.Collect(doc);
            }
        }

        // Collector for q4 ("tf":"tf") in TestBasic: "tf" only occurs in even-numbered docs.
        private class AnonymousClassCountingHitCollector3 : CountingHitCollector
        {
            public AnonymousClassCountingHitCollector3(TestOmitTf enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }
            private void InitBlock(TestOmitTf enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private TestOmitTf enclosingInstance;
            public TestOmitTf Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            private Scorer scorer;
            public override void SetScorer(Scorer scorer)
            {
                this.scorer = scorer;
            }
            public override void Collect(int doc)
            {
                float score = scorer.Score();
                //System.out.println("Q1: Doc=" + doc + " score=" + score);
                Assert.IsTrue(score == 1.0f);
                Assert.IsTrue(doc % 2 == 0);
                base.Collect(doc);
            }
        }

        // Collector for the boolean query (q1 AND q4) in TestBasic: just counts hits.
        private class AnonymousClassCountingHitCollector4 : CountingHitCollector
        {
            public AnonymousClassCountingHitCollector4(TestOmitTf enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }
            private void InitBlock(TestOmitTf enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }
            private TestOmitTf enclosingInstance;
            public TestOmitTf Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            public override void Collect(int doc)
            {
                //System.out.println("BQ: Doc=" + doc + " score=" + score);
                base.Collect(doc);
            }
        }

        // Constant-idf explanation used by SimpleSimilarity below.
        private class AnonymousIDFExplanation : Explanation.IDFExplanation
        {
            public override float Idf
            {
                get { return 1.0f; }
            }
            public override string Explain()
            {
                return "Inexplicable";
            }
        }

        // Similarity that neutralizes every factor except tf, so a document's score
        // equals the raw term frequency.
        [Serializable]
        public class SimpleSimilarity : Similarity
        {
            public override float LengthNorm(System.String field, int numTerms)
            {
                return 1.0f;
            }
            public override float QueryNorm(float sumOfSquaredWeights)
            {
                return 1.0f;
            }
            public override float Tf(float freq)
            {
                return freq;
            }
            public override float SloppyFreq(int distance)
            {
                return 2.0f;
            }
            public override float Idf(int docFreq, int numDocs)
            {
                return 1.0f;
            }
            public override float Coord(int overlap, int maxOverlap)
            {
                return 1.0f;
            }
            public override Search.Explanation.IDFExplanation IdfExplain(System.Collections.Generic.ICollection<Term> terms, Searcher searcher)
            {
                return new AnonymousIDFExplanation();
            }
        }

        // Tests whether the DocumentWriter correctly enables the
        // omitTermFreqAndPositions bit in the FieldInfo
        [Test]
        public virtual void TestOmitTermFreqAndPositions()
        {
            Directory ram = new MockRAMDirectory();
            Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
            IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            Document d = new Document();

            // this field will have Tf
            Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
            d.Add(f1);

            // this field will NOT have Tf
            Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
            f2.OmitTermFreqAndPositions = true;
            d.Add(f2);

            writer.AddDocument(d);
            writer.Optimize();
            // now we add another document which has term freq for field f2 and not for f1
            // and verify that the SegmentMerger keeps things consistent
            d = new Document();

            // Reverse
            f1.OmitTermFreqAndPositions = true;
            d.Add(f1);

            f2.OmitTermFreqAndPositions = false;
            d.Add(f2);

            writer.AddDocument(d);
            // force merge
            writer.Optimize();
            // flush
            writer.Close();
            _TestUtil.CheckIndex(ram);

            SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
            FieldInfos fi = reader.FieldInfos();
            Assert.IsTrue(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit,
"OmitTermFreqAndPositions field bit should be set."); Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set."); reader.Close(); ram.Close(); } // Tests whether merging of docs that have different // omitTermFreqAndPositions for the same field works [Test] public virtual void TestMixedMerge() { Directory ram = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT); IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(3); writer.MergeFactor = 2; Document d = new Document(); // this field will have Tf Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); d.Add(f1); // this field will NOT have Tf Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); f2.OmitTermFreqAndPositions = true; d.Add(f2); for (int i = 0; i < 30; i++) writer.AddDocument(d); // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger // keep things constant d = new Document(); // Reverese f1.OmitTermFreqAndPositions = true; d.Add(f1); f2.OmitTermFreqAndPositions = false; d.Add(f2); for (int i = 0; i < 30; i++) writer.AddDocument(d); // force merge writer.Optimize(); // flush writer.Close(); _TestUtil.CheckIndex(ram); SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram); FieldInfos fi = reader.FieldInfos(); Assert.IsTrue(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set."); Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set."); reader.Close(); ram.Close(); } // Make sure first adding docs that do not omitTermFreqAndPositions for // field X, then adding docs that do omitTermFreqAndPositions for that same // field, [Test] public virtual void TestMixedRAM() { Directory ram = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT); IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(10); writer.MergeFactor = 2; Document d = new Document(); // this field will have Tf Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); d.Add(f1); // this field will NOT have Tf Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); d.Add(f2); for (int i = 0; i < 5; i++) writer.AddDocument(d); f2.OmitTermFreqAndPositions = true; for (int i = 0; i < 20; i++) writer.AddDocument(d); // force merge writer.Optimize(); // flush writer.Close(); _TestUtil.CheckIndex(ram); SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram); FieldInfos fi = reader.FieldInfos(); Assert.IsTrue(!fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should not be set."); Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set."); reader.Close(); ram.Close(); } private void AssertNoPrx(Directory dir) { System.String[] files = dir.ListAll(); for (int i = 0; i < files.Length; i++) Assert.IsFalse(files[i].EndsWith(".prx")); } // Verifies no *.prx exists when all fields omit term freq: [Test] public virtual void TestNoPrxFile() { Directory ram = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT); 
            IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            writer.SetMaxBufferedDocs(3);
            writer.MergeFactor = 2;
            writer.UseCompoundFile = false;
            Document d = new Document();

            Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
            f1.OmitTermFreqAndPositions = true;
            d.Add(f1);

            for (int i = 0; i < 30; i++)
                writer.AddDocument(d);

            writer.Commit();

            AssertNoPrx(ram);

            // force merge
            writer.Optimize();
            // flush
            writer.Close();

            AssertNoPrx(ram);
            _TestUtil.CheckIndex(ram);
            ram.Close();
        }

        // Test scores with one field with Term Freqs and one without, otherwise with equal content
        [Test]
        public virtual void TestBasic()
        {
            Directory dir = new MockRAMDirectory();
            Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            writer.MergeFactor = 2;
            writer.SetMaxBufferedDocs(2);
            writer.SetSimilarity(new SimpleSimilarity());

            System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
            System.String term = "term";
            for (int i = 0; i < 30; i++)
            {
                Document d = new Document();
                sb.Append(term).Append(" ");
                System.String content = sb.ToString();
                Field noTf = new Field("noTf", content + (i % 2 == 0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
                noTf.OmitTermFreqAndPositions = true;
                d.Add(noTf);

                Field tf = new Field("tf", content + (i % 2 == 0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
                d.Add(tf);

                writer.AddDocument(d);
                //System.out.println(d);
            }

            writer.Optimize();
            // flush
            writer.Close();
            _TestUtil.CheckIndex(dir);

            /*
             * Verify the index
             */
            Searcher searcher = new IndexSearcher(dir, true);
            searcher.Similarity = new SimpleSimilarity();

            Term a = new Term("noTf", term);
            Term b = new Term("tf", term);
            Term c = new Term("noTf", "notf");
            Term d2 = new Term("tf", "tf");

            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d2);

            searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
            //System.out.println(CountingHitCollector.getCount());

            BooleanQuery bq = new BooleanQuery();
            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
            Assert.IsTrue(15 == CountingHitCollector.GetCount());

            searcher.Close();
            dir.Close();
        }

        public class CountingHitCollector : Collector
        {
            internal static int count = 0;
            internal static int sum = 0;
            private int docBase = -1;

            internal CountingHitCollector()
            {
                count = 0;
                sum = 0;
            }

            public override void SetScorer(Scorer scorer)
            {
            }

            public override void Collect(int doc)
            {
                count++;
                sum += doc + docBase; // use it to avoid any possibility of being optimized away
            }

            public static int GetCount()
            {
                return count;
            }
            public static int GetSum()
            {
                return sum;
            }

            public override void SetNextReader(IndexReader reader, int docBase)
            {
                this.docBase = docBase;
            }

            public override bool AcceptsDocsOutOfOrder
            {
                get { return true; }
            }
        }
    }
}