/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using NUnit.Framework;

using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using IndexReader = Lucene.Net.Index.IndexReader;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using Term = Lucene.Net.Index.Term;
using Directory = Lucene.Net.Store.Directory;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using DocIdBitSet = Lucene.Net.Util.DocIdBitSet;

namespace Lucene.Net.Search
{
    /// <summary>
    /// $Id$
    /// </summary>
    [TestFixture]
    public class TestScorerPerf : LuceneTestCase
    {
        internal System.Random r = new System.Random((System.Int32) 0);
        internal bool validate = true; // set to false when doing performance testing

        internal System.Collections.BitArray[] sets;
        internal Term[] terms;
        internal IndexSearcher s;

        public virtual void CreateDummySearcher()
        {
            // Create a dummy index with nothing in it.
            // This could possibly fail if Lucene starts checking for docid ranges...
            RAMDirectory rd = new RAMDirectory();
            IndexWriter iw = new IndexWriter(rd, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            iw.Close();
            s = new IndexSearcher(rd);
        }

        public virtual void CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir)
        {
            int[] freq = new int[nTerms];
            terms = new Term[nTerms];
            for (int i = 0; i < nTerms; i++)
            {
                int f = (nTerms + 1) - i; // make first terms less frequent
                freq[i] = (int) System.Math.Ceiling(System.Math.Pow(f, power));
                terms[i] = new Term("f", System.Convert.ToString((char) ('A' + i)));
            }

            IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            for (int i = 0; i < nDocs; i++)
            {
                Document d = new Document();
                for (int j = 0; j < nTerms; j++)
                {
                    if (r.Next(freq[j]) == 0)
                    {
                        d.Add(new Field("f", terms[j].Text(), Field.Store.NO, Field.Index.NOT_ANALYZED));
                        //System.out.println(d);
                    }
                }
                iw.AddDocument(d);
            }
            iw.Optimize();
            iw.Close();
        }
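        /// <summary>
        /// Builds a BitArray of <c>sz</c> bits (rounded up to a multiple of 64) and randomly
        /// turns on up to <c>numBitsToSet</c> of them; these sets back the random filters
        /// that the conjunction tests search against.
        /// </summary>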
        public virtual System.Collections.BitArray RandBitSet(int sz, int numBitsToSet)
        {
            System.Collections.BitArray set_Renamed = new System.Collections.BitArray((sz % 64 == 0 ? sz / 64 : sz / 64 + 1) * 64);
            for (int i = 0; i < numBitsToSet; i++)
            {
                set_Renamed.Set(r.Next(sz), true);
            }
            return set_Renamed;
        }

        public virtual System.Collections.BitArray[] RandBitSets(int numSets, int setSize)
        {
            System.Collections.BitArray[] sets = new System.Collections.BitArray[numSets];
            for (int i = 0; i < sets.Length; i++)
            {
                sets[i] = RandBitSet(setSize, r.Next(setSize));
            }
            return sets;
        }

        public class CountingHitCollector : HitCollector
        {
            internal int count = 0;
            internal int sum = 0;

            public override void Collect(int doc, float score)
            {
                count++;
                sum += doc; // use it to avoid any possibility of being optimized away
            }

            public virtual int GetCount()
            {
                return count;
            }

            public virtual int GetSum()
            {
                return sum;
            }
        }

        public class MatchingHitCollector : CountingHitCollector
        {
            internal System.Collections.BitArray answer;
            internal int pos = -1;

            public MatchingHitCollector(System.Collections.BitArray answer)
            {
                this.answer = answer;
            }

            public override void Collect(int doc, float score)
            {
                pos = SupportClass.Number.NextSetBit(answer, pos + 1);
                if (pos != doc)
                {
                    throw new System.SystemException("Expected doc " + pos + " but got " + doc);
                }
                base.Collect(doc, score);
            }
        }

        public class AnonymousClassFilter : Filter
        {
            private System.Collections.BitArray rnd;

            public AnonymousClassFilter(System.Collections.BitArray rnd)
            {
                this.rnd = rnd;
            }

            public override DocIdSet GetDocIdSet(IndexReader reader)
            {
                return new DocIdBitSet(rnd);
            }

            [System.Obsolete()]
            public override System.Collections.BitArray Bits(IndexReader reader)
            {
                return null;
            }
        }

        internal virtual System.Collections.BitArray AddClause(BooleanQuery bq, System.Collections.BitArray result)
        {
            System.Collections.BitArray rnd = sets[r.Next(sets.Length)];
            Query q = new ConstantScoreQuery(new AnonymousClassFilter(rnd));
            bq.Add(q, BooleanClause.Occur.MUST);
            if (validate)
            {
                if (result == null)
                    result = (System.Collections.BitArray) rnd.Clone();
                else
                {
                    result.And(rnd);
                }
            }
            return result;
        }

        public virtual int DoConjunctions(int iter, int maxClauses)
        {
            int ret = 0;

            for (int i = 0; i < iter; i++)
            {
                int nClauses = r.Next(maxClauses - 1) + 2; // min 2 clauses
                BooleanQuery bq = new BooleanQuery();
                System.Collections.BitArray result = null;
                for (int j = 0; j < nClauses; j++)
                {
                    result = AddClause(bq, result);
                }

                CountingHitCollector hc = validate ? new MatchingHitCollector(result) : new CountingHitCollector();
                s.Search(bq, hc);
                ret += hc.GetSum();

                if (validate)
                    Assert.AreEqual(SupportClass.Number.Cardinality(result), hc.GetCount());
                // System.out.println(hc.getCount());
            }

            return ret;
        }

        public virtual int DoNestedConjunctions(int iter, int maxOuterClauses, int maxClauses)
        {
            int ret = 0;
            long nMatches = 0;

            for (int i = 0; i < iter; i++)
            {
                int oClauses = r.Next(maxOuterClauses - 1) + 2;
                BooleanQuery oq = new BooleanQuery();
                System.Collections.BitArray result = null;

                for (int o = 0; o < oClauses; o++)
                {
                    int nClauses = r.Next(maxClauses - 1) + 2; // min 2 clauses
                    BooleanQuery bq = new BooleanQuery();
                    for (int j = 0; j < nClauses; j++)
                    {
                        result = AddClause(bq, result);
                    }

                    oq.Add(bq, BooleanClause.Occur.MUST);
                } // outer

                CountingHitCollector hc = validate ? new MatchingHitCollector(result) : new CountingHitCollector();
                s.Search(oq, hc);
                nMatches += hc.GetCount();
                ret += hc.GetSum();

                if (validate)
                    Assert.AreEqual(SupportClass.Number.Cardinality(result), hc.GetCount());
                // System.out.println(hc.getCount());
            }
            System.Console.Out.WriteLine("Average number of matches=" + (nMatches / iter));
            return ret;
        }
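        /// <summary>
        /// Runs <c>iter</c> random all-MUST conjunctions of TermQuery clauses against the
        /// searcher, picking each term at most once per query, and returns the accumulated
        /// docid sum so the work cannot be optimized away.
        /// </summary>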
        public virtual int DoTermConjunctions(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
        {
            int ret = 0;
            long nMatches = 0;

            for (int i = 0; i < iter; i++)
            {
                int nClauses = r.Next(maxClauses - 1) + 2; // min 2 clauses
                BooleanQuery bq = new BooleanQuery();
                System.Collections.BitArray termflag = new System.Collections.BitArray((termsInIndex % 64 == 0 ? termsInIndex / 64 : termsInIndex / 64 + 1) * 64);
                for (int j = 0; j < nClauses; j++)
                {
                    int tnum;
                    // don't pick same clause twice
                    tnum = r.Next(termsInIndex);
                    if (termflag.Get(tnum))
                    {
                        // find the next clear (unset) bit after tnum
                        int nextClearBit = -1;
                        for (int k = tnum + 1; k < termflag.Count; k++)
                        {
                            if (!termflag.Get(k))
                            {
                                nextClearBit = k;
                                break;
                            }
                        }
                        tnum = nextClearBit;
                    }
                    if (tnum < 0 || tnum >= termsInIndex)
                    {
                        // none found past tnum: wrap around and take the first clear bit
                        int nextClearBit = -1;
                        for (int k = 0; k < termflag.Count; k++)
                        {
                            if (!termflag.Get(k))
                            {
                                nextClearBit = k;
                                break;
                            }
                        }
                        tnum = nextClearBit;
                    }
                    termflag.Set(tnum, true);
                    Query tq = new TermQuery(terms[tnum]);
                    bq.Add(tq, BooleanClause.Occur.MUST);
                }

                CountingHitCollector hc = new CountingHitCollector();
                s.Search(bq, hc);
                nMatches += hc.GetCount();
                ret += hc.GetSum();
            }
            System.Console.Out.WriteLine("Average number of matches=" + (nMatches / iter));

            return ret;
        }

        public virtual int DoNestedTermConjunctions(IndexSearcher s, int termsInIndex, int maxOuterClauses, int maxClauses, int iter)
        {
            int ret = 0;
            long nMatches = 0;
            for (int i = 0; i < iter; i++)
            {
                int oClauses = r.Next(maxOuterClauses - 1) + 2;
                BooleanQuery oq = new BooleanQuery();
                for (int o = 0; o < oClauses; o++)
                {
                    int nClauses = r.Next(maxClauses - 1) + 2; // min 2 clauses
                    BooleanQuery bq = new BooleanQuery();
                    System.Collections.BitArray termflag = new System.Collections.BitArray((termsInIndex % 64 == 0 ? termsInIndex / 64 : termsInIndex / 64 + 1) * 64);
                    for (int j = 0; j < nClauses; j++)
                    {
                        int tnum;
                        // don't pick same clause twice
                        tnum = r.Next(termsInIndex);
                        if (termflag.Get(tnum))
                        {
                            // find the next clear (unset) bit after tnum
                            int nextClearBit = -1;
                            for (int k = tnum + 1; k < termflag.Count; k++)
                            {
                                if (!termflag.Get(k))
                                {
                                    nextClearBit = k;
                                    break;
                                }
                            }
                            tnum = nextClearBit;
                        }
                        if (tnum < 0 || tnum >= 25)
                        {
                            // none found past tnum: wrap around and take the first clear bit
                            int nextClearBit = -1;
                            for (int k = 0; k < termflag.Count; k++)
                            {
                                if (!termflag.Get(k))
                                {
                                    nextClearBit = k;
                                    break;
                                }
                            }
                            tnum = nextClearBit;
                        }
                        termflag.Set(tnum, true);
                        Query tq = new TermQuery(terms[tnum]);
                        bq.Add(tq, BooleanClause.Occur.MUST);
                    } // inner

                    oq.Add(bq, BooleanClause.Occur.MUST);
                } // outer

                CountingHitCollector hc = new CountingHitCollector();
                s.Search(oq, hc);
                nMatches += hc.GetCount();
                ret += hc.GetSum();
            }
            System.Console.Out.WriteLine("Average number of matches=" + (nMatches / iter));
            return ret;
        }

        public virtual int DoSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
        {
            int ret = 0;

            for (int i = 0; i < iter; i++)
            {
                int nClauses = r.Next(maxClauses - 1) + 2; // min 2 clauses
                PhraseQuery q = new PhraseQuery();
                for (int j = 0; j < nClauses; j++)
                {
                    int tnum = r.Next(termsInIndex);
                    q.Add(new Term("f", System.Convert.ToString((char) (tnum + 'A'))), j);
                }
                q.SetSlop(termsInIndex); // this could be random too

                CountingHitCollector hc = new CountingHitCollector();
                s.Search(q, hc);
                ret += hc.GetSum();
            }

            return ret;
        }

        [Test]
        public virtual void TestConjunctions()
        {
            // test many small sets... the bugs will be found on boundary conditions
            CreateDummySearcher();
            validate = true;
            sets = RandBitSets(1000, 10);
            DoConjunctions(10000, 5);
            DoNestedConjunctions(10000, 3, 3);
            s.Close();
        }

        ///
        /// int bigIter=10;
        ///
        /// public void testConjunctionPerf() throws Exception {
        ///   CreateDummySearcher();
        ///   validate=false;
        ///   sets=RandBitSets(32,1000000);
        ///   for (int i=0; i