/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using Document = Lucene.Net.Documents.Document;
using Term = Lucene.Net.Index.Term;

namespace Lucene.Net.Search
{
    /// <summary>Implements search over a set of Searchables.
    ///
    /// Applications usually need only call the inherited {@link #Search(Query)}
    /// or {@link #Search(Query,Filter)} methods.
    /// </summary>
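    /// <example>
    /// A minimal usage sketch, assuming two existing on-disk indexes; the index
    /// paths below are illustrative placeholders, not part of this class:
    /// <code>
    /// Searchable[] searchables = new Searchable[]
    /// {
    ///     new IndexSearcher("/path/to/index1"),
    ///     new IndexSearcher("/path/to/index2")
    /// };
    /// Searcher searcher = new MultiSearcher(searchables);
    /// Hits hits = searcher.Search(new TermQuery(new Term("contents", "lucene")));
    /// searcher.Close();
    /// </code>
    /// </example>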
    public class MultiSearcher : Searcher
    {
        private class AnonymousClassHitCollector : HitCollector
        {
            public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
            {
                InitBlock(results, start, enclosingInstance);
            }
            private void InitBlock(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
            {
                this.results = results;
                this.start = start;
                this.enclosingInstance = enclosingInstance;
            }
            private Lucene.Net.Search.HitCollector results;
            private int start;
            private MultiSearcher enclosingInstance;
            public MultiSearcher Enclosing_Instance
            {
                get { return enclosingInstance; }
            }
            public override void Collect(int doc, float score)
            {
                results.Collect(doc + start, score);
            }
        }

        /// <summary>Document Frequency cache acting as a Dummy-Searcher.
        /// This class is not a full-fledged Searcher, but only supports
        /// the methods necessary to initialize Weights.
        /// </summary>
        private class CachedDfSource : Searcher
        {
            private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
            private int maxDoc; // document count

            public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc)
            {
                this.dfMap = dfMap;
                this.maxDoc = maxDoc;
            }

            public override int DocFreq(Term term)
            {
                int df;
                try
                {
                    df = ((System.Int32) dfMap[term]);
                }
                catch (System.NullReferenceException)
                {
                    throw new System.ArgumentException("df for term " + term.Text() + " not available");
                }
                return df;
            }

            public override int[] DocFreqs(Term[] terms)
            {
                int[] result = new int[terms.Length];
                for (int i = 0; i < terms.Length; i++)
                {
                    result[i] = DocFreq(terms[i]);
                }
                return result;
            }

            public override int MaxDoc()
            {
                return maxDoc;
            }

            public override Query Rewrite(Query query)
            {
                // this is a bit of a hack. We know that a query which
                // creates a Weight based on this Dummy-Searcher is
                // always already rewritten (see CreateWeight()).
                // Therefore we just return the unmodified query here
                return query;
            }

            public override void Close()
            {
                throw new System.NotSupportedException();
            }

            public override Document Doc(int i)
            {
                throw new System.NotSupportedException();
            }

            public override Explanation Explain(Weight weight, int doc)
            {
                throw new System.NotSupportedException();
            }

            public override void Search(Weight weight, Filter filter, HitCollector results)
            {
                throw new System.NotSupportedException();
            }

            public override TopDocs Search(Weight weight, Filter filter, int n)
            {
                throw new System.NotSupportedException();
            }

            public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
            {
                throw new System.NotSupportedException();
            }
        }

        private Lucene.Net.Search.Searchable[] searchables;
        private int[] starts;
        private int maxDoc = 0;

        /// <summary>Creates a searcher which searches searchables.</summary>
        public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
        {
            this.searchables = searchables;

            starts = new int[searchables.Length + 1]; // build starts array
            for (int i = 0; i < searchables.Length; i++)
            {
                starts[i] = maxDoc;
                maxDoc += searchables[i].MaxDoc(); // compute maxDocs
            }
            starts[searchables.Length] = maxDoc;
        }
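        // Illustrative note (hypothetical document counts, not taken from the code
        // above): for two searchables whose MaxDoc() values are 100 and 50, the
        // constructor yields starts == {0, 100, 150} and maxDoc == 150. A composite
        // document id of 120 therefore belongs to searchables[1] with local id
        // 120 - starts[1] == 20; SubSearcher(120) and SubDoc(120) below compute
        // exactly this mapping.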
        /// <summary>Return the array of {@link Searchable}s this searches.</summary>
        public virtual Lucene.Net.Search.Searchable[] GetSearchables()
        {
            return searchables;
        }

        protected internal virtual int[] GetStarts()
        {
            return starts;
        }

        // inherit javadoc
        public override void Close()
        {
            for (int i = 0; i < searchables.Length; i++)
                searchables[i].Close();
        }

        public override int DocFreq(Term term)
        {
            int docFreq = 0;
            for (int i = 0; i < searchables.Length; i++)
                docFreq += searchables[i].DocFreq(term);
            return docFreq;
        }

        // inherit javadoc
        public override Document Doc(int n)
        {
            int i = SubSearcher(n); // find searcher index
            return searchables[i].Doc(n - starts[i]); // dispatch to searcher
        }

        /// <summary>Returns index of the searcher for document n in the array
        /// used to construct this searcher.
        /// </summary>
        public virtual int SubSearcher(int n)
        {
            // find searcher for doc n:
            // replace w/ call to Arrays.binarySearch in Java 1.2
            int lo = 0; // search starts array
            int hi = searchables.Length - 1; // for first element less than n, return its index
            while (hi >= lo)
            {
                int mid = (lo + hi) >> 1;
                int midValue = starts[mid];
                if (n < midValue)
                    hi = mid - 1;
                else if (n > midValue)
                    lo = mid + 1;
                else
                {
                    // found a match
                    while (mid + 1 < searchables.Length && starts[mid + 1] == midValue)
                    {
                        mid++; // scan to last match
                    }
                    return mid;
                }
            }
            return hi;
        }

        /// <summary>Returns the document number of document n within its
        /// sub-index.
        /// </summary>
        public virtual int SubDoc(int n)
        {
            return n - starts[SubSearcher(n)];
        }

        public override int MaxDoc()
        {
            return maxDoc;
        }

        public override TopDocs Search(Weight weight, Filter filter, int nDocs)
        {
            HitQueue hq = new HitQueue(nDocs);
            int totalHits = 0;

            for (int i = 0; i < searchables.Length; i++)
            {
                // search each searcher
                TopDocs docs = searchables[i].Search(weight, filter, nDocs);
                totalHits += docs.totalHits; // update totalHits
                ScoreDoc[] scoreDocs = docs.scoreDocs;
                for (int j = 0; j < scoreDocs.Length; j++)
                {
                    // merge scoreDocs into hq
                    ScoreDoc scoreDoc = scoreDocs[j];
                    scoreDoc.doc += starts[i]; // convert doc
                    if (!hq.Insert(scoreDoc))
                        break; // no more scores > minScore
                }
            }

            ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
            for (int i = hq.Size() - 1; i >= 0; i--)
                // put docs in array
                scoreDocs2[i] = (ScoreDoc) hq.Pop();

            float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs2[0].score;

            return new TopDocs(totalHits, scoreDocs2, maxScore);
        }
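        // Illustrative note (hypothetical numbers): with nDocs = 10 and three
        // searchables, each sub-searcher above returns at most its own top 10,
        // and the HitQueue keeps only the 10 best hits overall; adding starts[i]
        // to each hit's doc shifts it into the composite document-id space before
        // the merge.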
        public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
        {
            FieldDocSortedHitQueue hq = null;
            int totalHits = 0;

            float maxScore = System.Single.NegativeInfinity;

            for (int i = 0; i < searchables.Length; i++)
            {
                // search each searcher
                TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);

                if (hq == null)
                    hq = new FieldDocSortedHitQueue(docs.fields, n);
                totalHits += docs.totalHits; // update totalHits
                maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
                ScoreDoc[] scoreDocs = docs.scoreDocs;
                for (int j = 0; j < scoreDocs.Length; j++)
                {
                    // merge scoreDocs into hq
                    ScoreDoc scoreDoc = scoreDocs[j];
                    scoreDoc.doc += starts[i]; // convert doc
                    if (!hq.Insert(scoreDoc))
                        break; // no more scores > minScore
                }
            }

            ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
            for (int i = hq.Size() - 1; i >= 0; i--)
                // put docs in array
                scoreDocs2[i] = (ScoreDoc) hq.Pop();

            return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
        }

        // inherit javadoc
        public override void Search(Weight weight, Filter filter, HitCollector results)
        {
            for (int i = 0; i < searchables.Length; i++)
            {
                int start = starts[i];

                searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, start, this));
            }
        }

        public override Query Rewrite(Query original)
        {
            Query[] queries = new Query[searchables.Length];
            for (int i = 0; i < searchables.Length; i++)
            {
                queries[i] = searchables[i].Rewrite(original);
            }
            return queries[0].Combine(queries);
        }

        public override Explanation Explain(Weight weight, int doc)
        {
            int i = SubSearcher(doc); // find searcher index
            return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher
        }

        /// <summary>Create weight in multiple index scenario.
        ///
        /// Distributed query processing is done in the following steps:
        /// 1. rewrite query
        /// 2. extract necessary terms
        /// 3. collect dfs for these terms from the Searchables
        /// 4. create query weight using aggregate dfs.
        /// 5. distribute that weight to Searchables
        /// 6. merge results
        ///
        /// Steps 1-4 are done here, 5+6 in the Search() methods.
        /// </summary>
        /// <returns>aggregate weight for the rewritten query</returns>
        protected internal override Weight CreateWeight(Query original)
        {
            // step 1
            Query rewrittenQuery = Rewrite(original);

            // step 2
            System.Collections.Hashtable terms = new System.Collections.Hashtable();
            rewrittenQuery.ExtractTerms(terms);

            // step 3
            Term[] allTermsArray = new Term[terms.Count];
            int index = 0;
            System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
            while (e.MoveNext())
                allTermsArray[index++] = e.Current as Term;

            int[] aggregatedDfs = new int[terms.Count];
            for (int i = 0; i < searchables.Length; i++)
            {
                int[] dfs = searchables[i].DocFreqs(allTermsArray);
                for (int j = 0; j < aggregatedDfs.Length; j++)
                {
                    aggregatedDfs[j] += dfs[j];
                }
            }

            System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
            for (int i = 0; i < allTermsArray.Length; i++)
            {
                dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
            }

            // step 4
            int numDocs = MaxDoc();
            CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs);

            return rewrittenQuery.Weight(cacheSim);
        }
    }
}