/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using System.Linq; using Lucene.Net.Support; using Lucene.Net.Util; using Document = Lucene.Net.Documents.Document; using FieldSelector = Lucene.Net.Documents.FieldSelector; using CorruptIndexException = Lucene.Net.Index.CorruptIndexException; using IndexReader = Lucene.Net.Index.IndexReader; using Term = Lucene.Net.Index.Term; using ReaderUtil = Lucene.Net.Util.ReaderUtil; namespace Lucene.Net.Search { /// Implements search over a set of Searchables. /// ///

Applications usually need only call the inherited /// <see cref="Searcher.Search(Query, int)" /> or <see cref="Searcher.Search(Query, Sort, int)" /> methods. ///

public class MultiSearcher : Searcher
{
    /// <summary>
    /// Wraps a caller-supplied <see cref="Collector"/> and shifts the reader's
    /// doc base by a fixed offset so that hits from one sub-searcher are
    /// reported in the merged (global) doc id space.
    /// </summary>
    private class AnonymousClassCollector : Collector
    {
        public AnonymousClassCollector(Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance)
        {
            InitBlock(collector, start, enclosingInstance);
        }

        private void InitBlock(Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance)
        {
            this.collector = collector;
            this.start = start;
            this.enclosingInstance = enclosingInstance;
        }

        private Lucene.Net.Search.Collector collector; // the collector being wrapped
        private int start;                             // global doc id offset of the current sub-searcher
        private MultiSearcher enclosingInstance;

        public MultiSearcher Enclosing_Instance
        {
            get { return enclosingInstance; }
        }

        public override void SetScorer(Scorer scorer)
        {
            collector.SetScorer(scorer);
        }

        public override void Collect(int doc)
        {
            collector.Collect(doc);
        }

        public override void SetNextReader(IndexReader reader, int docBase)
        {
            // Shift the base so collected doc ids are global, not per-sub-searcher.
            collector.SetNextReader(reader, start + docBase);
        }

        public override bool AcceptsDocsOutOfOrder
        {
            get { return collector.AcceptsDocsOutOfOrder; }
        }
    }

    /// <summary>
    /// Document Frequency cache acting as a Dummy-Searcher. This class is no
    /// full-fledged Searcher, but only supports the methods necessary to
    /// initialize Weights.
    /// </summary>
    private class CachedDfSource : Searcher
    {
        // NOTE(review): the generic type arguments were missing in the original
        // text (apparently lost in an export); restored as Dictionary<Term, int>,
        // matching the aggregated doc-freq map built by CreateWeight.
        private readonly Dictionary<Term, int> dfMap; // Map from Terms to corresponding doc freqs
        private readonly int maxDoc;                  // document count

        public CachedDfSource(Dictionary<Term, int> dfMap, int maxDoc, Similarity similarity)
        {
            this.dfMap = dfMap;
            this.maxDoc = maxDoc;
            Similarity = similarity;
        }

        /// <summary>
        /// Returns the cached document frequency for <paramref name="term"/>.
        /// </summary>
        /// <exception cref="System.ArgumentException">if the term was not cached</exception>
        public override int DocFreq(Term term)
        {
            int df;
            // TryGetValue avoids using KeyNotFoundException for ordinary control
            // flow while preserving the original contract of throwing
            // ArgumentException for terms that were never aggregated.
            if (!dfMap.TryGetValue(term, out df))
            {
                throw new System.ArgumentException("df for term " + term.Text + " not available");
            }
            return df;
        }

        public override int[] DocFreqs(Term[] terms)
        {
            int[] result = new int[terms.Length];
            for (int i = 0; i < terms.Length; i++)
            {
                result[i] = DocFreq(terms[i]);
            }
            return result;
        }

        public override int MaxDoc
        {
            get { return maxDoc; }
        }

        public override Query Rewrite(Query query)
        {
            // this is a bit of a hack. We know that a query which
            // creates a Weight based on this Dummy-Searcher is
            // always already rewritten (see preparedWeight()).
            // Therefore we just return the unmodified query here
            return query;
        }

        // TODO: This probably shouldn't throw an exception?
        protected override void Dispose(bool disposing)
        {
            throw new System.NotSupportedException();
        }

        public override Document Doc(int i)
        {
            throw new System.NotSupportedException();
        }

        public override Document Doc(int i, FieldSelector fieldSelector)
        {
            throw new System.NotSupportedException();
        }

        public override Explanation Explain(Weight weight, int doc)
        {
            throw new System.NotSupportedException();
        }

        public override void Search(Weight weight, Filter filter, Collector results)
        {
            throw new System.NotSupportedException();
        }

        public override TopDocs Search(Weight weight, Filter filter, int n)
        {
            throw new System.NotSupportedException();
        }

        public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
        {
            throw new System.NotSupportedException();
        }
    }

    private Searchable[] searchables;
    private int[] starts;    // starts[i] = global doc id offset of searchables[i]; starts[length] = maxDoc
    private int maxDoc = 0;  // total document count across all sub-searchers
    private bool isDisposed;

    /// <summary>Creates a searcher which searches <paramref name="searchables"/>.</summary>
    public MultiSearcher(params Searchable[] searchables)
    {
        this.searchables = searchables;
        starts = new int[searchables.Length + 1]; // build starts array
        for (int i = 0; i < searchables.Length; i++)
        {
            starts[i] = maxDoc;
            maxDoc += searchables[i].MaxDoc; // compute maxDocs
        }
        starts[searchables.Length] = maxDoc;
    }

    /// <summary>Return the array of <see cref="Searchable"/>s this searches.</summary>
public virtual Searchable[] GetSearchables() { return searchables; } protected internal virtual int[] GetStarts() { return starts; } protected override void Dispose(bool disposing) { if (isDisposed) return; if (disposing) { for (int i = 0; i < searchables.Length; i++) searchables[i].Close(); } isDisposed = true; } public override int DocFreq(Term term) { int docFreq = 0; for (int i = 0; i < searchables.Length; i++) docFreq += searchables[i].DocFreq(term); return docFreq; } // inherit javadoc public override Document Doc(int n) { int i = SubSearcher(n); // find searcher index return searchables[i].Doc(n - starts[i]); // dispatch to searcher } // inherit javadoc public override Document Doc(int n, FieldSelector fieldSelector) { int i = SubSearcher(n); // find searcher index return searchables[i].Doc(n - starts[i], fieldSelector); // dispatch to searcher } /// Returns index of the searcher for document n in the array /// used to construct this searcher. /// public virtual int SubSearcher(int n) { // find searcher for doc n: return ReaderUtil.SubIndex(n, starts); } /// Returns the document number of document n within its /// sub-index. 
/// public virtual int SubDoc(int n) { return n - starts[SubSearcher(n)]; } public override int MaxDoc { get { return maxDoc; } } public override TopDocs Search(Weight weight, Filter filter, int nDocs) { HitQueue hq = new HitQueue(nDocs, false); int totalHits = 0; var lockObj = new object(); for (int i = 0; i < searchables.Length; i++) { // search each searcher // use NullLock, we don't care about synchronization for these TopDocs docs = MultiSearcherCallableNoSort(ThreadLock.NullLock, lockObj, searchables[i], weight, filter, nDocs, hq, i, starts); totalHits += docs.TotalHits; // update totalHits } ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()]; for (int i = hq.Size() - 1; i >= 0; i--) // put docs in array scoreDocs2[i] = hq.Pop(); float maxScore = (totalHits == 0)?System.Single.NegativeInfinity:scoreDocs2[0].Score; return new TopDocs(totalHits, scoreDocs2, maxScore); } public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort) { var hq = new FieldDocSortedHitQueue(n); int totalHits = 0; float maxScore = System.Single.NegativeInfinity; var lockObj = new object(); for (int i = 0; i < searchables.Length; i++) { // search each searcher // use NullLock, we don't care about synchronization for these TopFieldDocs docs = MultiSearcherCallableWithSort(ThreadLock.NullLock, lockObj, searchables[i], weight, filter, n, hq, sort, i, starts); totalHits += docs.TotalHits; maxScore = System.Math.Max(maxScore, docs.MaxScore); } ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()]; for (int i = hq.Size() - 1; i >= 0; i--) // put docs in array scoreDocs2[i] = hq.Pop(); return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore); } /// public override void Search(Weight weight, Filter filter, Collector collector) { for (int i = 0; i < searchables.Length; i++) { int start = starts[i]; Collector hc = new AnonymousClassCollector(collector, start, this); searchables[i].Search(weight, filter, hc); } } public override Query Rewrite(Query original) { Query[] 
queries = new Query[searchables.Length]; for (int i = 0; i < searchables.Length; i++) { queries[i] = searchables[i].Rewrite(original); } return queries[0].Combine(queries); } public override Explanation Explain(Weight weight, int doc) { int i = SubSearcher(doc); // find searcher index return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher } /// Create weight in multiple index scenario. /// /// Distributed query processing is done in the following steps: /// 1. rewrite query /// 2. extract necessary terms /// 3. collect dfs for these terms from the Searchables /// 4. create query weight using aggregate dfs. /// 5. distribute that weight to Searchables /// 6. merge results /// /// Steps 1-4 are done here, 5+6 in the search() methods /// /// /// rewritten queries /// public /*protected internal*/ override Weight CreateWeight(Query original) { // step 1 Query rewrittenQuery = Rewrite(original); // step 2 ISet terms = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(); rewrittenQuery.ExtractTerms(terms); // step3 Term[] allTermsArray = terms.ToArray(); int[] aggregatedDfs = new int[terms.Count]; for (int i = 0; i < searchables.Length; i++) { int[] dfs = searchables[i].DocFreqs(allTermsArray); for (int j = 0; j < aggregatedDfs.Length; j++) { aggregatedDfs[j] += dfs[j]; } } var dfMap = new Dictionary(); for (int i = 0; i < allTermsArray.Length; i++) { dfMap[allTermsArray[i]] = aggregatedDfs[i]; } // step4 int numDocs = MaxDoc; CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, Similarity); return rewrittenQuery.Weight(cacheSim); } internal Func MultiSearcherCallableNoSort = (threadLock, lockObj, searchable, weight, filter, nDocs, hq, i, starts) => { TopDocs docs = searchable.Search(weight, filter, nDocs); ScoreDoc[] scoreDocs = docs.ScoreDocs; for(int j = 0; j < scoreDocs.Length; j++) // merge scoreDocs into hq { ScoreDoc scoreDoc = scoreDocs[j]; scoreDoc.Doc += starts[i]; //convert doc //it would be so nice if we had a 
thread-safe insert try { threadLock.Enter(lockObj); if (scoreDoc == hq.InsertWithOverflow(scoreDoc)) break; } finally { threadLock.Exit(lockObj); } } return docs; }; internal Func MultiSearcherCallableWithSort = (threadLock, lockObj, searchable, weight, filter, nDocs, hq, sort, i, starts) => { TopFieldDocs docs = searchable.Search(weight, filter, nDocs, sort); // if one of the Sort fields is FIELD_DOC, need to fix its values, so that // it will break ties by doc Id properly. Otherwise, it will compare to // 'relative' doc Ids, that belong to two different searchables. for (int j = 0; j < docs.fields.Length; j++) { if (docs.fields[j].Type == SortField.DOC) { // iterate over the score docs and change their fields value for (int j2 = 0; j2 < docs.ScoreDocs.Length; j2++) { FieldDoc fd = (FieldDoc) docs.ScoreDocs[j2]; fd.fields[j] = (int)fd.fields[j] + starts[i]; } break; } } try { threadLock.Enter(lockObj); hq.SetFields(docs.fields); } finally { threadLock.Exit(lockObj); } ScoreDoc[] scoreDocs = docs.ScoreDocs; for (int j = 0; j < scoreDocs.Length; j++) // merge scoreDocs into hq { FieldDoc fieldDoc = (FieldDoc) scoreDocs[j]; fieldDoc.Doc += starts[i]; //convert doc //it would be so nice if we had a thread-safe insert lock (lockObj) { if (fieldDoc == hq.InsertWithOverflow(fieldDoc)) break; } } return docs; }; } }