/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using Lucene.Net.Support;
using Lucene.Net.Util;
using Document = Lucene.Net.Documents.Document;
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
using IndexReader = Lucene.Net.Index.IndexReader;
using Term = Lucene.Net.Index.Term;
using ReaderUtil = Lucene.Net.Util.ReaderUtil;
namespace Lucene.Net.Search
{
/// Implements search over a set of <see cref="Searchable"/>s.
///
/// Applications usually need only call the inherited
/// <c>Search</c> methods.
///
public class MultiSearcher:Searcher
{
private class AnonymousClassCollector : Collector
{
    // Wraps a caller-supplied Collector and shifts every reader's docBase by
    // the owning sub-searcher's start offset, so collected doc ids are
    // absolute within the MultiSearcher's combined logical index.
    private Lucene.Net.Search.Collector collector;
    private int start;
    private MultiSearcher enclosingInstance;

    public AnonymousClassCollector(Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance)
    {
        this.collector = collector;
        this.start = start;
        this.enclosingInstance = enclosingInstance;
    }

    public MultiSearcher Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    public override void SetScorer(Scorer scorer)
    {
        collector.SetScorer(scorer);
    }

    public override void Collect(int doc)
    {
        collector.Collect(doc);
    }

    public override void SetNextReader(IndexReader reader, int docBase)
    {
        // docBase is relative to the sub-searcher; adding start re-bases it
        // into the multi-searcher's global doc id space.
        collector.SetNextReader(reader, start + docBase);
    }

    public override bool AcceptsDocsOutOfOrder
    {
        get { return collector.AcceptsDocsOutOfOrder; }
    }
}
/// Document Frequency cache acting as a Dummy-Searcher. This class is not a
/// full-fledged Searcher, but only supports the methods necessary to
/// initialize Weights.
///
private class CachedDfSource : Searcher
{
    private readonly Dictionary<Term, int> dfMap; // Map from Terms to corresponding doc freqs
    private readonly int maxDoc; // document count

    public CachedDfSource(Dictionary<Term, int> dfMap, int maxDoc, Similarity similarity)
    {
        this.dfMap = dfMap;
        this.maxDoc = maxDoc;
        Similarity = similarity;
    }

    /// Returns the cached document frequency for the given term.
    /// Throws ArgumentException if the cache was not primed with this term.
    public override int DocFreq(Term term)
    {
        int df;
        // Single hash lookup instead of exception-driven control flow
        // (the original caught KeyNotFoundException on the indexer).
        if (!dfMap.TryGetValue(term, out df))
        {
            throw new System.ArgumentException("df for term " + term.Text + " not available");
        }
        return df;
    }

    public override int[] DocFreqs(Term[] terms)
    {
        int[] result = new int[terms.Length];
        for (int i = 0; i < terms.Length; i++)
        {
            result[i] = DocFreq(terms[i]);
        }
        return result;
    }

    public override int MaxDoc
    {
        get { return maxDoc; }
    }

    public override Query Rewrite(Query query)
    {
        // this is a bit of a hack. We know that a query which
        // creates a Weight based on this Dummy-Searcher is
        // always already rewritten (see preparedWeight()).
        // Therefore we just return the unmodified query here
        return query;
    }

    // All remaining Searcher operations are unsupported by design: this
    // dummy searcher exists only to serve cached dfs during Weight creation.
    // TODO: This probably shouldn't throw an exception?
    protected override void Dispose(bool disposing)
    {
        throw new System.NotSupportedException();
    }

    public override Document Doc(int i)
    {
        throw new System.NotSupportedException();
    }

    public override Document Doc(int i, FieldSelector fieldSelector)
    {
        throw new System.NotSupportedException();
    }

    public override Explanation Explain(Weight weight, int doc)
    {
        throw new System.NotSupportedException();
    }

    public override void Search(Weight weight, Filter filter, Collector results)
    {
        throw new System.NotSupportedException();
    }

    public override TopDocs Search(Weight weight, Filter filter, int n)
    {
        throw new System.NotSupportedException();
    }

    public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
    {
        throw new System.NotSupportedException();
    }
}
private Searchable[] searchables; // the sub-searchers this multi-searcher fans out to
private int[] starts; // starts[i] = first absolute doc id of sub-searcher i; last entry = maxDoc
private int maxDoc = 0; // total document count across all sub-searchers
private bool isDisposed; // guards Dispose against double-close
/// Creates a searcher which searches the given searchables.
public MultiSearcher(params Searchable[] searchables)
{
    this.searchables = searchables;

    // Build the starts table: starts[i] holds the first absolute doc id of
    // sub-searcher i, and the trailing slot holds the total doc count.
    starts = new int[searchables.Length + 1];
    int docBase = 0;
    for (int i = 0; i < searchables.Length; i++)
    {
        starts[i] = docBase;
        docBase += searchables[i].MaxDoc;
    }
    starts[searchables.Length] = docBase;
    maxDoc = docBase;
}
/// Return the array of <see cref="Searchable"/>s this searches.
public virtual Searchable[] GetSearchables()
{
return searchables;
}
/// Returns the starts table: starts[i] is the first absolute doc id of
/// sub-searcher i, with a trailing entry equal to the total doc count.
protected internal virtual int[] GetStarts()
{
return starts;
}
protected override void Dispose(bool disposing)
{
    // Idempotent: subsequent calls are no-ops.
    if (isDisposed) return;

    if (disposing)
    {
        // Close every underlying searcher.
        foreach (Searchable searchable in searchables)
        {
            searchable.Close();
        }
    }
    isDisposed = true;
}
/// Sums the document frequency of the term across all sub-searchers.
public override int DocFreq(Term term)
{
    int total = 0;
    foreach (Searchable searchable in searchables)
    {
        total += searchable.DocFreq(term);
    }
    return total;
}
// inherit javadoc
public override Document Doc(int n)
{
    // Map the absolute doc id to its sub-searcher, then fetch using the
    // id relative to that sub-index.
    int subIndex = SubSearcher(n);
    return searchables[subIndex].Doc(n - starts[subIndex]);
}
// inherit javadoc
public override Document Doc(int n, FieldSelector fieldSelector)
{
    // Same dispatch as Doc(int), restricted to the requested fields.
    int subIndex = SubSearcher(n);
    return searchables[subIndex].Doc(n - starts[subIndex], fieldSelector);
}
/// Returns index of the searcher for document n in the array
/// used to construct this searcher.
public virtual int SubSearcher(int n)
{
// find searcher for doc n via binary search over the starts table:
return ReaderUtil.SubIndex(n, starts);
}
/// Returns the document number of document n within its
/// sub-index (i.e. relative to that sub-searcher's own doc id space).
public virtual int SubDoc(int n)
{
return n - starts[SubSearcher(n)];
}
/// Total number of documents across all sub-searchers.
public override int MaxDoc
{
get { return maxDoc; }
}
/// Low-level search: queries every sub-searcher and merges the top nDocs
/// hits into a single score-ordered result with absolute doc ids.
public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
    HitQueue hq = new HitQueue(nDocs, false);
    var lockObj = new object();
    int totalHits = 0;

    for (int i = 0; i < searchables.Length; i++)
    {
        // Search each sub-searcher sequentially; NullLock because no
        // synchronization is needed in this single-threaded loop.
        TopDocs docs = MultiSearcherCallableNoSort(ThreadLock.NullLock, lockObj, searchables[i], weight, filter, nDocs, hq, i, starts);
        totalHits += docs.TotalHits;
    }

    // Drain the queue worst-first so the array ends up best-first.
    ScoreDoc[] scoreDocs = new ScoreDoc[hq.Size()];
    for (int i = hq.Size() - 1; i >= 0; i--)
    {
        scoreDocs[i] = hq.Pop();
    }

    float maxScore = totalHits == 0 ? System.Single.NegativeInfinity : scoreDocs[0].Score;
    return new TopDocs(totalHits, scoreDocs, maxScore);
}
/// Low-level sorted search: queries every sub-searcher with the given sort
/// and merges the top n hits into a single sort-ordered result.
public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
{
    var hq = new FieldDocSortedHitQueue(n);
    var lockObj = new object();
    int totalHits = 0;
    float maxScore = System.Single.NegativeInfinity;

    for (int i = 0; i < searchables.Length; i++)
    {
        // Search each sub-searcher sequentially; NullLock because no
        // synchronization is needed in this single-threaded loop.
        TopFieldDocs docs = MultiSearcherCallableWithSort(ThreadLock.NullLock, lockObj, searchables[i], weight, filter, n, hq, sort, i, starts);
        totalHits += docs.TotalHits;
        maxScore = System.Math.Max(maxScore, docs.MaxScore);
    }

    // Drain the queue worst-first so the array ends up best-first.
    ScoreDoc[] scoreDocs = new ScoreDoc[hq.Size()];
    for (int i = hq.Size() - 1; i >= 0; i--)
    {
        scoreDocs[i] = hq.Pop();
    }

    return new TopFieldDocs(totalHits, scoreDocs, hq.GetFields(), maxScore);
}
/// Low-level collector search: hits from each sub-searcher are passed to
/// the caller's collector with doc ids re-based to be absolute.
public override void Search(Weight weight, Filter filter, Collector collector)
{
    for (int i = 0; i < searchables.Length; i++)
    {
        // Wrap the collector so each reader's docBase is shifted by this
        // sub-searcher's start offset.
        Collector wrappingCollector = new AnonymousClassCollector(collector, starts[i], this);
        searchables[i].Search(weight, filter, wrappingCollector);
    }
}
/// Rewrites the query against every sub-searcher and combines the
/// per-index rewrites into a single query.
public override Query Rewrite(Query original)
{
    Query[] queries = searchables.Select(searchable => searchable.Rewrite(original)).ToArray();
    return queries[0].Combine(queries);
}
/// Delegates explanation to the sub-searcher owning the doc, using the
/// doc id relative to that sub-index.
public override Explanation Explain(Weight weight, int doc)
{
    int subIndex = SubSearcher(doc);
    return searchables[subIndex].Explain(weight, doc - starts[subIndex]);
}
/// Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// Returns the weight for the rewritten query, backed by document
/// frequencies aggregated over all sub-searchers.
public /*protected internal*/ override Weight CreateWeight(Query original)
{
    // step 1: rewrite the query against every sub-index
    Query rewrittenQuery = Rewrite(original);

    // step 2: extract the terms the rewritten query uses
    ISet<Term> terms = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<Term>();
    rewrittenQuery.ExtractTerms(terms);

    // step 3: aggregate each term's document frequency over all sub-searchers
    Term[] allTermsArray = terms.ToArray();
    int[] aggregatedDfs = new int[allTermsArray.Length];
    for (int i = 0; i < searchables.Length; i++)
    {
        int[] dfs = searchables[i].DocFreqs(allTermsArray);
        for (int j = 0; j < aggregatedDfs.Length; j++)
        {
            aggregatedDfs[j] += dfs[j];
        }
    }

    var dfMap = new Dictionary<Term, int>();
    for (int i = 0; i < allTermsArray.Length; i++)
    {
        dfMap[allTermsArray[i]] = aggregatedDfs[i];
    }

    // step 4: build the weight against a dummy searcher that serves the
    // cached dfs (the query is already rewritten, see CachedDfSource.Rewrite)
    int numDocs = MaxDoc;
    CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, Similarity);
    return rewrittenQuery.Weight(cacheSim);
}
// Merge routine for the unsorted search: runs the search on one
// sub-searcher and folds its hits into the shared HitQueue under the
// supplied pluggable lock (NullLock for the sequential case).
internal Func<ThreadLock, object, Searchable, Weight, Filter, int, HitQueue, int, int[], TopDocs> MultiSearcherCallableNoSort =
    (threadLock, lockObj, searchable, weight, filter, nDocs, hq, i, starts) =>
    {
        TopDocs docs = searchable.Search(weight, filter, nDocs);
        ScoreDoc[] scoreDocs = docs.ScoreDocs;
        for (int j = 0; j < scoreDocs.Length; j++) // merge scoreDocs into hq
        {
            ScoreDoc scoreDoc = scoreDocs[j];
            scoreDoc.Doc += starts[i]; // convert to an absolute doc id
            // it would be so nice if we had a thread-safe insert
            try
            {
                threadLock.Enter(lockObj);
                // once a doc fails to displace the queue's worst entry, the
                // remaining (lower-scoring) docs from this searcher cannot either
                if (scoreDoc == hq.InsertWithOverflow(scoreDoc))
                    break;
            }
            finally
            {
                threadLock.Exit(lockObj);
            }
        }
        return docs;
    };
// Merge routine for the sorted search: runs the sorted search on one
// sub-searcher, re-bases FIELD_DOC sort values and doc ids into the global
// doc id space, and folds the hits into the shared FieldDocSortedHitQueue
// under the supplied pluggable lock (NullLock for the sequential case).
internal Func<ThreadLock, object, Searchable, Weight, Filter, int, FieldDocSortedHitQueue, Sort, int, int[], TopFieldDocs>
    MultiSearcherCallableWithSort = (threadLock, lockObj, searchable, weight, filter, nDocs, hq, sort, i, starts) =>
    {
        TopFieldDocs docs = searchable.Search(weight, filter, nDocs, sort);
        // if one of the Sort fields is FIELD_DOC, need to fix its values, so that
        // it will break ties by doc Id properly. Otherwise, it will compare to
        // 'relative' doc Ids, that belong to two different searchables.
        for (int j = 0; j < docs.fields.Length; j++)
        {
            if (docs.fields[j].Type == SortField.DOC)
            {
                // iterate over the score docs and change their fields value
                for (int j2 = 0; j2 < docs.ScoreDocs.Length; j2++)
                {
                    FieldDoc fd = (FieldDoc) docs.ScoreDocs[j2];
                    fd.fields[j] = (int) fd.fields[j] + starts[i];
                }
                break;
            }
        }

        try
        {
            threadLock.Enter(lockObj);
            hq.SetFields(docs.fields);
        }
        finally
        {
            threadLock.Exit(lockObj);
        }

        ScoreDoc[] scoreDocs = docs.ScoreDocs;
        for (int j = 0; j < scoreDocs.Length; j++) // merge scoreDocs into hq
        {
            FieldDoc fieldDoc = (FieldDoc) scoreDocs[j];
            fieldDoc.Doc += starts[i]; // convert to an absolute doc id
            // it would be so nice if we had a thread-safe insert.
            // Use the same pluggable threadLock as the rest of this callable
            // (the original took a hard Monitor lock here, defeating the
            // NullLock fast path used by the single-threaded caller).
            try
            {
                threadLock.Enter(lockObj);
                if (fieldDoc == hq.InsertWithOverflow(fieldDoc))
                    break;
            }
            finally
            {
                threadLock.Exit(lockObj);
            }
        }
        return docs;
    };
}
}