/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Document = Lucene.Net.Documents.Document;
using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
namespace Lucene.Net.Search
{
/// A ranked list of documents, used to hold search results.
///
/// Caution: Iterate only over the hits needed. Iterating over all hits is
/// generally not desirable and may be the source of performance issues. If you
/// need to iterate over many or all hits, consider using the search method that
/// takes a .
///
///
/// Note: Deleting matching documents concurrently with traversing the
/// hits, might, when deleting hits that were not yet retrieved, decrease
/// . In such case, an exceptionw is thrown when accessing hit n
/// > current_ (but n <
/// _at_start).
///
///
/// see ,
/// and
/// :
///
///
/// TopDocs topDocs = searcher.Search(query, numHits);
/// ScoreDoc[] hits = topDocs.scoreDocs;
/// for (int i = 0; i < hits.Length; i++) {
/// int docId = hits[i].doc;
/// Document d = searcher.Doc(docId);
/// // do something with current hit
/// ...
///
///
[Obsolete("see Searcher.Search(Query, int), Searcher.Search(Query, Filter, int) and Searcher.Search(Query, Filter, int, Sort)")]
public sealed class Hits
{
private Weight weight;
private Searcher searcher;
private Filter filter = null;
private Sort sort = null;
private int length; // the total number of hits
private System.Collections.ArrayList hitDocs = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10)); // cache of hits retrieved
private HitDoc first; // head of LRU cache
private HitDoc last; // tail of LRU cache
private int numDocs = 0; // number cached
private int maxDocs = 200; // max to cache
private int nDeletions; // # deleted docs in the index.
private int lengthAtStart; // this is the number apps usually count on (although deletions can bring it down).
private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
public /*internal*/ bool debugCheckedForDeletions = false; // for test purposes.
internal Hits(Searcher s, Query q, Filter f)
{
weight = q.Weight(s);
searcher = s;
filter = f;
nDeletions = CountDeletions(s);
GetMoreDocs(50); // retrieve 100 initially
lengthAtStart = length;
}
internal Hits(Searcher s, Query q, Filter f, Sort o)
{
weight = q.Weight(s);
searcher = s;
filter = f;
sort = o;
nDeletions = CountDeletions(s);
GetMoreDocs(50); // retrieve 100 initially
lengthAtStart = length;
}
// count # deletions, return -1 if unknown.
private int CountDeletions(Searcher s)
{
int cnt = - 1;
if (s is IndexSearcher)
{
cnt = s.MaxDoc() - ((IndexSearcher) s).GetIndexReader().NumDocs();
}
return cnt;
}
/// Tries to add new documents to hitDocs.
/// Ensures that the hit numbered min has been retrieved.
///
private void GetMoreDocs(int min)
{
if (hitDocs.Count > min)
{
min = hitDocs.Count;
}
int n = min * 2; // double # retrieved
TopDocs topDocs = (sort == null)?searcher.Search(weight, filter, n):searcher.Search(weight, filter, n, sort);
length = topDocs.TotalHits;
ScoreDoc[] scoreDocs = topDocs.ScoreDocs;
float scoreNorm = 1.0f;
if (length > 0 && topDocs.GetMaxScore() > 1.0f)
{
scoreNorm = 1.0f / topDocs.GetMaxScore();
}
int start = hitDocs.Count - nDeletedHits;
// any new deletions?
int nDels2 = CountDeletions(searcher);
debugCheckedForDeletions = false;
if (nDeletions < 0 || nDels2 > nDeletions)
{
// either we cannot count deletions, or some "previously valid hits" might have been deleted, so find exact start point
nDeletedHits = 0;
debugCheckedForDeletions = true;
int i2 = 0;
for (int i1 = 0; i1 < hitDocs.Count && i2 < scoreDocs.Length; i1++)
{
int id1 = ((HitDoc) hitDocs[i1]).id;
int id2 = scoreDocs[i2].doc;
if (id1 == id2)
{
i2++;
}
else
{
nDeletedHits++;
}
}
start = i2;
}
int end = scoreDocs.Length < length?scoreDocs.Length:length;
length += nDeletedHits;
for (int i = start; i < end; i++)
{
hitDocs.Add(new HitDoc(scoreDocs[i].score * scoreNorm, scoreDocs[i].doc));
}
nDeletions = nDels2;
}
/// Returns the total number of hits available in this set.
public int Length()
{
return length;
}
/// Returns the stored fields of the nth document in this set.
/// Documents are cached, so that repeated requests for the same element may
/// return the same Document object.
///
/// CorruptIndexException if the index is corrupt
/// IOException if there is a low-level IO error
public Document Doc(int n)
{
HitDoc hitDoc = HitDoc(n);
// Update LRU cache of documents
Remove(hitDoc); // remove from list, if there
AddToFront(hitDoc); // add to front of list
if (numDocs > maxDocs)
{
// if cache is full
HitDoc oldLast = last;
Remove(last); // flush last
oldLast.doc = null; // let doc get gc'd
}
if (hitDoc.doc == null)
{
hitDoc.doc = searcher.Doc(hitDoc.id); // cache miss: read document
}
return hitDoc.doc;
}
/// Returns the score for the nth document in this set.
public float Score(int n)
{
return HitDoc(n).score;
}
/// Returns the id for the nth document in this set.
/// Note that ids may change when the index changes, so you cannot
/// rely on the id to be stable.
///
public int Id(int n)
{
return HitDoc(n).id;
}
/// Returns a to navigate the Hits. Each item returned
/// from is a .
///
/// Caution: Iterate only over the hits needed. Iterating over all
/// hits is generally not desirable and may be the source of
/// performance issues. If you need to iterate over many or all hits, consider
/// using a search method that takes a .
///
///
public System.Collections.IEnumerator Iterator()
{
return new HitIterator(this);
}
private HitDoc HitDoc(int n)
{
if (n >= lengthAtStart)
{
throw new System.IndexOutOfRangeException("Not a valid hit number: " + n);
}
if (n >= hitDocs.Count)
{
GetMoreDocs(n);
}
if (n >= length)
{
throw new System.Exception("Not a valid hit number: " + n);
}
return (HitDoc) hitDocs[n];
}
private void AddToFront(HitDoc hitDoc)
{
// insert at front of cache
if (first == null)
{
last = hitDoc;
}
else
{
first.prev = hitDoc;
}
hitDoc.next = first;
first = hitDoc;
hitDoc.prev = null;
numDocs++;
}
private void Remove(HitDoc hitDoc)
{
// remove from cache
if (hitDoc.doc == null)
{
// it's not in the list
return ; // abort
}
if (hitDoc.next == null)
{
last = hitDoc.prev;
}
else
{
hitDoc.next.prev = hitDoc.prev;
}
if (hitDoc.prev == null)
{
first = hitDoc.next;
}
else
{
hitDoc.prev.next = hitDoc.next;
}
numDocs--;
}
}
sealed class HitDoc
{
internal float score;
internal int id;
internal Document doc = null;
internal HitDoc next; // in doubly-linked cache
internal HitDoc prev; // in doubly-linked cache
internal HitDoc(float s, int i)
{
score = s;
id = i;
}
}
}