/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using IndexReader = Lucene.Net.Index.IndexReader;
namespace Lucene.Net.Search
{
// NOTE(review): the cross-reference targets in the comments below (type and
// method names) had been stripped from the original text and were restored
// from the upstream Apache Lucene Collector Javadoc; confirm that they match
// the names used in this port.
/// <summary>
/// Expert: a <c>Collector</c> gathers the raw results of a search and is the
/// place to implement sorting, custom result filtering, collation, etc.
/// </summary>
/// <remarks>
/// <para>
/// Lucene's core collectors are derived from <c>Collector</c>. Most
/// applications can use one of the classes below, or subclass
/// <c>TopDocsCollector</c>, instead of implementing <c>Collector</c>
/// directly:
/// </para>
/// <list type="bullet">
/// <item><description>
/// <c>TopDocsCollector</c> is an abstract base class that assumes you will
/// retrieve the top N docs, according to some criteria, after collection is
/// done.
/// </description></item>
/// <item><description>
/// <c>TopScoreDocCollector</c> is a concrete subclass of
/// <c>TopDocsCollector</c> and sorts according to score + docID. It is used
/// internally by the <c>IndexSearcher</c> search methods that do not take an
/// explicit <c>Sort</c>, and is likely the most frequently used collector.
/// </description></item>
/// <item><description>
/// <c>TopFieldCollector</c> subclasses <c>TopDocsCollector</c> and sorts
/// according to a specified <c>Sort</c> object (sort by field). It is used
/// internally by the <c>IndexSearcher</c> search methods that take an
/// explicit <c>Sort</c>.
/// </description></item>
/// <item><description>
/// <c>TimeLimitingCollector</c> wraps any other <c>Collector</c> and aborts
/// the search if it has taken too much time.
/// </description></item>
/// <item><description>
/// <c>PositiveScoresOnlyCollector</c> wraps any other <c>Collector</c> and
/// prevents collection of hits whose score is &lt;= 0.0.
/// </description></item>
/// </list>
/// <para>
/// <c>Collector</c> decouples the score from the collected doc: the score
/// computation is skipped entirely if it is not needed. Collectors that do
/// need the score should implement <see cref="SetScorer"/> to hold onto the
/// passed <see cref="Scorer"/> instance, and call <c>Scorer.Score()</c>
/// within <see cref="Collect"/> to compute the current hit's score. If your
/// collector may request the score for a single hit multiple times, you
/// should use <c>ScoreCachingWrappingScorer</c>.
/// </para>
/// <para>
/// <b>NOTE:</b> The doc that is passed to <see cref="Collect"/> is relative
/// to the current reader. If your collector needs to resolve this to the
/// docID space of the Multi*Reader, you must re-base it by recording the
/// docBase from the most recent <see cref="SetNextReader"/> call. Here is a
/// simple example showing how to collect docIDs into a BitSet (the sample is
/// written in Java-style pseudo-code; a C# collector would derive from
/// <c>Collector</c> and override the four abstract members instead of using
/// an anonymous class):
/// </para>
/// <code>
/// Searcher searcher = new IndexSearcher(indexReader);
/// final BitSet bits = new BitSet(indexReader.MaxDoc);
/// searcher.search(query, new Collector() {
///     private int docBase;
///
///     // ignore scorer
///     public void setScorer(Scorer scorer) {
///     }
///
///     // accept docs out of order (for a BitSet it doesn't matter)
///     public boolean acceptsDocsOutOfOrder() {
///         return true;
///     }
///
///     public void collect(int doc) {
///         bits.set(doc + docBase);
///     }
///
///     public void setNextReader(IndexReader reader, int docBase) {
///         this.docBase = docBase;
///     }
/// });
/// </code>
/// <para>
/// Not all collectors will need to rebase the docID. For example, a
/// collector that simply counts the total number of hits would skip it.
/// </para>
/// <para>
/// <b>NOTE:</b> Prior to 2.9, Lucene silently filtered out hits with
/// score &lt;= 0. As of 2.9, the core Collectors no longer do that. It is
/// very unusual to have such hits (a negative query boost, or a function
/// query returning negative custom scores, could cause it to happen). If you
/// need that behavior, use <c>PositiveScoresOnlyCollector</c>.
/// </para>
/// <para>
/// <b>NOTE:</b> This API is experimental and might change in incompatible
/// ways in the next release.
/// </para>
/// <para>Since: 2.9</para>
/// </remarks>
public abstract class Collector
{
    /// <summary>
    /// Called before successive calls to <see cref="Collect"/>.
    /// Implementations that need the score of the current document (the one
    /// passed in to <see cref="Collect"/>) should save the passed-in scorer
    /// and call <c>scorer.Score()</c> when needed.
    /// </summary>
    /// <param name="scorer">
    /// The scorer to consult for the score of the document currently being
    /// collected.
    /// </param>
    public abstract void SetScorer(Scorer scorer);

    /// <summary>
    /// Called once for every document matching a query, with the unbased
    /// document number.
    /// </summary>
    /// <remarks>
    /// Note: this is called in an inner search loop. For good search
    /// performance, implementations of this method should not call
    /// <c>Searcher.Doc(int)</c> or <c>IndexReader.Document(int)</c> on every
    /// hit. Doing so can slow searches by an order of magnitude or more.
    /// </remarks>
    /// <param name="doc">
    /// Document number of the hit, relative to the current reader (not yet
    /// re-based).
    /// </param>
    public abstract void Collect(int doc);

    /// <summary>
    /// Called before collecting from each IndexReader. All doc ids passed to
    /// <see cref="Collect"/> afterwards will correspond to
    /// <paramref name="reader"/>.
    /// </summary>
    /// <remarks>
    /// Add <paramref name="docBase"/> to the current IndexReader's internal
    /// document id to re-base the ids received in <see cref="Collect"/>.
    /// </remarks>
    /// <param name="reader">The next IndexReader.</param>
    /// <param name="docBase">
    /// Offset to add to the reader-relative document ids in order to re-base
    /// them.
    /// </param>
    public abstract void SetNextReader(IndexReader reader, int docBase);

    /// <summary>
    /// Returns <c>true</c> if this collector does not require the matching
    /// docIDs to be delivered in int sort order (smallest to largest) to
    /// <see cref="Collect"/>.
    /// </summary>
    /// <remarks>
    /// <para>
    /// Most Lucene Query implementations will visit matching docIDs in
    /// order. However, some queries (currently limited to certain cases of
    /// <c>BooleanQuery</c>) can achieve faster searching if the
    /// <c>Collector</c> allows them to deliver the docIDs out of order.
    /// </para>
    /// <para>
    /// Many collectors don't mind getting docIDs out of order, so it is
    /// important to return <c>true</c> here.
    /// </para>
    /// </remarks>
    /// <value>
    /// <c>true</c> when out-of-order delivery of docIDs is acceptable.
    /// </value>
    public abstract bool AcceptsDocsOutOfOrder { get; }
}
}