/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Runtime.InteropServices; using IndexReader = Lucene.Net.Index.IndexReader; using Term = Lucene.Net.Index.Term; using QueryParser = Lucene.Net.QueryParsers.QueryParser; using ToStringUtils = Lucene.Net.Util.ToStringUtils; namespace Lucene.Net.Search { /// An abstract {@link Query} that matches documents /// containing a subset of terms provided by a {@link /// FilteredTermEnum} enumeration. /// ///

This query cannot be used directly; you must subclass /// it and define {@link #getEnum} to provide a {@link /// FilteredTermEnum} that iterates through the terms to be /// matched. /// ///

NOTE: if {@link #setRewriteMethod} is either /// {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link /// #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a /// {@link BooleanQuery.TooManyClauses} exception during /// searching, which happens when the number of terms to be /// searched exceeds {@link /// BooleanQuery#GetMaxClauseCount()}. Setting {@link /// #setRewriteMethod} to {@link #CONSTANT_SCORE_FILTER_REWRITE} /// prevents this. /// ///

The recommended rewrite method is {@link /// #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU /// computing unhelpful scores, and it tries to pick the most /// performant rewrite method given the query. /// /// Note that {@link QueryParser} produces /// MultiTermQueries using {@link /// #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default. ///

[Serializable] public abstract class MultiTermQuery:Query { [Serializable] public class AnonymousClassConstantScoreAutoRewrite:ConstantScoreAutoRewrite { public override void SetTermCountCutoff(int count) { throw new System.NotSupportedException("Please create a private instance"); } public override void SetDocCountPercent(double percent) { throw new System.NotSupportedException("Please create a private instance"); } // Make sure we are still a singleton even after deserializing protected internal virtual System.Object ReadResolve() { return Lucene.Net.Search.MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; } } /* @deprecated move to sub class */ protected internal Term term; protected internal RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; [NonSerialized] internal int numberOfTerms = 0; /// Abstract class that defines how the query is rewritten. [Serializable] public abstract class RewriteMethod { public abstract Query Rewrite(IndexReader reader, MultiTermQuery query); } [Serializable] private sealed class ConstantScoreFilterRewrite:RewriteMethod { public override Query Rewrite(IndexReader reader, MultiTermQuery query) { Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); result.SetBoost(query.GetBoost()); return result; } // Make sure we are still a singleton even after deserializing internal System.Object ReadResolve() { return Lucene.Net.Search.MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE; } } /// A rewrite method that first creates a private Filter, /// by visiting each term in sequence and marking all docs /// for that term. Matching documents are assigned a /// constant score equal to the query's boost. /// ///

This method is faster than the BooleanQuery /// rewrite methods when the number of matched terms or /// matched documents is non-trivial. Also, it will never /// hit an errant {@link BooleanQuery.TooManyClauses} /// exception. /// ///

/// /// public static readonly RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new ConstantScoreFilterRewrite(); [Serializable] private class ScoringBooleanQueryRewrite:RewriteMethod { public override Query Rewrite(IndexReader reader, MultiTermQuery query) { FilteredTermEnum enumerator = query.GetEnum(reader); BooleanQuery result = new BooleanQuery(true); int count = 0; try { do { Term t = enumerator.Term(); if (t != null) { TermQuery tq = new TermQuery(t); // found a match tq.SetBoost(query.GetBoost() * enumerator.Difference()); // set the boost result.Add(tq, BooleanClause.Occur.SHOULD); // add to query count++; } } while (enumerator.Next()); } finally { enumerator.Close(); } query.IncTotalNumberOfTerms(count); return result; } // Make sure we are still a singleton even after deserializing protected internal virtual System.Object ReadResolve() { return Lucene.Net.Search.MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE; } } /// A rewrite method that first translates each term into /// {@link BooleanClause.Occur#SHOULD} clause in a /// BooleanQuery, and keeps the scores as computed by the /// query. Note that typically such scores are /// meaningless to the user, and require non-trivial CPU /// to compute, so it's almost always better to use {@link /// #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead. /// ///

NOTE: This rewrite method will hit {@link /// BooleanQuery.TooManyClauses} if the number of terms /// exceeds {@link BooleanQuery#getMaxClauseCount}. /// ///

/// /// public static readonly RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite(); [Serializable] private class ConstantScoreBooleanQueryRewrite:ScoringBooleanQueryRewrite { public override Query Rewrite(IndexReader reader, MultiTermQuery query) { // strip the scores off Query result = new ConstantScoreQuery(new QueryWrapperFilter(base.Rewrite(reader, query))); result.SetBoost(query.GetBoost()); return result; } // Make sure we are still a singleton even after deserializing protected internal override System.Object ReadResolve() { return Lucene.Net.Search.MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; } } /// Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except /// scores are not computed. Instead, each matching /// document receives a constant score equal to the /// query's boost. /// ///

NOTE: This rewrite method will hit {@link /// BooleanQuery.TooManyClauses} if the number of terms /// exceeds {@link BooleanQuery#getMaxClauseCount}. /// ///

/// /// public static readonly RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new ConstantScoreBooleanQueryRewrite(); /// A rewrite method that tries to pick the best /// constant-score rewrite method based on term and /// document counts from the query. If both the number of /// terms and documents is small enough, then {@link /// #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used. /// Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is /// used. /// [Serializable] public class ConstantScoreAutoRewrite:RewriteMethod { public ConstantScoreAutoRewrite() { InitBlock(); } private void InitBlock() { termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF; docCountPercent = DEFAULT_DOC_COUNT_PERCENT; } // Defaults derived from rough tests with a 20.0 million // doc Wikipedia index. With more than 350 terms in the // query, the filter method is fastest: public static int DEFAULT_TERM_COUNT_CUTOFF = 350; // If the query will hit more than 1 in 1000 of the docs // in the index (0.1%), the filter method is fastest: public static double DEFAULT_DOC_COUNT_PERCENT = 0.1; private int termCountCutoff; private double docCountPercent; /// If the number of terms in this query is equal to or /// larger than this setting then {@link /// #CONSTANT_SCORE_FILTER_REWRITE} is used. /// public virtual void SetTermCountCutoff(int count) { termCountCutoff = count; } /// /// public virtual int GetTermCountCutoff() { return termCountCutoff; } /// If the number of documents to be visited in the /// postings exceeds this specified percentage of the /// maxDoc() for the index, then {@link /// #CONSTANT_SCORE_FILTER_REWRITE} is used. /// /// 0.0 to 100.0 /// public virtual void SetDocCountPercent(double percent) { docCountPercent = percent; } /// /// public virtual double GetDocCountPercent() { return docCountPercent; } public override Query Rewrite(IndexReader reader, MultiTermQuery query) { // Get the enum and start visiting terms. If we // exhaust the enum before hitting either of the // cutoffs, we use ConstantBooleanQueryRewrite; else, // ConstantFilterRewrite: System.Collections.ArrayList pendingTerms = new System.Collections.ArrayList(); int docCountCutoff = (int) ((docCountPercent / 100.0) * reader.MaxDoc()); int termCountLimit = System.Math.Min(BooleanQuery.GetMaxClauseCount(), termCountCutoff); int docVisitCount = 0; FilteredTermEnum enumerator = query.GetEnum(reader); try { while (true) { Term t = enumerator.Term(); if (t != null) { pendingTerms.Add(t); // Loading the TermInfo from the terms dict here // should not be costly, because 1) the // query/filter will load the TermInfo when it // runs, and 2) the terms dict has a cache: docVisitCount += reader.DocFreq(t); } if (pendingTerms.Count >= termCountLimit || docVisitCount >= docCountCutoff) { // Too many terms -- make a filter. Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); result.SetBoost(query.GetBoost()); return result; } else if (!enumerator.Next()) { // Enumeration is done, and we hit a small // enough number of terms & docs -- just make a // BooleanQuery, now System.Collections.IEnumerator it = pendingTerms.GetEnumerator(); BooleanQuery bq = new BooleanQuery(true); while (it.MoveNext()) { TermQuery tq = new TermQuery((Term) it.Current); bq.Add(tq, BooleanClause.Occur.SHOULD); } // Strip scores Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); result.SetBoost(query.GetBoost()); query.IncTotalNumberOfTerms(pendingTerms.Count); return result; } } } finally { enumerator.Close(); } } public override int GetHashCode() { int prime = 1279; return (int) (prime * termCountCutoff + BitConverter.DoubleToInt64Bits(docCountPercent)); } public override bool Equals(System.Object obj) { if (this == obj) return true; if (obj == null) return false; if (GetType() != obj.GetType()) return false; ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj; if (other.termCountCutoff != termCountCutoff) { return false; } if (BitConverter.DoubleToInt64Bits(other.docCountPercent) != BitConverter.DoubleToInt64Bits(docCountPercent)) { return false; } return true; } } /// Read-only default instance of {@link /// ConstantScoreAutoRewrite}, with {@link /// ConstantScoreAutoRewrite#setTermCountCutoff} set to /// {@link /// ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF} /// and {@link /// ConstantScoreAutoRewrite#setDocCountPercent} set to /// {@link /// ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}. /// Note that you cannot alter the configuration of this /// instance; you'll need to create a private instance /// instead. /// public static readonly RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; /// Constructs a query for terms matching term. /// check sub class for possible term access - the Term does not /// make sense for all MultiTermQuerys and will be removed. /// [Obsolete("check sub class for possible term access - the Term does not make sense for all MultiTermQuerys and will be removed.")] public MultiTermQuery(Term term) { this.term = term; } /// Constructs a query matching terms that cannot be represented with a single /// Term. /// public MultiTermQuery() { } /// Returns the pattern term. /// check sub class for possible term access - getTerm does not /// make sense for all MultiTermQuerys and will be removed. /// [Obsolete("check sub class for possible term access - getTerm does not make sense for all MultiTermQuerys and will be removed.")] public virtual Term GetTerm() { return term; } /// Construct the enumeration to be used, expanding the pattern term. public /*protected internal*/ abstract FilteredTermEnum GetEnum(IndexReader reader); /// Expert: Return the number of unique terms visited during execution of the query. /// If there are many of them, you may consider using another query type /// or optimize your total term count in index. ///

This method is not thread safe, be sure to only call it when no query is running! /// If you re-use the same query instance for another /// search, be sure to first reset the term counter /// with {@link #clearTotalNumberOfTerms}. ///

On optimized indexes / no MultiReaders, you get the correct number of /// unique terms for the whole index. Use this number to compare different queries. /// For non-optimized indexes this number can also be achived in /// non-constant-score mode. In constant-score mode you get the total number of /// terms seeked for all segments / sub-readers. ///

/// /// public virtual int GetTotalNumberOfTerms() { return numberOfTerms; } /// Expert: Resets the counting of unique terms. /// Do this before executing the query/filter. /// /// /// public virtual void ClearTotalNumberOfTerms() { numberOfTerms = 0; } protected internal virtual void IncTotalNumberOfTerms(int inc) { numberOfTerms += inc; } public override Query Rewrite(IndexReader reader) { return rewriteMethod.Rewrite(reader, this); } /* Prints a user-readable version of this query. * Implemented for back compat in case MultiTermQuery * subclasses do no implement. */ public override System.String ToString(System.String field) { System.Text.StringBuilder buffer = new System.Text.StringBuilder(); if (term != null) { if (!term.Field().Equals(field)) { buffer.Append(term.Field()); buffer.Append(":"); } buffer.Append(term.Text()); } else { buffer.Append("termPattern:unknown"); } buffer.Append(ToStringUtils.Boost(GetBoost())); return buffer.ToString(); } /// /// public virtual RewriteMethod GetRewriteMethod() { return rewriteMethod; } /// Sets the rewrite method to be used when executing the /// query. You can use one of the four core methods, or /// implement your own subclass of {@link RewriteMethod}. /// public virtual void SetRewriteMethod(RewriteMethod method) { rewriteMethod = method; } //@Override public override int GetHashCode() { int prime = 31; int result = 1; result = prime * result + System.Convert.ToInt32(GetBoost()); result = prime * result; result += rewriteMethod.GetHashCode(); return result; } //@Override public override bool Equals(System.Object obj) { if (this == obj) return true; if (obj == null) return false; if (GetType() != obj.GetType()) return false; MultiTermQuery other = (MultiTermQuery) obj; if (System.Convert.ToInt32(GetBoost()) != System.Convert.ToInt32(other.GetBoost())) return false; if (!rewriteMethod.Equals(other.rewriteMethod)) { return false; } return true; } static MultiTermQuery() { CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new AnonymousClassConstantScoreAutoRewrite(); } } }