/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using Lucene.Net.Analysis; using Lucene.Net.Analysis.Tokenattributes; using Lucene.Net.Index; using Lucene.Net.Index.Memory; using Lucene.Net.Search.Spans; using Lucene.Net.Support; using Lucene.Net.Util; namespace Lucene.Net.Search.Highlight { /// /// implementation which scores text fragments by the number of /// unique query terms found. This class converts appropriate s to /// s and attempts to score only those terms that participated in /// generating the 'hit' on the document. 
/// <summary>
/// <see cref="IScorer"/> implementation that scores a text fragment by summing the
/// weights of the distinct query terms found in it. Term weights are either supplied
/// up front as <see cref="WeightedSpanTerm"/>s or extracted from the query by a
/// <see cref="WeightedSpanTermExtractor"/> each time <see cref="Init(TokenStream)"/>
/// is called. Position-sensitive terms only score at positions they accept.
/// </summary>
public class QueryScorer : IScorer
{
    // Sum of the weights of the distinct terms seen in the current fragment.
    private float totalScore;
    // Terms already counted towards totalScore; recreated by StartFragment.
    private ISet<String> foundTerms;
    // Term text -> weighted term, either pre-supplied or produced by the extractor.
    private IDictionary<String, WeightedSpanTerm> fieldWeightedSpanTerms;
    // Only assigned by the WeightedSpanTerm[] constructor.
    private float maxTermWeight;
    // Running token position; starts at -1 so the first increment lands on the first token.
    private int position = -1;
    private String defaultField;
    private ITermAttribute termAtt;
    private IPositionIncrementAttribute posIncAtt;
    private bool expandMultiTermQuery = true;
    private Query query;
    private String field;
    private IndexReader reader;
    // True when the terms were pre-supplied, so Init(TokenStream) must not re-extract them.
    private bool skipInitExtractor;
    private bool wrapToCaching = true;

    /// <summary>
    /// Constructs a new QueryScorer instance.
    /// </summary>
    /// <param name="query">Query to use for highlighting</param>
    public QueryScorer(Query query)
    {
        Init(query, null, null, true);
    }

    /// <summary>
    /// Constructs a new QueryScorer instance.
    /// </summary>
    /// <param name="query">Query to use for highlighting</param>
    /// <param name="field">Field to highlight - pass null to ignore fields</param>
    public QueryScorer(Query query, String field)
    {
        Init(query, field, null, true);
    }

    /// <summary>
    /// Constructs a new QueryScorer instance.
    /// </summary>
    /// <param name="query">Query to use for highlighting</param>
    /// <param name="reader"><see cref="IndexReader"/> to use for quasi tf/idf scoring</param>
    /// <param name="field">Field to highlight - pass null to ignore fields</param>
    public QueryScorer(Query query, IndexReader reader, String field)
    {
        Init(query, field, reader, true);
    }

    /// <summary>
    /// Constructs a new QueryScorer instance.
    /// </summary>
    /// <param name="query">Query to use for highlighting</param>
    /// <param name="reader"><see cref="IndexReader"/> to use for quasi tf/idf scoring</param>
    /// <param name="field">Field to highlight - pass null to ignore fields</param>
    /// <param name="defaultField">The default field for queries with the field name unspecified</param>
    public QueryScorer(Query query, IndexReader reader, String field, String defaultField)
    {
        this.defaultField = StringHelper.Intern(defaultField);
        Init(query, field, reader, true);
    }

    /// <summary>
    /// Constructs a new QueryScorer instance.
    /// </summary>
    /// <param name="query">Query to use for highlighting</param>
    /// <param name="field">Field to highlight - pass null to ignore fields</param>
    /// <param name="defaultField">The default field for queries with the field name unspecified</param>
    public QueryScorer(Query query, String field, String defaultField)
    {
        this.defaultField = StringHelper.Intern(defaultField);
        Init(query, field, null, true);
    }

    /// <summary>
    /// Constructs a new QueryScorer instance from pre-created terms. No term extraction
    /// is performed later by <see cref="Init(TokenStream)"/>.
    /// </summary>
    /// <param name="weightedTerms">an array of pre-created <see cref="WeightedSpanTerm"/>s</param>
    /// <exception cref="ArgumentNullException"><paramref name="weightedTerms"/> is null</exception>
    public QueryScorer(WeightedSpanTerm[] weightedTerms)
    {
        if (weightedTerms == null)
        {
            throw new ArgumentNullException("weightedTerms");
        }

        this.fieldWeightedSpanTerms = new HashMap<String, WeightedSpanTerm>(weightedTerms.Length);

        foreach (WeightedSpanTerm t in weightedTerms)
        {
            WeightedSpanTerm existingTerm = FindWeightedSpanTerm(t.Term);

            if ((existingTerm == null) || (existingTerm.Weight < t.Weight))
            {
                // if a term is defined more than once, always use the highest
                // scoring Weight
                fieldWeightedSpanTerms[t.Term] = t;
                maxTermWeight = Math.Max(maxTermWeight, t.Weight);
            }
        }

        skipInitExtractor = true;
    }

    /// <summary>
    /// The score accumulated for the current fragment: the sum of the weights of the
    /// distinct terms scored since the last <see cref="StartFragment"/> call.
    /// </summary>
    public float FragmentScore
    {
        get { return totalScore; }
    }

    /// <summary>
    /// The highest weighted term (useful for passing to <c>GradientFormatter</c> to set
    /// top end of coloring scale). This value is only computed by the
    /// <see cref="WeightedSpanTerm"/>-array constructor; it is not updated when terms
    /// are extracted from a query.
    /// </summary>
    public float MaxTermWeight
    {
        get { return maxTermWeight; }
    }

    /// <summary>
    /// Scores the token currently exposed by the attributes captured in
    /// <see cref="Init(TokenStream)"/>. <see cref="StartFragment"/> must have been
    /// called first, because it creates the set of already-counted terms.
    /// </summary>
    /// <returns>
    /// The weight of the matching term, or 0 when the token is not a known term or a
    /// position-sensitive term rejects the current position.
    /// </returns>
    public float GetTokenScore()
    {
        position += posIncAtt.PositionIncrement;
        String termText = termAtt.Term;

        WeightedSpanTerm weightedSpanTerm = FindWeightedSpanTerm(termText);
        if (weightedSpanTerm == null)
        {
            return 0;
        }

        if (weightedSpanTerm.IsPositionSensitive() &&
            !weightedSpanTerm.CheckPosition(position))
        {
            return 0;
        }

        float score = weightedSpanTerm.Weight;

        // found a query term - is it unique in this doc?
        if (!foundTerms.Contains(termText))
        {
            totalScore += score;
            foundTerms.Add(termText);
        }

        return score;
    }

    /// <summary>
    /// Prepares the scorer for a new token stream: resets the position counter, captures
    /// the term and position-increment attributes and, unless the terms were supplied
    /// through the <see cref="WeightedSpanTerm"/>-array constructor, re-extracts the
    /// weighted terms from the query.
    /// </summary>
    /// <param name="tokenStream">the stream that will be scored</param>
    /// <returns>
    /// The token stream exposed by the extractor when it reports a cached token stream;
    /// otherwise null (also null when no extraction is performed).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="tokenStream"/> is null</exception>
    public TokenStream Init(TokenStream tokenStream)
    {
        if (tokenStream == null)
        {
            throw new ArgumentNullException("tokenStream");
        }

        position = -1;
        termAtt = tokenStream.AddAttribute<ITermAttribute>();
        posIncAtt = tokenStream.AddAttribute<IPositionIncrementAttribute>();

        if (!skipInitExtractor)
        {
            if (fieldWeightedSpanTerms != null)
            {
                fieldWeightedSpanTerms.Clear();
            }
            return InitExtractor(tokenStream);
        }

        return null;
    }

    /// <summary>
    /// Retrieve the <see cref="WeightedSpanTerm"/> for the specified token.
    /// </summary>
    /// <param name="token">token to get <see cref="WeightedSpanTerm"/> for</param>
    /// <returns>
    /// WeightedSpanTerm for token, or null when the token is unknown or no terms have
    /// been loaded yet.
    /// </returns>
    public WeightedSpanTerm GetWeightedSpanTerm(String token)
    {
        return FindWeightedSpanTerm(token);
    }

    /// <summary>
    /// Looks up a term without relying on the map implementation returning null for
    /// missing keys from its indexer.
    /// </summary>
    private WeightedSpanTerm FindWeightedSpanTerm(String token)
    {
        if (fieldWeightedSpanTerms == null)
        {
            return null;
        }

        WeightedSpanTerm found;
        return fieldWeightedSpanTerms.TryGetValue(token, out found) ? found : null;
    }

    /// <summary>
    /// Stores the construction-time settings shared by all query-based constructors.
    /// </summary>
    private void Init(Query query, String field, IndexReader reader, bool expandMultiTermQuery)
    {
        this.reader = reader;
        this.expandMultiTermQuery = expandMultiTermQuery;
        this.query = query;
        this.field = field;
    }

    /// <summary>
    /// Runs a <see cref="WeightedSpanTermExtractor"/> over the query and stores the
    /// resulting terms. The scoring variant is used when an <see cref="IndexReader"/>
    /// was supplied.
    /// </summary>
    /// <returns>the extractor's token stream when it reports a cached one; otherwise null</returns>
    private TokenStream InitExtractor(TokenStream tokenStream)
    {
        WeightedSpanTermExtractor qse = defaultField == null
                                            ? new WeightedSpanTermExtractor()
                                            : new WeightedSpanTermExtractor(defaultField);

        qse.ExpandMultiTermQuery = expandMultiTermQuery;
        qse.SetWrapIfNotCachingTokenFilter(wrapToCaching);

        if (reader == null)
        {
            this.fieldWeightedSpanTerms = qse.GetWeightedSpanTerms(query, tokenStream, field);
        }
        else
        {
            this.fieldWeightedSpanTerms = qse.GetWeightedSpanTermsWithScores(query, tokenStream, field, reader);
        }

        if (qse.IsCachedTokenStream)
        {
            return qse.TokenStream;
        }

        return null;
    }

    /// <summary>
    /// Starts scoring a new fragment: forgets which terms were already counted and
    /// resets the fragment score to zero.
    /// </summary>
    /// <param name="newFragment">the fragment about to be scored (not inspected here)</param>
    public void StartFragment(TextFragment newFragment)
    {
        foundTerms = Support.Compatibility.SetFactory.CreateHashSet<String>();
        totalScore = 0;
    }

    /// <summary>
    /// Controls whether or not multi-term queries are expanded. The value is passed to
    /// the <see cref="WeightedSpanTermExtractor"/> on the next extraction.
    /// </summary>
    public bool IsExpandMultiTermQuery
    {
        get { return expandMultiTermQuery; }
        set { this.expandMultiTermQuery = value; }
    }

    /// <summary>
    /// Sets the flag forwarded to
    /// <c>WeightedSpanTermExtractor.SetWrapIfNotCachingTokenFilter</c> on the next
    /// extraction (default true). Set this to false if you are already using a
    /// different caching <see cref="TokenStream"/> implementation and do not want it
    /// to be wrapped.
    /// </summary>
    /// <param name="wrap">true to let the extractor wrap non-caching token streams</param>
    public void SetWrapIfNotCachingTokenFilter(bool wrap)
    {
        this.wrapToCaching = wrap;
    }
}
} // namespace Lucene.Net.Search.Highlight