/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Index;
using Lucene.Net.Support;

namespace Lucene.Net.Search.Highlight
{
    /// <summary>
    /// <see cref="IScorer"/> implementation which scores text fragments by the number of
    /// unique query terms found. This class uses the <see cref="QueryTermExtractor"/>
    /// class to determine the query terms and their boosts to be used.
    /// </summary>
    // TODO: provide option to boost score of fragments near beginning of document
    // based on fragment.getFragNum()
    public class QueryTermScorer : IScorer
    {
        // Fragment most recently passed to StartFragment; stored but not read by this class.
        private TextFragment currentTextFragment = null;

        // Text of the query terms already counted towards totalScore for the current fragment.
        private HashSet<String> uniqueTermsInFragment;

        // Sum of the weights of the distinct query terms seen in the current fragment.
        private float totalScore = 0;

        // Largest weight among the terms retained in termsToFind.
        private readonly float maxTermWeight = 0;

        // Query terms keyed by term text; for duplicate terms the highest weighted entry is kept.
        private readonly HashMap<String, WeightedTerm> termsToFind;

        // Term attribute of the token stream handed to Init.
        private ITermAttribute termAtt;

        /// <summary>
        /// Creates a scorer from the terms extracted from <paramref name="query"/>.
        /// </summary>
        /// <param name="query">a Lucene query (ideally rewritten using query.rewrite before
        /// being passed to this class and the searcher)</param>
        public QueryTermScorer(Query query)
            : this(QueryTermExtractor.GetTerms(query))
        {
        }

        /// <summary>
        /// Creates a scorer from the terms extracted from <paramref name="query"/>
        /// for the given field.
        /// </summary>
        /// <param name="query">a Lucene query (ideally rewritten using query.rewrite before
        /// being passed to this class and the searcher)</param>
        /// <param name="fieldName">the Field name which is used to match Query terms</param>
        public QueryTermScorer(Query query, String fieldName)
            : this(QueryTermExtractor.GetTerms(query, false, fieldName))
        {
        }

        /// <summary>
        /// Creates a scorer from the IDF-weighted terms extracted from <paramref name="query"/>.
        /// </summary>
        /// <param name="query">a Lucene query (ideally rewritten using query.rewrite before
        /// being passed to this class and the searcher)</param>
        /// <param name="reader">used to compute IDF which can be used to a) score selected
        /// fragments better b) use graded highlights eg set font color intensity</param>
        /// <param name="fieldName">the field on which Inverse Document Frequency (IDF)
        /// calculations are based</param>
        public QueryTermScorer(Query query, IndexReader reader, String fieldName)
            : this(QueryTermExtractor.GetIdfWeightedTerms(query, reader, fieldName))
        {
        }

        /// <summary>
        /// Creates a scorer from an explicit set of weighted terms.
        /// </summary>
        /// <param name="weightedTerms">the terms to look for; when the same term text occurs
        /// more than once, the entry with the highest weight is used</param>
        /// <exception cref="ArgumentNullException"><paramref name="weightedTerms"/> is null</exception>
        public QueryTermScorer(WeightedTerm[] weightedTerms)
        {
            if (weightedTerms == null)
            {
                throw new ArgumentNullException("weightedTerms");
            }

            termsToFind = new HashMap<String, WeightedTerm>();
            for (int i = 0; i < weightedTerms.Length; i++)
            {
                WeightedTerm candidate = weightedTerms[i];
                WeightedTerm existingTerm = termsToFind[candidate.Term];
                if ((existingTerm == null) || (existingTerm.Weight < candidate.Weight))
                {
                    // if a term is defined more than once, always use the highest scoring Weight
                    termsToFind[candidate.Term] = candidate;
                    maxTermWeight = Math.Max(maxTermWeight, candidate.Weight);
                }
            }
        }

        /// <summary>
        /// Registers the term attribute of <paramref name="tokenStream"/> so that
        /// <see cref="GetTokenScore"/> can read the text of the current token.
        /// </summary>
        /// <param name="tokenStream">the token stream that will be scored</param>
        /// <returns>always <c>null</c>; this implementation never returns a replacement stream</returns>
        public TokenStream Init(TokenStream tokenStream)
        {
            termAtt = tokenStream.AddAttribute<ITermAttribute>();
            return null;
        }

        /// <summary>
        /// Begins scoring a new fragment: clears the set of terms already seen and
        /// resets the running fragment score to zero.
        /// </summary>
        /// <param name="newFragment">the fragment about to be scored</param>
        public void StartFragment(TextFragment newFragment)
        {
            uniqueTermsInFragment = new HashSet<String>();
            currentTextFragment = newFragment;
            totalScore = 0;
        }

        /// <summary>
        /// Scores the current token of the stream passed to <see cref="Init"/>.
        /// </summary>
        /// <returns>the weight of the matching query term, or 0 if the token is not a
        /// query term. The fragment score only grows the first time a given term is
        /// seen within the current fragment.</returns>
        public float GetTokenScore()
        {
            String termText = termAtt.Term;

            WeightedTerm queryTerm = termsToFind[termText];
            if (queryTerm == null)
            {
                // not a query term - return
                return 0;
            }

            // found a query term - Add returns true only the first time the term is
            // seen in this fragment, so each distinct term is counted once
            if (uniqueTermsInFragment.Add(termText))
            {
                totalScore += queryTerm.Weight;
            }

            return queryTerm.Weight;
        }

        /// <summary>
        /// The score accumulated since the last call to <see cref="StartFragment"/>.
        /// </summary>
        public float FragmentScore
        {
            get { return totalScore; }
        }

        /// <summary>
        /// Called when all fragments have been processed.
        /// </summary>
        public void AllFragmentsProcessed()
        {
            // this class has no special operations to perform at end of processing
        }

        /// <summary>
        /// The highest weighted term (useful for passing to GradientFormatter
        /// to set top end of coloring scale).
        /// </summary>
        public float MaxTermWeight
        {
            get { return maxTermWeight; }
        }
    }
}