/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using TermPositions = Lucene.Net.Index.TermPositions; namespace Lucene.Net.Search { /// Expert: Scoring functionality for phrase queries. ///
/// A document is considered matching if it contains the phrase-query terms
/// at "valid" positions. What "valid positions" are
/// depends on the type of the phrase query: for an exact phrase query terms are required
/// to appear in adjacent locations, while for a sloppy phrase query some distance between
/// the terms is allowed. The abstract method <see cref="PhraseFreq()"/> of extending classes
/// is invoked for each document containing all the phrase query terms, in order to
/// compute the frequency of the phrase query in that document. A non zero frequency
/// means a match.
abstract class PhraseScorer : Scorer
{
    // Weight this scorer was created for; only read back by ToString().
    private Weight weight;

    // Per-document norm bytes, indexed by document id in Score(); may be null.
    protected internal byte[] norms;

    // Copy of weight.Value taken at construction; multiplied into every score.
    protected internal float value_Renamed;

    // True until the first NextDoc()/Advance() call has positioned the list.
    private bool firstTime = true;

    // False once any Next()/SkipTo() call on a PhrasePositions has returned false.
    private bool more = true;

    // Queue used by Sort() to re-order the PhrasePositions linked list.
    protected internal PhraseQueue pq;

    // Head of the singly linked list of PhrasePositions.
    protected internal PhrasePositions first;

    // Tail of the singly linked list of PhrasePositions.
    protected internal PhrasePositions last;

    // Phrase frequency in the current doc as computed by PhraseFreq().
    private float freq;

    /// <summary>
    /// Builds the linked list of phrase positions (one per entry of
    /// <paramref name="tps"/>) and an empty queue sized to hold all of them.
    /// </summary>
    /// <param name="weight">Weight whose Value is used as the score multiplier.</param>
    /// <param name="tps">Term positions, one per phrase term.</param>
    /// <param name="offsets">Offset of each term inside the phrase; read at the same index as <paramref name="tps"/>.</param>
    /// <param name="similarity">Similarity handed to the base Scorer.</param>
    /// <param name="norms">Norm bytes used by Score(); may be null.</param>
    internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms)
        : base(similarity)
    {
        this.norms = norms;
        this.weight = weight;
        this.value_Renamed = weight.Value;

        // Convert tps to a list of phrase positions.
        // Note: a phrase-position differs from a term-position in that its position
        // reflects the phrase offset: pp.pos = tp.pos - offset.
        // This makes a matching (exact) phrase easy to identify: all
        // PhrasePositions then have exactly the same position.
        for (int idx = 0; idx < tps.Length; idx++)
        {
            PhrasePositions node = new PhrasePositions(tps[idx], offsets[idx]);
            if (last == null)
            {
                // very first node becomes the head
                first = node;
            }
            else
            {
                // append to the end of the list
                last.next = node;
            }
            last = node;
        }

        // construct an empty pq
        pq = new PhraseQueue(tps.Length);

        // Not positioned on any document yet.
        // NOTE(review): dereferences first, so an empty tps array fails here.
        first.doc = -1;
    }

    /// <summary>
    /// Returns the doc of the head of the list: -1 right after construction,
    /// NO_MORE_DOCS once NextDoc()/Advance() found no further match.
    /// </summary>
    public override int DocID()
    {
        return first.doc;
    }

    /// <summary>
    /// Moves to the next document for which PhraseFreq() is non zero.
    /// </summary>
    /// <returns>The matching doc, or NO_MORE_DOCS when there is none.</returns>
    public override int NextDoc()
    {
        if (firstTime)
        {
            Init();
            firstTime = false;
        }
        else if (more)
        {
            // trigger further scanning
            more = last.Next();
        }

        bool matched = DoNext();
        if (!matched)
        {
            first.doc = NO_MORE_DOCS;
        }
        return first.doc;
    }

    // Next without initial increment: starting from the current state, looks for
    // a doc containing all the terms for which PhraseFreq() is non zero.
    private bool DoNext()
    {
        while (more)
        {
            // find a doc with all the terms
            while (more && first.doc < last.doc)
            {
                more = first.SkipTo(last.doc); // skip first up to last
                FirstToLast();                 // and move it to the end
            }

            if (!more)
            {
                // a term ran out of documents
                break;
            }

            // found a doc with all of the terms: check for the phrase
            freq = PhraseFreq();
            if (freq != 0.0f)
            {
                return true; // found a match
            }

            // no match in this doc: trigger further scanning
            more = last.Next();
        }

        return false; // no more matches
    }

    /// <summary>
    /// Scores the current doc as Tf(freq) * value_Renamed, multiplied by the
    /// decoded norm of the current doc when norms are available.
    /// </summary>
    public override float Score()
    {
        float unnormalized = Similarity.Tf(freq) * value_Renamed; // raw score
        if (norms == null)
        {
            return unnormalized;
        }
        return unnormalized * Similarity.DecodeNorm(norms[first.doc]); // normalize
    }

    /// <summary>
    /// Calls SkipTo(target) on every phrase position, re-sorts the list and then
    /// looks for the next matching document from there.
    /// </summary>
    /// <param name="target">Document passed to each PhrasePositions.SkipTo call.</param>
    /// <returns>The matching doc, or NO_MORE_DOCS when there is none.</returns>
    public override int Advance(int target)
    {
        firstTime = false;

        PhrasePositions cursor = first;
        while (more && cursor != null)
        {
            more = cursor.SkipTo(target);
            cursor = cursor.next;
        }

        if (more)
        {
            Sort(); // re-sort
        }

        bool matched = DoNext();
        if (!matched)
        {
            first.doc = NO_MORE_DOCS;
        }
        return first.doc;
    }

    /// <summary>
    /// Phrase frequency in current doc as computed by PhraseFreq().
    /// </summary>
    /// <returns>The value stored by the most recent PhraseFreq() call made while scanning.</returns>
    public float CurrentFreq()
    {
        return freq;
    }

    /// <summary>
    /// For a document containing all the phrase query terms, compute the
    /// frequency of the phrase in that document.
    /// A non zero frequency means a match.
    /// <para>
    /// Note, that containing all phrase terms does not guarantee a match - they
    /// have to be found in matching locations.
    /// </para>
    /// </summary>
    /// <returns>Frequency of the phrase in current doc, 0 if not found.</returns>
    protected internal abstract float PhraseFreq();

    // First-time positioning: calls Next() on every phrase position (stopping as
    // soon as one reports no more docs) and sorts the list if all succeeded.
    private void Init()
    {
        PhrasePositions cursor = first;
        while (more && cursor != null)
        {
            more = cursor.Next();
            cursor = cursor.next;
        }

        if (more)
        {
            Sort();
        }
    }

    // Re-orders the linked list by pushing every node through pq and rebuilding
    // the list in the order pq hands them back.
    private void Sort()
    {
        pq.Clear();

        PhrasePositions cursor = first;
        while (cursor != null)
        {
            pq.Add(cursor);
            cursor = cursor.next;
        }

        PqToList();
    }

    /// <summary>
    /// Empties pq into the linked list: the list is rebuilt from scratch in pop
    /// order, and the final node's next link is null.
    /// </summary>
    protected internal void PqToList()
    {
        first = null;
        last = null;

        while (pq.Top() != null)
        {
            PhrasePositions popped = pq.Pop();
            if (last == null)
            {
                // first popped node becomes the head
                first = popped;
            }
            else
            {
                // append to the end of the list
                last.next = popped;
            }
            last = popped;
            popped.next = null;
        }
    }

    /// <summary>
    /// Moves the head of the linked list to its end.
    /// </summary>
    protected internal void FirstToLast()
    {
        PhrasePositions formerHead = first;
        last.next = formerHead;   // link old head after the old tail
        last = formerHead;        // old head is the new tail
        first = formerHead.next;  // second node becomes the new head
        formerHead.next = null;   // terminate the list at the new tail
    }

    /// <summary>
    /// Returns "scorer(" + weight + ")".
    /// </summary>
    public override string ToString()
    {
        return "scorer(" + weight + ")";
    }
}
}