/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;
using System.Text;

using Lucene.Net.Documents;
using Lucene.Net.Search;
using Lucene.Net.Index;

using TermInfo = Lucene.Net.Search.Vectorhighlight.FieldTermStack.TermInfo;
using QueryPhraseMap = Lucene.Net.Search.Vectorhighlight.FieldQuery.QueryPhraseMap;

namespace Lucene.Net.Search.Vectorhighlight
{
    /// <summary>
    /// FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder
    /// to create a FieldFragList object.
    /// </summary>
    public class FieldPhraseList
    {
        /// <summary>
        /// Phrases accepted so far, in the order they were found. A phrase is only
        /// appended when it does not overlap any phrase already in the list.
        /// </summary>
        public LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();

        /// <summary>
        /// Creates a FieldPhraseList that has no limit on the number of phrases to analyze.
        /// </summary>
        /// <param name="fieldTermStack">FieldTermStack object</param>
        /// <param name="fieldQuery">FieldQuery object</param>
        public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery)
            : this(fieldTermStack, fieldQuery, Int32.MaxValue)
        {
        }

        /// <summary>
        /// Builds the phrase list by popping terms off <paramref name="fieldTermStack"/> and
        /// matching them against <paramref name="fieldQuery"/>, stopping when the stack is
        /// empty or <paramref name="phraseLimit"/> phrases have been collected.
        /// </summary>
        /// <param name="fieldTermStack">FieldTermStack object; terms are consumed from it</param>
        /// <param name="fieldQuery">FieldQuery object</param>
        /// <param name="phraseLimit">maximum size of phraseList</param>
        public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit)
        {
            String field = fieldTermStack.FieldName;

            LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
            QueryPhraseMap currMap = null;
            QueryPhraseMap nextMap = null;
            while (!fieldTermStack.IsEmpty() && (phraseList.Count < phraseLimit))
            {
                phraseCandidate.Clear();

                TermInfo ti = fieldTermStack.Pop();
                currMap = fieldQuery.GetFieldTermMap(field, ti.Text);

                // if not found, discard top TermInfo from stack, then try next element
                if (currMap == null)
                {
                    continue;
                }

                // if found, search the longest phrase
                phraseCandidate.AddLast(ti);
                while (true)
                {
                    // A null result is treated as "no more terms" by the checks below.
                    ti = fieldTermStack.Pop();
                    nextMap = null;
                    if (ti != null)
                    {
                        nextMap = currMap.GetTermMap(ti.Text);
                    }

                    if (ti == null || nextMap == null)
                    {
                        // The popped term does not extend the candidate; give it back
                        // so the outer loop can start a new candidate from it.
                        if (ti != null)
                        {
                            fieldTermStack.Push(ti);
                        }

                        if (currMap.IsValidTermOrPhrase(new List<TermInfo>(phraseCandidate)))
                        {
                            AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                        }
                        else
                        {
                            // Back off: drop trailing terms one at a time (returning each
                            // to the stack) until a shorter prefix matches a query phrase.
                            while (phraseCandidate.Count > 1)
                            {
                                TermInfo last = phraseCandidate.Last.Value;
                                phraseCandidate.RemoveLast();
                                fieldTermStack.Push(last);
                                currMap = fieldQuery.SearchPhrase(field, new List<TermInfo>(phraseCandidate));
                                if (currMap != null)
                                {
                                    AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                                    break;
                                }
                            }
                        }
                        break;
                    }
                    else
                    {
                        // The term continues the current phrase; keep extending.
                        phraseCandidate.AddLast(ti);
                        currMap = nextMap;
                    }
                }
            }
        }

        /// <summary>
        /// Appends <paramref name="wpi"/> to phraseList unless its offsets overlap
        /// a phrase that is already in the list.
        /// </summary>
        /// <param name="wpi">candidate phrase</param>
        void AddIfNoOverlap(WeightedPhraseInfo wpi)
        {
            foreach (WeightedPhraseInfo existWpi in phraseList)
            {
                if (existWpi.IsOffsetOverlap(wpi))
                {
                    return;
                }
            }
            phraseList.AddLast(wpi);
        }

        /// <summary>
        /// A matched term or phrase together with its boost and character offsets.
        /// </summary>
        public class WeightedPhraseInfo
        {
            internal String text;  // unnecessary member, just exists for debugging purpose
            internal List<Toffs> termsOffsets;   // usually termsOffsets.Count == 1,
                                                 // but if position-gap > 1 and slop > 0 then Count could be greater than 1
            internal float boost;  // query boost
            internal int seqnum;   // number passed to the constructor (0 for the two-argument overload)

            /// <summary>
            /// Creates a WeightedPhraseInfo with sequence number 0.
            /// </summary>
            /// <param name="terms">terms making up the phrase; must contain at least one term</param>
            /// <param name="boost">query boost</param>
            public WeightedPhraseInfo(LinkedList<TermInfo> terms, float boost)
                : this(terms, boost, 0)
            {
            }

            /// <summary>
            /// Creates a WeightedPhraseInfo from the given terms. The text of all terms is
            /// concatenated, and offsets of terms at consecutive positions are merged into
            /// a single range.
            /// </summary>
            /// <param name="terms">terms making up the phrase; must contain at least one term</param>
            /// <param name="boost">query boost</param>
            /// <param name="number">sequence number stored with the phrase</param>
            /// <exception cref="ArgumentNullException"><paramref name="terms"/> is null</exception>
            /// <exception cref="ArgumentException"><paramref name="terms"/> is empty</exception>
            public WeightedPhraseInfo(LinkedList<TermInfo> terms, float boost, int number)
            {
                if (terms == null)
                {
                    throw new ArgumentNullException("terms");
                }
                if (terms.Count == 0)
                {
                    throw new ArgumentException("terms must contain at least one TermInfo", "terms");
                }

                this.boost = boost;
                this.seqnum = number;
                termsOffsets = new List<Toffs>(terms.Count);

                LinkedListNode<TermInfo> node = terms.First;
                TermInfo ti = node.Value;
                termsOffsets.Add(new Toffs(ti.StartOffset, ti.EndOffset));
                if (terms.Count == 1)
                {
                    text = ti.Text;
                    return;
                }

                StringBuilder sb = new StringBuilder();
                sb.Append(ti.Text);
                int pos = ti.Position;

                // Walk the remaining terms (everything after the first one).
                for (node = node.Next; node != null; node = node.Next)
                {
                    ti = node.Value;
                    sb.Append(ti.Text);
                    if (ti.Position - pos == 1)
                    {
                        // Adjacent position: extend the most recent offset range.
                        Toffs to = termsOffsets[termsOffsets.Count - 1];
                        to.SetEndOffset(ti.EndOffset);
                    }
                    else
                    {
                        // Position gap: start a new offset range.
                        termsOffsets.Add(new Toffs(ti.StartOffset, ti.EndOffset));
                    }
                    pos = ti.Position;
                }
                text = sb.ToString();
            }

            /// <summary>Start offset of the first offset range.</summary>
            public int StartOffset
            {
                get { return termsOffsets[0].startOffset; }
            }

            /// <summary>End offset of the last offset range.</summary>
            public int EndOffset
            {
                get { return termsOffsets[termsOffsets.Count - 1].endOffset; }
            }

            /// <summary>
            /// Tests whether the span from StartOffset to EndOffset of this phrase
            /// overlaps the corresponding span of <paramref name="other"/>.
            /// </summary>
            /// <param name="other">phrase to compare against</param>
            /// <returns>true if an endpoint of either span falls inside the other span</returns>
            public bool IsOffsetOverlap(WeightedPhraseInfo other)
            {
                int so = StartOffset;
                int eo = EndOffset;
                int oso = other.StartOffset;
                int oeo = other.EndOffset;
                if (so <= oso && oso < eo) return true;   // other starts inside this
                if (so < oeo && oeo <= eo) return true;   // other ends inside this
                if (oso <= so && so < oeo) return true;   // this starts inside other
                if (oso < eo && eo <= oeo) return true;   // this ends inside other
                return false;
            }

            /// <summary>
            /// Returns "text(boost)((start,end)...)", with the boost formatted using the
            /// invariant culture so the output does not depend on the current culture.
            /// </summary>
            public override string ToString()
            {
                StringBuilder sb = new StringBuilder();

                sb.Append(text)
                  .Append('(')
                  .Append(boost.ToString(".0", System.Globalization.CultureInfo.InvariantCulture))
                  .Append(")(");
                foreach (Toffs to in termsOffsets)
                {
                    sb.Append(to);
                }
                sb.Append(')');
                return sb.ToString();
            }

            /// <summary>
            /// A start/end offset pair.
            /// </summary>
            public class Toffs
            {
                internal int startOffset;
                internal int endOffset;

                /// <summary>
                /// Creates an offset pair.
                /// </summary>
                /// <param name="startOffset">start offset</param>
                /// <param name="endOffset">end offset</param>
                public Toffs(int startOffset, int endOffset)
                {
                    this.startOffset = startOffset;
                    this.endOffset = endOffset;
                }

                /// <summary>
                /// Replaces the end offset; used to extend the range over an adjacent term.
                /// </summary>
                /// <param name="endOffset">new end offset</param>
                internal void SetEndOffset(int endOffset)
                {
                    this.endOffset = endOffset;
                }

                /// <summary>Returns "(startOffset,endOffset)".</summary>
                public override string ToString()
                {
                    StringBuilder sb = new StringBuilder();
                    sb.Append('(').Append(startOffset).Append(',').Append(endOffset).Append(')');
                    return sb.ToString();
                }
            }
        }
    }
}