/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Text;
using Lucene.Net.Documents;
using Lucene.Net.Search;
using Lucene.Net.Index;
using TermInfo = Lucene.Net.Search.Vectorhighlight.FieldTermStack.TermInfo;
using QueryPhraseMap = Lucene.Net.Search.Vectorhighlight.FieldQuery.QueryPhraseMap;
namespace Lucene.Net.Search.Vectorhighlight
{
/// <summary>
/// FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder
/// to create a FieldFragList object.
/// </summary>
public class FieldPhraseList
{
    /// <summary>
    /// The phrases accepted so far, in the order they were added. A phrase is only
    /// added when its offsets do not overlap any phrase already in the list.
    /// </summary>
    public LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();

    /// <summary>
    /// Create a FieldPhraseList that has no limit on the number of phrases to analyze.
    /// </summary>
    /// <param name="fieldTermStack">FieldTermStack object</param>
    /// <param name="fieldQuery">FieldQuery object</param>
    public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery)
        : this(fieldTermStack, fieldQuery, Int32.MaxValue)
    {
    }

    /// <summary>
    /// Build the phrase list by popping terms from <paramref name="fieldTermStack"/> and
    /// matching the longest phrase known to <paramref name="fieldQuery"/> starting at each term.
    /// The stack is consumed: terms are popped (and occasionally pushed back) until it is
    /// empty or <paramref name="phraseLimit"/> phrases have been collected.
    /// </summary>
    /// <param name="fieldTermStack">FieldTermStack object</param>
    /// <param name="fieldQuery">FieldQuery object</param>
    /// <param name="phraseLimit">maximum size of phraseList</param>
    /// <exception cref="ArgumentNullException"><paramref name="fieldTermStack"/> is null.</exception>
    public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit)
    {
        if (fieldTermStack == null)
        {
            throw new ArgumentNullException("fieldTermStack");
        }

        String field = fieldTermStack.FieldName;
        LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
        QueryPhraseMap currMap = null;
        QueryPhraseMap nextMap = null;

        while (!fieldTermStack.IsEmpty() && (phraseList.Count < phraseLimit))
        {
            phraseCandidate.Clear();

            TermInfo ti = fieldTermStack.Pop();
            currMap = fieldQuery.GetFieldTermMap(field, ti.Text);

            // if not found, discard top TermInfo from stack, then try next element
            if (currMap == null)
            {
                continue;
            }

            // if found, search the longest phrase
            phraseCandidate.AddLast(ti);
            while (true)
            {
                ti = fieldTermStack.Pop();
                nextMap = null;
                if (ti != null)
                {
                    nextMap = currMap.GetTermMap(ti.Text);
                }

                if (ti == null || nextMap == null)
                {
                    // The candidate cannot be extended any further. The term that failed
                    // to extend it (if any) goes back on the stack so that it can start
                    // a new candidate on the next outer iteration.
                    if (ti != null)
                    {
                        fieldTermStack.Push(ti);
                    }

                    if (currMap.IsValidTermOrPhrase(new List<TermInfo>(phraseCandidate)))
                    {
                        AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                    }
                    else
                    {
                        // Not a valid phrase as a whole: give terms back one at a time from
                        // the tail and retry with the shorter prefix until one matches or
                        // only a single term is left.
                        while (phraseCandidate.Count > 1)
                        {
                            TermInfo last = phraseCandidate.Last.Value;
                            phraseCandidate.RemoveLast();
                            fieldTermStack.Push(last);
                            currMap = fieldQuery.SearchPhrase(field, new List<TermInfo>(phraseCandidate));
                            if (currMap != null)
                            {
                                AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                                break;
                            }
                        }
                    }
                    break;
                }
                else
                {
                    phraseCandidate.AddLast(ti);
                    currMap = nextMap;
                }
            }
        }
    }

    /// <summary>
    /// Append <paramref name="wpi"/> to <see cref="phraseList"/> unless its offsets overlap
    /// a phrase that is already in the list, in which case it is silently dropped.
    /// </summary>
    /// <param name="wpi">the candidate phrase</param>
    private void AddIfNoOverlap(WeightedPhraseInfo wpi)
    {
        foreach (WeightedPhraseInfo existWpi in phraseList)
        {
            if (existWpi.IsOffsetOverlap(wpi))
            {
                return;
            }
        }
        phraseList.AddLast(wpi);
    }

    /// <summary>
    /// A matched term or phrase together with its boost, its sequence number and the
    /// character offset range(s) it covers.
    /// </summary>
    public class WeightedPhraseInfo
    {
        internal String text; // unnecessary member, just exists for debugging purpose
        internal List<Toffs> termsOffsets; // usually termsOffsets.Count == 1,
                                           // but if position-gap > 1 and slop > 0 then Count could be greater than 1
        internal float boost; // query boost
        internal int seqnum;  // number supplied by the caller (FieldPhraseList passes QueryPhraseMap.TermOrPhraseNumber)

        /// <summary>
        /// Create a WeightedPhraseInfo with sequence number 0.
        /// </summary>
        /// <param name="terms">the terms that make up the phrase; must contain at least one term</param>
        /// <param name="boost">query boost</param>
        public WeightedPhraseInfo(LinkedList<TermInfo> terms, float boost)
            : this(terms, boost, 0)
        {
        }

        /// <summary>
        /// Create a WeightedPhraseInfo from the given terms. Terms whose positions are
        /// consecutive are merged into a single offset range; a position gap starts a new range.
        /// The offsets are copied, so the caller may modify <paramref name="terms"/> afterwards.
        /// </summary>
        /// <param name="terms">the terms that make up the phrase; must contain at least one term</param>
        /// <param name="boost">query boost</param>
        /// <param name="number">sequence number stored with the phrase</param>
        /// <exception cref="ArgumentNullException"><paramref name="terms"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="terms"/> is empty.</exception>
        public WeightedPhraseInfo(LinkedList<TermInfo> terms, float boost, int number)
        {
            if (terms == null)
            {
                throw new ArgumentNullException("terms");
            }
            if (terms.Count == 0)
            {
                throw new ArgumentException("terms must contain at least one TermInfo.", "terms");
            }

            this.boost = boost;
            this.seqnum = number;
            termsOffsets = new List<Toffs>(terms.Count);

            TermInfo first = terms.First.Value;
            termsOffsets.Add(new Toffs(first.StartOffset, first.EndOffset));
            if (terms.Count == 1)
            {
                text = first.Text;
                return;
            }

            StringBuilder sb = new StringBuilder();
            sb.Append(first.Text);
            int pos = first.Position;

            // Walk the remaining terms, starting from the second node.
            for (LinkedListNode<TermInfo> node = terms.First.Next; node != null; node = node.Next)
            {
                TermInfo ti = node.Value;
                sb.Append(ti.Text);
                if (ti.Position - pos == 1)
                {
                    // Directly follows the previous term: extend the current range.
                    Toffs to = termsOffsets[termsOffsets.Count - 1];
                    to.SetEndOffset(ti.EndOffset);
                }
                else
                {
                    // Position gap: this term opens a new offset range.
                    termsOffsets.Add(new Toffs(ti.StartOffset, ti.EndOffset));
                }
                pos = ti.Position;
            }
            text = sb.ToString();
        }

        /// <summary>
        /// Start offset of the first offset range.
        /// </summary>
        public int StartOffset
        {
            get { return termsOffsets[0].startOffset; }
        }

        /// <summary>
        /// End offset of the last offset range.
        /// </summary>
        public int EndOffset
        {
            get { return termsOffsets[termsOffsets.Count - 1].endOffset; }
        }

        /// <summary>
        /// Tell whether the span [StartOffset, EndOffset) of this phrase overlaps the
        /// span of <paramref name="other"/>.
        /// </summary>
        /// <param name="other">the phrase to compare against</param>
        /// <returns>true if either span starts or ends inside the other one</returns>
        public bool IsOffsetOverlap(WeightedPhraseInfo other)
        {
            int so = StartOffset;
            int eo = EndOffset;
            int oso = other.StartOffset;
            int oeo = other.EndOffset;
            if (so <= oso && oso < eo) return true;
            if (so < oeo && oeo <= eo) return true;
            if (oso <= so && so < oeo) return true;
            if (oso < eo && eo <= oeo) return true;
            return false;
        }

        /// <summary>
        /// Debug representation: text, boost (always formatted with '.' as the decimal
        /// separator, independent of the current culture) and the offset ranges.
        /// </summary>
        public override string ToString()
        {
            StringBuilder sb = new StringBuilder();
            // Format with the invariant culture instead of formatting with the current
            // culture and patching the decimal separator afterwards.
            sb.Append(text)
              .Append('(')
              .Append(boost.ToString(".0", System.Globalization.CultureInfo.InvariantCulture))
              .Append(")(");
            foreach (Toffs to in termsOffsets)
            {
                sb.Append(to);
            }
            sb.Append(')');
            return sb.ToString();
        }

        /// <summary>
        /// A start/end offset pair.
        /// </summary>
        public class Toffs
        {
            internal int startOffset;
            internal int endOffset;

            /// <summary>
            /// Create an offset pair.
            /// </summary>
            /// <param name="startOffset">start offset</param>
            /// <param name="endOffset">end offset</param>
            public Toffs(int startOffset, int endOffset)
            {
                this.startOffset = startOffset;
                this.endOffset = endOffset;
            }

            /// <summary>
            /// Replace the end offset; used to extend a range over an adjacent term.
            /// </summary>
            /// <param name="endOffset">the new end offset</param>
            internal void SetEndOffset(int endOffset)
            {
                this.endOffset = endOffset;
            }

            /// <summary>
            /// Format the pair as "(start,end)".
            /// </summary>
            public override string ToString()
            {
                StringBuilder sb = new StringBuilder();
                sb.Append('(').Append(startOffset).Append(',').Append(endOffset).Append(')');
                return sb.ToString();
            }
        }
    }
}
}