20 using System.Collections;
22 using Lucene.Net.Analysis;
23 using Lucene.Net.Analysis.Tokenattributes;
24 using Lucene.Net.Util;
26 namespace Lucene.Net.Analysis.NGram
// Static helpers for Side values: GetLabel is declared as an extension method on Side,
// and GetSide takes a label string and is declared to return a Side.
28 public static class SideExtensions
// Declared to return a string label for theSide. The branches that produce the label
// are not visible in this excerpt.
30 public static string GetLabel(
this Side theSide)
// Throws ArgumentException with the offending value formatted into the message.
// NOTE(review): presumably reached only when theSide matches no known Side value - confirm.
39 throw new ArgumentException(
string.Format(
"{0} is not a valid value for EdgeNGramTokenFilter.Side", theSide));
// Declared to return a Side for sideName. The comparisons below use the string
// == operator against the labels produced by GetLabel.
43 public static Side GetSide(
string sideName)
// First candidate: the label of Side.FRONT.
// NOTE(review): the value returned on a match is not visible here - presumably Side.FRONT.
45 if (
Side.FRONT.GetLabel() == sideName)
// Second candidate: the label of Side.BACK.
// NOTE(review): the result for a match, and for no match at all, is not visible here.
50 if (
Side.BACK.GetLabel() == sideName)
// Default gram sizes; both are initialized to 1.
// NOTE(review): these are public, mutable static fields. If nothing reassigns them,
// const/readonly would be safer - confirm against callers before changing.
77 public static int DEFAULT_MAX_GRAM_SIZE = 1;
78 public static int DEFAULT_MIN_GRAM_SIZE = 1;
// Per-term state used by IncrementToken:
//   curTermBuffer - copy of the current input term's characters; null means no term is
//                   loaded and the next token must be pulled from the input.
//   curTermLength - length of the current term as reported by termAtt.TermLength().
//   curGramSize   - size of the next gram to emit; set to minGram when a term is loaded.
83 private char[] curTermBuffer;
84 private int curTermLength;
85 private int curGramSize;
// Obtain the term and offset attribute instances via AddAttribute and keep them in fields;
// IncrementToken reads the term through termAtt and writes offsets through offsetAtt.
94 this.termAtt = AddAttribute<ITermAttribute>();
95 this.offsetAtt = AddAttribute<IOffsetAttribute>();
// Argument validation: side has to be one of the two supported values.
111 if (side !=
Side.FRONT && side !=
Side.BACK)
// NOTE(review): the message names "sideLabel" although the value checked here is `side`.
113 throw new System.ArgumentException(
"sideLabel must be either front or back");
// The condition guarding this throw is not visible in this excerpt;
// per the message it presumably rejects minGram values below 1 - confirm.
118 throw new System.ArgumentException(
"minGram must be greater than zero");
// The gram-size range must not be inverted.
121 if (minGram > maxGram)
123 throw new System.ArgumentException(
"minGram must not be greater than maxGram");
// Arguments accepted: store the gram-size range on the instance.
126 this.minGram = minGram;
127 this.maxGram = maxGram;
// Obtain the term and offset attribute instances that IncrementToken works with.
129 this.termAtt = AddAttribute<ITermAttribute>();
130 this.offsetAtt = AddAttribute<IOffsetAttribute>();
// Delegates to the overload that takes a Side, converting sideLabel with SideExtensions.GetSide.
142 : this(input, SideExtensions.GetSide(sideLabel), minGram, maxGram)
// Emits edge n-grams for the current input term. Gram sizes start at minGram and are taken
// from the front or the back of the term depending on `side`.
// NOTE(review): the return statements, any enclosing loop and the step that advances
// curGramSize are not visible in this excerpt.
146 public override bool IncrementToken()
// No term loaded yet: pull the next token from the wrapped input stream.
150 if (curTermBuffer == null)
// NOTE(review): the handling when the input has no more tokens is not visible here.
152 if (!input.IncrementToken())
// Snapshot the term: clone its character buffer so later writes to termAtt
// do not alter the characters the grams are cut from.
158 curTermBuffer = (
char[])termAtt.TermBuffer().Clone();
159 curTermLength = termAtt.TermLength();
// Start with the smallest configured gram size for this term.
160 curGramSize = minGram;
// Remember where the source token starts so gram offsets can be expressed relative to it.
161 tokStart = offsetAtt.StartOffset;
// Only gram sizes up to maxGram are considered.
164 if (curGramSize <= maxGram)
// Proceed only when the term is long enough for a gram of the current size.
// NOTE(review): the `curGramSize > maxGram` half appears redundant under the check above.
166 if (!(curGramSize > curTermLength
167 || curGramSize > maxGram))
// FRONT grams start at index 0; otherwise the gram is the last curGramSize characters.
170 int start = side ==
Side.FRONT ? 0 : curTermLength - curGramSize;
171 int end = start + curGramSize;
// Offsets of the gram are the source token's start offset shifted by the gram's position.
173 offsetAtt.SetOffset(tokStart + start, tokStart + end);
// Publish the gram text: curGramSize characters of the snapshot beginning at start.
174 termAtt.SetTermBuffer(curTermBuffer, start, curGramSize);
// Drop the current term; with curTermBuffer null, the null check above pulls a new
// input token the next time it is evaluated.
179 curTermBuffer = null;
// Clears the buffered term so IncrementToken pulls a fresh token from the input next time.
// NOTE(review): any other statements in this method are not visible in this excerpt.
183 public override void Reset()
186 curTermBuffer = null;