18 using Lucene.Net.Analysis.Tokenattributes;
21 namespace Lucene.Net.Analysis
29 offsetAtt = AddAttribute<IOffsetAttribute>();
30 termAtt = AddAttribute<ITermAttribute>();
35 offsetAtt = AddAttribute<IOffsetAttribute>();
36 termAtt = AddAttribute<ITermAttribute>();
41 offsetAtt = AddAttribute<IOffsetAttribute>();
42 termAtt = AddAttribute<ITermAttribute>();
// Read-position bookkeeping used by IncrementToken and End:
//   offset      - added to bufferIndex to compute a token's start position, and passed
//                 through CorrectOffset in End() to produce the final offset.
//   bufferIndex - index of the next char to consume from ioBuffer.
//   dataLen     - value returned by the most recent input.Read(...) into ioBuffer;
//                 a refill is triggered once bufferIndex >= dataLen.
45 private int offset = 0, bufferIndex = 0, dataLen = 0;
// IncrementToken compares the current token length against this value after each
// appended char. NOTE(review): presumably the maximum token length - confirm.
46 private const int MAX_WORD_LEN = 255;
// Number of chars allocated for ioBuffer.
47 private const int IO_BUFFER_SIZE = 4096;
// Buffer that input.Read fills and IncrementToken consumes one char at a time.
48 private readonly
char[] ioBuffer =
new char[IO_BUFFER_SIZE];
/// <summary>
/// Abstract predicate over a single character; concrete subclasses must supply it.
/// NOTE(review): presumably returns true for characters that belong inside a token and
/// false for separators - confirm against the call site in IncrementToken.
/// </summary>
/// <param name="c">The character to classify.</param>
58 protected internal abstract bool IsTokenChar(
char c);
/// <summary>
/// Maps a character to the form that is stored in the term buffer: IncrementToken writes
/// <c>Normalize(c)</c>, not <c>c</c> itself. Virtual, so subclasses may override it.
/// NOTE(review): the default implementation presumably returns <paramref name="c"/>
/// unchanged - confirm.
/// </summary>
/// <param name="c">The character about to be appended to the current token.</param>
64 protected internal virtual char Normalize(
char c)
/// <summary>
/// Builds the next token: consumes chars from ioBuffer (refilling it from the input reader
/// when exhausted), appends their normalized form to the term attribute's buffer, and then
/// publishes the term length and the corrected start/end offsets.
/// </summary>
/// <returns>NOTE(review): presumably true when a token was produced and false at end of input - confirm.</returns>
69 public override bool IncrementToken()
// Provisional start; may be replaced below by an absolute position.
73 int start = bufferIndex;
// Write directly into the term attribute's backing array.
74 char[] buffer = termAtt.TermBuffer();
// Every buffered char has been consumed: read the next chunk from the input.
78 if (bufferIndex >= dataLen)
81 dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
// Fetch the next char and advance the read position.
92 char c = ioBuffer[bufferIndex++];
// bufferIndex has already been incremented past c, hence the -1.
100 start = offset + bufferIndex - 1;
// The term buffer has no free slot left: request one of size length + 1.
101 else if (length == buffer.Length)
102 buffer = termAtt.ResizeTermBuffer(1 + length);
// Append the normalized char and bump the token length.
104 buffer[length++] = Normalize(c);
// NOTE(review): presumably terminates the token once it reaches MAX_WORD_LEN - confirm.
106 if (length == MAX_WORD_LEN)
// Publish the token length and its offsets, each passed through CorrectOffset.
115 termAtt.SetTermLength(length);
116 offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length));
/// <summary>
/// Sets the offset attribute's start and end to the same value: the current
/// <c>offset</c> after adjustment by CorrectOffset.
/// </summary>
120 public override void End()
// Start and end are identical, so the recorded span is zero-length.
123 int finalOffset = CorrectOffset(offset);
124 offsetAtt.SetOffset(finalOffset, finalOffset);
127 public override void Reset(System.IO.TextReader input)