19 using System.Collections.Generic;
20 using Lucene.Net.Analysis.Tokenattributes;
22 namespace Lucene.Net.Analysis.Compound
/// <summary>
/// Default minimum word size (5). Constructor overloads that take no explicit
/// size arguments forward this value in the <c>minWordSize</c> position.
/// </summary>
33 public static readonly
int DEFAULT_MIN_WORD_SIZE = 5;
/// <summary>
/// Default minimum subword size (2). Constructor overloads that take no explicit
/// size arguments forward this value in the <c>minSubwordSize</c> position.
/// </summary>
38 public static readonly
int DEFAULT_MIN_SUBWORD_SIZE = 2;
/// <summary>
/// Default maximum subword size (15). Constructor overloads that take no explicit
/// size arguments forward this value in the <c>maxSubwordSize</c> position.
/// </summary>
43 public static readonly
int DEFAULT_MAX_SUBWORD_SIZE = 15;
/// <summary>
/// Pending output tokens. <c>IncrementToken</c> publishes and removes the first
/// entry while this list is non-empty.
/// NOTE(review): the code that appends to this list is not visible in this
/// excerpt; presumably the decomposition step fills it — confirm.
/// </summary>
46 protected readonly LinkedList<Token>
tokens;
62 : this(input, MakeDictionary(dictionary), minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
68 : this(input, MakeDictionary(dictionary), DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch)
74 : this(input, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch)
80 : this(input, MakeDictionary(dictionary), DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
86 : this(input, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
94 this.tokens =
new LinkedList<Token>();
95 this.minWordSize = minWordSize;
96 this.minSubwordSize = minSubwordSize;
97 this.maxSubwordSize = maxSubwordSize;
98 this.onlyLongestMatch = onlyLongestMatch;
102 this.dictionary = (CharArraySet)dictionary;
106 this.dictionary =
new CharArraySet(dictionary.Count,
false);
107 AddAllLowerCase(this.dictionary, dictionary);
110 termAtt = AddAttribute<ITermAttribute>();
111 offsetAtt = AddAttribute<IOffsetAttribute>();
112 flagsAtt = AddAttribute<IFlagsAttribute>();
113 posIncAtt = AddAttribute<IPositionIncrementAttribute>();
114 typeAtt = AddAttribute<ITypeAttribute>();
115 payloadAtt = AddAttribute<IPayloadAttribute>();
/// <summary>
/// Builds a dictionary set from an array of words; every word is added in
/// lower-cased form through <c>AddAllLowerCase</c>.
/// </summary>
/// <param name="dictionary">The words to place into the set.</param>
/// <returns>
/// A set of strings. NOTE(review): the creation and return of <c>dict</c> are not
/// visible in this excerpt — confirm the concrete set type against the full file.
/// </returns>
125 public static ISet<string> MakeDictionary(String[] dictionary)
// Lower-case each dictionary word while copying it into the set.
129 AddAllLowerCase(dict, dictionary);
/// <summary>
/// Copies values from <paramref name="token"/> onto this stream's attributes.
/// The assignments visible here cover the flags, type and payload attributes.
/// NOTE(review): the other statements of this method are not shown in this
/// excerpt; presumably they transfer the remaining token state — confirm.
/// </summary>
/// <param name="token">The token whose values are published.</param>
133 private void setToken(
Token token)
137 flagsAtt.Flags = token.
Flags;
138 typeAtt.Type = token.
Type;
141 payloadAtt.Payload = token.
Payload;
/// <summary>
/// Advances the stream by one token. Tokens already queued in <c>tokens</c> are
/// published first; only when the queue is empty is the next token read from
/// <c>input</c> and its attribute values copied into <c>wrapper</c>.
/// NOTE(review): the return statements and the step that refills <c>tokens</c>
/// are not visible in this excerpt — confirm against the full file.
/// </summary>
/// <returns>A <c>bool</c>; the exact conditions are not visible in this excerpt.</returns>
144 public sealed
override bool IncrementToken()
// Queue not empty: publish its head token and drop it from the queue.
146 if (tokens.Count > 0)
148 setToken((
Token)tokens.First.Value);
149 tokens.RemoveFirst();
// Queue empty: ask the wrapped stream for its next token.
153 if (input.IncrementToken() ==
false)
// Snapshot the current attribute values (term text, offsets, flags, type,
// position increment, payload) into the wrapper token.
156 wrapper.SetTermBuffer(termAtt.TermBuffer(), 0, termAtt.TermLength());
157 wrapper.StartOffset = offsetAtt.StartOffset;
158 wrapper.EndOffset = offsetAtt.EndOffset;
159 wrapper.Flags = flagsAtt.Flags;
160 wrapper.Type = typeAtt.Type;
161 wrapper.PositionIncrement = posIncAtt.PositionIncrement;
162 wrapper.Payload = payloadAtt.Payload;
// NOTE(review): a step not shown here presumably decomposes the wrapper token
// into `tokens` before this check — confirm.
// If tokens are queued now, publish the first one and remove it.
166 if (tokens.Count > 0)
168 setToken(tokens.First.Value);
169 tokens.RemoveFirst();
/// <summary>
/// Adds every string in <paramref name="col"/> to <paramref name="target"/>
/// in lower-cased form.
/// </summary>
/// <remarks>
/// Lower-casing always uses the "en-US" culture, so the stored entries do not
/// depend on the culture of the calling thread.
/// </remarks>
/// <param name="target">Set that receives the lower-cased strings.</param>
/// <param name="col">Strings to lower-case and add.</param>
protected static void AddAllLowerCase(ISet<string> target, ICollection<string> col)
{
    // The culture is loop-invariant: resolve it once instead of once per entry.
    var lowerCaseCulture = System.Globalization.CultureInfo.GetCultureInfo("en-US");

    foreach (var entry in col)
    {
        target.Add(entry.ToLower(lowerCaseCulture));
    }
}
/// <summary>
/// Returns a new array containing the lower-cased characters of
/// <paramref name="buffer"/>; the input array itself is not modified.
/// </summary>
/// <remarks>
/// Characters are lower-cased with the fixed "en-US" culture, the same culture
/// <c>AddAllLowerCase</c> applies to dictionary entries. Using the current
/// thread culture instead would make the result machine-dependent (for example
/// 'I' becomes a dotless 'ı' under tr-TR) and could disagree with the
/// lower-cased dictionary.
/// </remarks>
/// <param name="buffer">Characters to lower-case.</param>
/// <returns>A newly allocated array of the same length as <paramref name="buffer"/>.</returns>
protected static char[] MakeLowerCaseCopy(char[] buffer)
{
    var lowerCaseCulture = System.Globalization.CultureInfo.GetCultureInfo("en-US");
    char[] result = new char[buffer.Length];

    // Every slot is written by the loop, so no up-front copy of the input is needed.
    for (int i = 0; i < buffer.Length; ++i)
    {
        result[i] = char.ToLower(buffer[i], lowerCaseCulture);
    }

    return result;
}
199 protected Token CreateToken(
int offset,
int length,
/// <summary>
/// Runs decomposition for <paramref name="token"/>; the visible part of the body
/// hands the token to <c>DecomposeInternal</c>.
/// NOTE(review): the statements that precede that call are not shown in this
/// excerpt — confirm what happens to the token before delegation.
/// </summary>
/// <param name="token">The token to decompose.</param>
208 protected void Decompose(
Token token)
// Delegate the actual splitting to the subclass implementation.
219 DecomposeInternal(token);
/// <summary>
/// Subclass hook called by <c>Decompose</c> with the token to process.
/// NOTE(review): implementations presumably append the resulting sub-tokens to
/// <c>tokens</c>, which <c>IncrementToken</c> later drains — confirm.
/// </summary>
/// <param name="token">The token passed on from <c>Decompose</c>.</param>
222 protected abstract void DecomposeInternal(
Token token);
224 public override void Reset()