/// <summary>
/// A filter that replaces accented characters in the ISO Latin 1 character set
/// (ISO-8859-1) with their unaccented equivalents. The case is not altered.
/// <para>
/// For instance, 'à' will be replaced by 'a'.
/// </para>
/// </summary>
public class ISOLatin1AccentFilter : TokenFilter
{
/// <summary>
/// Passes the given token stream to the base <c>TokenFilter</c> constructor.
/// </summary>
/// <param name="input">Token stream this filter wraps.</param>
public ISOLatin1AccentFilter(TokenStream input)
    : base(input)
{
}
// Scratch buffer, initially 256 chars. Next installs its first outputPos
// characters as the rewritten term via SetTermBuffer.
// NOTE(review): presumably filled (and possibly regrown) by RemoveAccents,
// which is defined outside this section - confirm there.
private char[] output = new char[256];
// Number of characters of output that Next copies into the token's term.
private int outputPos;
/// <summary>
/// Pulls the next token from the wrapped stream and, when its term contains
/// at least one character in the range U+00C0..U+0178, replaces the term with
/// the first <c>outputPos</c> characters of <c>output</c> after calling
/// <c>RemoveAccents</c>.
/// </summary>
/// <param name="result">Token instance handed to the wrapped stream's <c>Next</c> call.</param>
/// <returns>
/// The token returned by the wrapped stream (with its term possibly rewritten),
/// or <c>null</c> when the wrapped stream returns <c>null</c>.
/// </returns>
public override Token Next(Token result)
{
    Token token = input.Next(result);
    if (token == null)
    {
        return null;
    }

    char[] term = token.TermBuffer();
    int termLength = token.TermLength();

    // Scan only until the first character in the candidate range is seen;
    // terms without any such character are returned untouched.
    bool needsRewrite = false;
    for (int pos = 0; pos < termLength && !needsRewrite; pos++)
    {
        char current = term[pos];
        needsRewrite = !(current < '\u00c0' || current > '\u0178');
    }

    if (needsRewrite)
    {
        // RemoveAccents (defined elsewhere in this class) is expected to leave
        // the rewritten term in output[0..outputPos).
        RemoveAccents(term, termLength);
        token.SetTermBuffer(output, 0, outputPos);
    }

    return token;
}
///