/*
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
namespace Lucene.Net.Analysis.RU
{
    /// <summary>
    /// Holds the encoding tables (charsets) for Russian characters in Unicode, KOI8 and CP1251,
    /// together with a <see cref="ToLowerCase"/> implementation that understands them.
    /// </summary>
    /// <remarks>
    /// Each table lists the lowercase letters at positions 0-31 and the matching uppercase
    /// letters at positions 32-63.
    /// Another encoding scheme (for example ISO-8859-5, or a customized one) can be supported by
    /// adding a new table and a matching branch in <see cref="ToLowerCase"/>.
    ///
    /// Author: Boris Okner, b.okner@rogers.com
    ///
    /// $Id: RussianCharsets.java,v 1.3 2004/03/29 22:48:01 cutting Exp $
    /// </remarks>
    public class RussianCharsets
    {
        /// <summary>
        /// Unicode Russian charset: lowercase U+0430..U+044F followed by uppercase U+0410..U+042F.
        /// </summary>
        public static char[] UnicodeRussian = new char[]
        {
            '\u0430', '\u0431', '\u0432', '\u0433', '\u0434', '\u0435', '\u0436', '\u0437',
            '\u0438', '\u0439', '\u043A', '\u043B', '\u043C', '\u043D', '\u043E', '\u043F',
            '\u0440', '\u0441', '\u0442', '\u0443', '\u0444', '\u0445', '\u0446', '\u0447',
            '\u0448', '\u0449', '\u044A', '\u044B', '\u044C', '\u044D', '\u044E', '\u044F',
            '\u0410', '\u0411', '\u0412', '\u0413', '\u0414', '\u0415', '\u0416', '\u0417',
            '\u0418', '\u0419', '\u041A', '\u041B', '\u041C', '\u041D', '\u041E', '\u041F',
            '\u0420', '\u0421', '\u0422', '\u0423', '\u0424', '\u0425', '\u0426', '\u0427',
            '\u0428', '\u0429', '\u042A', '\u042B', '\u042C', '\u042D', '\u042E', '\u042F'
        };

        /// <summary>
        /// KOI8 charset: lowercase codes (within 0xC0..0xDF) followed by uppercase codes
        /// (within 0xE0..0xFF), listed in the same letter order as <see cref="UnicodeRussian"/>.
        /// </summary>
        public static char[] KOI8 = new char[]
        {
            (char) 0xc1, (char) 0xc2, (char) 0xd7, (char) 0xc7, (char) 0xc4, (char) 0xc5, (char) 0xd6, (char) 0xda,
            (char) 0xc9, (char) 0xca, (char) 0xcb, (char) 0xcc, (char) 0xcd, (char) 0xce, (char) 0xcf, (char) 0xd0,
            (char) 0xd2, (char) 0xd3, (char) 0xd4, (char) 0xd5, (char) 0xc6, (char) 0xc8, (char) 0xc3, (char) 0xde,
            (char) 0xdb, (char) 0xdd, (char) 0xdf, (char) 0xd9, (char) 0xd8, (char) 0xdc, (char) 0xc0, (char) 0xd1,
            (char) 0xe1, (char) 0xe2, (char) 0xf7, (char) 0xe7, (char) 0xe4, (char) 0xe5, (char) 0xf6, (char) 0xfa,
            (char) 0xe9, (char) 0xea, (char) 0xeb, (char) 0xec, (char) 0xed, (char) 0xee, (char) 0xef, (char) 0xf0,
            (char) 0xf2, (char) 0xf3, (char) 0xf4, (char) 0xf5, (char) 0xe6, (char) 0xe8, (char) 0xe3, (char) 0xfe,
            (char) 0xfb, (char) 0xfd, (char) 0xff, (char) 0xf9, (char) 0xf8, (char) 0xfc, (char) 0xe0, (char) 0xf1
        };

        /// <summary>
        /// CP1251 charset: lowercase codes 0xE0..0xFF followed by uppercase codes 0xC0..0xDF.
        /// </summary>
        public static char[] CP1251 = new char[]
        {
            (char) 0xE0, (char) 0xE1, (char) 0xE2, (char) 0xE3, (char) 0xE4, (char) 0xE5, (char) 0xE6, (char) 0xE7,
            (char) 0xE8, (char) 0xE9, (char) 0xEA, (char) 0xEB, (char) 0xEC, (char) 0xED, (char) 0xEE, (char) 0xEF,
            (char) 0xF0, (char) 0xF1, (char) 0xF2, (char) 0xF3, (char) 0xF4, (char) 0xF5, (char) 0xF6, (char) 0xF7,
            (char) 0xF8, (char) 0xF9, (char) 0xFA, (char) 0xFB, (char) 0xFC, (char) 0xFD, (char) 0xFE, (char) 0xFF,
            (char) 0xC0, (char) 0xC1, (char) 0xC2, (char) 0xC3, (char) 0xC4, (char) 0xC5, (char) 0xC6, (char) 0xC7,
            (char) 0xC8, (char) 0xC9, (char) 0xCA, (char) 0xCB, (char) 0xCC, (char) 0xCD, (char) 0xCE, (char) 0xCF,
            (char) 0xD0, (char) 0xD1, (char) 0xD2, (char) 0xD3, (char) 0xD4, (char) 0xD5, (char) 0xD6, (char) 0xD7,
            (char) 0xD8, (char) 0xD9, (char) 0xDA, (char) 0xDB, (char) 0xDC, (char) 0xDD, (char) 0xDE, (char) 0xDF
        };

        /// <summary>
        /// Converts a character to lowercase according to the given charset.
        /// </summary>
        /// <param name="letter">The character (or charset code held in a <c>char</c>) to convert.</param>
        /// <param name="charset">
        /// One of <see cref="UnicodeRussian"/>, <see cref="KOI8"/> or <see cref="CP1251"/>.
        /// The array is compared by reference, not by content, so a copy of a table (or any
        /// other array, or <c>null</c>) is treated as an unknown charset.
        /// </param>
        /// <returns>
        /// The lowercase counterpart when <paramref name="letter"/> is an uppercase Russian letter
        /// of the charset; <paramref name="letter"/> itself when it is already a lowercase Russian
        /// letter of the charset; otherwise the culture-invariant lowercase form of
        /// <paramref name="letter"/>.
        /// </returns>
        public static char ToLowerCase(char letter, char[] charset)
        {
            if (charset == UnicodeRussian)
            {
                if (letter >= '\u0430' && letter <= '\u044F')
                {
                    // Already a lowercase Russian letter.
                    return letter;
                }
                if (letter >= '\u0410' && letter <= '\u042F')
                {
                    // Lowercase letters sit 32 code points above their uppercase forms.
                    return (char) (letter + 32);
                }
            }
            else if (charset == KOI8)
            {
                if (letter >= 0xe0 && letter <= 0xff)
                {
                    // In KOI8 the uppercase block comes second, so lowercasing subtracts 32.
                    return (char) (letter - 32);
                }
                if (letter >= 0xc0 && letter <= 0xdf)
                {
                    return letter;
                }
            }
            else if (charset == CP1251)
            {
                if (letter >= 0xC0 && letter <= 0xDF)
                {
                    return (char) (letter + 32);
                }
                if (letter >= 0xE0 && letter <= 0xFF)
                {
                    return letter;
                }
            }

            // Everything else (unknown charset, or a character outside the Russian ranges) is
            // lowercased with the invariant culture so the result does not depend on the current
            // thread culture (e.g. 'I' must not become the dotless U+0131 under tr-TR).
            return System.Char.ToLower(letter, System.Globalization.CultureInfo.InvariantCulture);
        }
    }
}