18 using Lucene.Net.Analysis.Tokenattributes;
21 namespace Lucene.Net.Analysis
61 termAtt = AddAttribute<ITermAttribute>();
// Scratch buffer that FoldToASCII writes its result into. It starts at 512 chars;
// FoldToASCII swaps in a larger array whenever the current one is shorter than 4 * length.
64 private char[] output =
new char[512];
// Next write index into `output`: every visible write has the form `output[outputPos++] = ...`.
// IncrementToken hands it to SetTermBuffer as the third argument (presumably the number of
// chars to copy - confirm against the ITermAttribute API).
65 private int outputPos;
/// <summary>
/// Pulls the next token from <c>input</c> and, inside a scan over the term's characters,
/// calls <c>FoldToASCII</c> and installs the contents of <c>output</c> as the term buffer.
/// </summary>
// NOTE(review): this listing omits several lines of the method (braces, the declaration of
// `buffer`, whatever test sits inside the loop, and the return statements), so the exact
// condition that triggers folding and the returned values are not visible here - confirm
// against the full file.
68 public override bool IncrementToken()
// The term is only examined when the upstream call reports true.
70 if (input.IncrementToken())
73 int length = termAtt.TermLength();
// Index-based walk over the first `length` characters of the term.
77 for (
int i = 0; i < length; ++i)
// Folds `buffer[0 .. length)` into `output`; FoldToASCII increments `outputPos` once per
// character it writes.
82 FoldToASCII(buffer, length);
// Presumably replaces the term text with output[0 .. outputPos) - TODO confirm the
// (buffer, offset, length) parameter order of SetTermBuffer.
83 termAtt.SetTermBuffer(output, 0, outputPos);
102 public void FoldToASCII(
char[] input,
int length)
105 int maxSizeNeeded = 4 * length;
106 if (output.Length < maxSizeNeeded)
108 output =
new char[
ArrayUtil.GetNextSize(maxSizeNeeded)];
113 for (
int pos = 0; pos < length; ++pos)
120 output[outputPos++] = c;
194 output[outputPos++] =
'A';
278 output[outputPos++] =
'a';
282 output[outputPos++] =
'A';
283 output[outputPos++] =
'A';
293 output[outputPos++] =
'A';
294 output[outputPos++] =
'E';
298 output[outputPos++] =
'A';
299 output[outputPos++] =
'O';
303 output[outputPos++] =
'A';
304 output[outputPos++] =
'U';
310 output[outputPos++] =
'A';
311 output[outputPos++] =
'V';
315 output[outputPos++] =
'A';
316 output[outputPos++] =
'Y';
320 output[outputPos++] =
'(';
321 output[outputPos++] =
'a';
322 output[outputPos++] =
')';
326 output[outputPos++] =
'a';
327 output[outputPos++] =
'a';
337 output[outputPos++] =
'a';
338 output[outputPos++] =
'e';
342 output[outputPos++] =
'a';
343 output[outputPos++] =
'o';
347 output[outputPos++] =
'a';
348 output[outputPos++] =
'u';
354 output[outputPos++] =
'a';
355 output[outputPos++] =
'v';
359 output[outputPos++] =
'a';
360 output[outputPos++] =
'y';
382 output[outputPos++] =
'B';
404 output[outputPos++] =
'b';
408 output[outputPos++] =
'(';
409 output[outputPos++] =
'b';
410 output[outputPos++] =
')';
436 output[outputPos++] =
'C';
466 output[outputPos++] =
'c';
470 output[outputPos++] =
'(';
471 output[outputPos++] =
'c';
472 output[outputPos++] =
')';
506 output[outputPos++] =
'D';
544 output[outputPos++] =
'd';
550 output[outputPos++] =
'D';
551 output[outputPos++] =
'Z';
557 output[outputPos++] =
'D';
558 output[outputPos++] =
'z';
562 output[outputPos++] =
'(';
563 output[outputPos++] =
'd';
564 output[outputPos++] =
')';
568 output[outputPos++] =
'd';
569 output[outputPos++] =
'b';
579 output[outputPos++] =
'd';
580 output[outputPos++] =
'z';
646 output[outputPos++] =
'E';
730 output[outputPos++] =
'e';
734 output[outputPos++] =
'(';
735 output[outputPos++] =
'e';
736 output[outputPos++] =
')';
752 output[outputPos++] =
'F';
770 output[outputPos++] =
'f';
774 output[outputPos++] =
'(';
775 output[outputPos++] =
'f';
776 output[outputPos++] =
')';
780 output[outputPos++] =
'f';
781 output[outputPos++] =
'f';
785 output[outputPos++] =
'f';
786 output[outputPos++] =
'f';
787 output[outputPos++] =
'i';
791 output[outputPos++] =
'f';
792 output[outputPos++] =
'f';
793 output[outputPos++] =
'l';
797 output[outputPos++] =
'f';
798 output[outputPos++] =
'i';
802 output[outputPos++] =
'f';
803 output[outputPos++] =
'l';
839 output[outputPos++] =
'G';
869 output[outputPos++] =
'g';
873 output[outputPos++] =
'(';
874 output[outputPos++] =
'g';
875 output[outputPos++] =
')';
903 output[outputPos++] =
'H';
939 output[outputPos++] =
'h';
943 output[outputPos++] =
'H';
944 output[outputPos++] =
'V';
948 output[outputPos++] =
'(';
949 output[outputPos++] =
'h';
950 output[outputPos++] =
')';
954 output[outputPos++] =
'h';
955 output[outputPos++] =
'v';
1003 output[outputPos++] =
'I';
1053 output[outputPos++] =
'i';
1057 output[outputPos++] =
'I';
1058 output[outputPos++] =
'J';
1062 output[outputPos++] =
'(';
1063 output[outputPos++] =
'i';
1064 output[outputPos++] =
')';
1068 output[outputPos++] =
'i';
1069 output[outputPos++] =
'j';
1081 output[outputPos++] =
'J';
1103 output[outputPos++] =
'j';
1107 output[outputPos++] =
'(';
1108 output[outputPos++] =
'j';
1109 output[outputPos++] =
')';
1137 output[outputPos++] =
'K';
1167 output[outputPos++] =
'k';
1171 output[outputPos++] =
'(';
1172 output[outputPos++] =
'k';
1173 output[outputPos++] =
')';
1213 output[outputPos++] =
'L';
1257 output[outputPos++] =
'l';
1261 output[outputPos++] =
'L';
1262 output[outputPos++] =
'J';
1266 output[outputPos++] =
'L';
1267 output[outputPos++] =
'L';
1271 output[outputPos++] =
'L';
1272 output[outputPos++] =
'j';
1276 output[outputPos++] =
'(';
1277 output[outputPos++] =
'l';
1278 output[outputPos++] =
')';
1282 output[outputPos++] =
'l';
1283 output[outputPos++] =
'j';
1287 output[outputPos++] =
'l';
1288 output[outputPos++] =
'l';
1292 output[outputPos++] =
'l';
1293 output[outputPos++] =
's';
1297 output[outputPos++] =
'l';
1298 output[outputPos++] =
'z';
1320 output[outputPos++] =
'M';
1342 output[outputPos++] =
'm';
1346 output[outputPos++] =
'(';
1347 output[outputPos++] =
'm';
1348 output[outputPos++] =
')';
1382 output[outputPos++] =
'N';
1424 output[outputPos++] =
'n';
1428 output[outputPos++] =
'N';
1429 output[outputPos++] =
'J';
1433 output[outputPos++] =
'N';
1434 output[outputPos++] =
'j';
1438 output[outputPos++] =
'(';
1439 output[outputPos++] =
'n';
1440 output[outputPos++] =
')';
1444 output[outputPos++] =
'n';
1445 output[outputPos++] =
'j';
1535 output[outputPos++] =
'O';
1631 output[outputPos++] =
'o';
1637 output[outputPos++] =
'O';
1638 output[outputPos++] =
'E';
1642 output[outputPos++] =
'O';
1643 output[outputPos++] =
'O';
1649 output[outputPos++] =
'O';
1650 output[outputPos++] =
'U';
1654 output[outputPos++] =
'(';
1655 output[outputPos++] =
'o';
1656 output[outputPos++] =
')';
1662 output[outputPos++] =
'o';
1663 output[outputPos++] =
'e';
1667 output[outputPos++] =
'o';
1668 output[outputPos++] =
'o';
1672 output[outputPos++] =
'o';
1673 output[outputPos++] =
'u';
1695 output[outputPos++] =
'P';
1721 output[outputPos++] =
'p';
1725 output[outputPos++] =
'(';
1726 output[outputPos++] =
'p';
1727 output[outputPos++] =
')';
1739 output[outputPos++] =
'Q';
1755 output[outputPos++] =
'q';
1759 output[outputPos++] =
'(';
1760 output[outputPos++] =
'q';
1761 output[outputPos++] =
')';
1765 output[outputPos++] =
'q';
1766 output[outputPos++] =
'p';
1806 output[outputPos++] =
'R';
1852 output[outputPos++] =
'r';
1856 output[outputPos++] =
'(';
1857 output[outputPos++] =
'r';
1858 output[outputPos++] =
')';
1888 output[outputPos++] =
'S';
1930 output[outputPos++] =
's';
1934 output[outputPos++] =
'S';
1935 output[outputPos++] =
'S';
1939 output[outputPos++] =
'(';
1940 output[outputPos++] =
's';
1941 output[outputPos++] =
')';
1945 output[outputPos++] =
's';
1946 output[outputPos++] =
's';
1950 output[outputPos++] =
's';
1951 output[outputPos++] =
't';
1983 output[outputPos++] =
'T';
2021 output[outputPos++] =
't';
2027 output[outputPos++] =
'T';
2028 output[outputPos++] =
'H';
2032 output[outputPos++] =
'T';
2033 output[outputPos++] =
'Z';
2037 output[outputPos++] =
'(';
2038 output[outputPos++] =
't';
2039 output[outputPos++] =
')';
2043 output[outputPos++] =
't';
2044 output[outputPos++] =
'c';
2052 output[outputPos++] =
't';
2053 output[outputPos++] =
'h';
2057 output[outputPos++] =
't';
2058 output[outputPos++] =
's';
2062 output[outputPos++] =
't';
2063 output[outputPos++] =
'z';
2135 output[outputPos++] =
'U';
2207 output[outputPos++] =
'u';
2211 output[outputPos++] =
'(';
2212 output[outputPos++] =
'u';
2213 output[outputPos++] =
')';
2217 output[outputPos++] =
'u';
2218 output[outputPos++] =
'e';
2240 output[outputPos++] =
'V';
2264 output[outputPos++] =
'v';
2268 output[outputPos++] =
'V';
2269 output[outputPos++] =
'Y';
2273 output[outputPos++] =
'(';
2274 output[outputPos++] =
'v';
2275 output[outputPos++] =
')';
2279 output[outputPos++] =
'v';
2280 output[outputPos++] =
'y';
2304 output[outputPos++] =
'W';
2330 output[outputPos++] =
'w';
2334 output[outputPos++] =
'(';
2335 output[outputPos++] =
'w';
2336 output[outputPos++] =
')';
2346 output[outputPos++] =
'X';
2360 output[outputPos++] =
'x';
2364 output[outputPos++] =
'(';
2365 output[outputPos++] =
'x';
2366 output[outputPos++] =
')';
2398 output[outputPos++] =
'Y';
2432 output[outputPos++] =
'y';
2436 output[outputPos++] =
'(';
2437 output[outputPos++] =
'y';
2438 output[outputPos++] =
')';
2468 output[outputPos++] =
'Z';
2506 output[outputPos++] =
'z';
2510 output[outputPos++] =
'(';
2511 output[outputPos++] =
'z';
2512 output[outputPos++] =
')';
2524 output[outputPos++] =
'0';
2542 output[outputPos++] =
'1';
2546 output[outputPos++] =
'1';
2547 output[outputPos++] =
'.';
2551 output[outputPos++] =
'(';
2552 output[outputPos++] =
'1';
2553 output[outputPos++] =
')';
2571 output[outputPos++] =
'2';
2575 output[outputPos++] =
'2';
2576 output[outputPos++] =
'.';
2580 output[outputPos++] =
'(';
2581 output[outputPos++] =
'2';
2582 output[outputPos++] =
')';
2600 output[outputPos++] =
'3';
2604 output[outputPos++] =
'3';
2605 output[outputPos++] =
'.';
2609 output[outputPos++] =
'(';
2610 output[outputPos++] =
'3';
2611 output[outputPos++] =
')';
2629 output[outputPos++] =
'4';
2633 output[outputPos++] =
'4';
2634 output[outputPos++] =
'.';
2638 output[outputPos++] =
'(';
2639 output[outputPos++] =
'4';
2640 output[outputPos++] =
')';
2658 output[outputPos++] =
'5';
2662 output[outputPos++] =
'5';
2663 output[outputPos++] =
'.';
2667 output[outputPos++] =
'(';
2668 output[outputPos++] =
'5';
2669 output[outputPos++] =
')';
2687 output[outputPos++] =
'6';
2691 output[outputPos++] =
'6';
2692 output[outputPos++] =
'.';
2696 output[outputPos++] =
'(';
2697 output[outputPos++] =
'6';
2698 output[outputPos++] =
')';
2716 output[outputPos++] =
'7';
2720 output[outputPos++] =
'7';
2721 output[outputPos++] =
'.';
2725 output[outputPos++] =
'(';
2726 output[outputPos++] =
'7';
2727 output[outputPos++] =
')';
2745 output[outputPos++] =
'8';
2749 output[outputPos++] =
'8';
2750 output[outputPos++] =
'.';
2754 output[outputPos++] =
'(';
2755 output[outputPos++] =
'8';
2756 output[outputPos++] =
')';
2774 output[outputPos++] =
'9';
2778 output[outputPos++] =
'9';
2779 output[outputPos++] =
'.';
2783 output[outputPos++] =
'(';
2784 output[outputPos++] =
'9';
2785 output[outputPos++] =
')';
2797 output[outputPos++] =
'1';
2798 output[outputPos++] =
'0';
2802 output[outputPos++] =
'1';
2803 output[outputPos++] =
'0';
2804 output[outputPos++] =
'.';
2808 output[outputPos++] =
'(';
2809 output[outputPos++] =
'1';
2810 output[outputPos++] =
'0';
2811 output[outputPos++] =
')';
2817 output[outputPos++] =
'1';
2818 output[outputPos++] =
'1';
2822 output[outputPos++] =
'1';
2823 output[outputPos++] =
'1';
2824 output[outputPos++] =
'.';
2828 output[outputPos++] =
'(';
2829 output[outputPos++] =
'1';
2830 output[outputPos++] =
'1';
2831 output[outputPos++] =
')';
2837 output[outputPos++] =
'1';
2838 output[outputPos++] =
'2';
2842 output[outputPos++] =
'1';
2843 output[outputPos++] =
'2';
2844 output[outputPos++] =
'.';
2848 output[outputPos++] =
'(';
2849 output[outputPos++] =
'1';
2850 output[outputPos++] =
'2';
2851 output[outputPos++] =
')';
2857 output[outputPos++] =
'1';
2858 output[outputPos++] =
'3';
2862 output[outputPos++] =
'1';
2863 output[outputPos++] =
'3';
2864 output[outputPos++] =
'.';
2868 output[outputPos++] =
'(';
2869 output[outputPos++] =
'1';
2870 output[outputPos++] =
'3';
2871 output[outputPos++] =
')';
2877 output[outputPos++] =
'1';
2878 output[outputPos++] =
'4';
2882 output[outputPos++] =
'1';
2883 output[outputPos++] =
'4';
2884 output[outputPos++] =
'.';
2888 output[outputPos++] =
'(';
2889 output[outputPos++] =
'1';
2890 output[outputPos++] =
'4';
2891 output[outputPos++] =
')';
2897 output[outputPos++] =
'1';
2898 output[outputPos++] =
'5';
2902 output[outputPos++] =
'1';
2903 output[outputPos++] =
'5';
2904 output[outputPos++] =
'.';
2908 output[outputPos++] =
'(';
2909 output[outputPos++] =
'1';
2910 output[outputPos++] =
'5';
2911 output[outputPos++] =
')';
2917 output[outputPos++] =
'1';
2918 output[outputPos++] =
'6';
2922 output[outputPos++] =
'1';
2923 output[outputPos++] =
'6';
2924 output[outputPos++] =
'.';
2928 output[outputPos++] =
'(';
2929 output[outputPos++] =
'1';
2930 output[outputPos++] =
'6';
2931 output[outputPos++] =
')';
2937 output[outputPos++] =
'1';
2938 output[outputPos++] =
'7';
2942 output[outputPos++] =
'1';
2943 output[outputPos++] =
'7';
2944 output[outputPos++] =
'.';
2948 output[outputPos++] =
'(';
2949 output[outputPos++] =
'1';
2950 output[outputPos++] =
'7';
2951 output[outputPos++] =
')';
2957 output[outputPos++] =
'1';
2958 output[outputPos++] =
'8';
2962 output[outputPos++] =
'1';
2963 output[outputPos++] =
'8';
2964 output[outputPos++] =
'.';
2968 output[outputPos++] =
'(';
2969 output[outputPos++] =
'1';
2970 output[outputPos++] =
'8';
2971 output[outputPos++] =
')';
2977 output[outputPos++] =
'1';
2978 output[outputPos++] =
'9';
2982 output[outputPos++] =
'1';
2983 output[outputPos++] =
'9';
2984 output[outputPos++] =
'.';
2988 output[outputPos++] =
'(';
2989 output[outputPos++] =
'1';
2990 output[outputPos++] =
'9';
2991 output[outputPos++] =
')';
2997 output[outputPos++] =
'2';
2998 output[outputPos++] =
'0';
3002 output[outputPos++] =
'2';
3003 output[outputPos++] =
'0';
3004 output[outputPos++] =
'.';
3008 output[outputPos++] =
'(';
3009 output[outputPos++] =
'2';
3010 output[outputPos++] =
'0';
3011 output[outputPos++] =
')';
3037 output[outputPos++] =
'"';
3061 output[outputPos++] =
'\'';
3079 output[outputPos++] =
'-';
3087 output[outputPos++] =
'[';
3095 output[outputPos++] =
']';
3107 output[outputPos++] =
'(';
3111 output[outputPos++] =
'(';
3112 output[outputPos++] =
'(';
3124 output[outputPos++] =
')';
3128 output[outputPos++] =
')';
3129 output[outputPos++] =
')';
3137 output[outputPos++] =
'<';
3145 output[outputPos++] =
'>';
3151 output[outputPos++] =
'{';
3157 output[outputPos++] =
'}';
3165 output[outputPos++] =
'+';
3173 output[outputPos++] =
'=';
3177 output[outputPos++] =
'!';
3181 output[outputPos++] =
'!';
3182 output[outputPos++] =
'!';
3186 output[outputPos++] =
'!';
3187 output[outputPos++] =
'?';
3191 output[outputPos++] =
'#';
3195 output[outputPos++] =
'$';
3201 output[outputPos++] =
'%';
3205 output[outputPos++] =
'&';
3211 output[outputPos++] =
'*';
3215 output[outputPos++] =
',';
3219 output[outputPos++] =
'.';
3225 output[outputPos++] =
'/';
3229 output[outputPos++] =
':';
3235 output[outputPos++] =
';';
3239 output[outputPos++] =
'?';
3243 output[outputPos++] =
'?';
3244 output[outputPos++] =
'?';
3248 output[outputPos++] =
'?';
3249 output[outputPos++] =
'!';
3253 output[outputPos++] =
'@';
3257 output[outputPos++] =
'\\';
3263 output[outputPos++] =
'^';
3267 output[outputPos++] =
'_';
3273 output[outputPos++] =
'~';
3277 output[outputPos++] = c;