/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* The following code was generated by JFlex 1.4.1 on 12/18/07 9:22 PM */

using System;

using Token = Lucene.Net.Analysis.Token;

namespace Lucene.Net.Analysis.Standard
{
    /// <summary>
    /// This class is a scanner generated by JFlex 1.4.1
    /// on 12/18/07 9:22 PM from the specification file
    /// /Volumes/User/grantingersoll/projects/lucene/java/lucene-clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
    /// </summary>
    class StandardTokenizerImpl
    {
        /// <summary>This character denotes the end of file.</summary>
        public const int YYEOF = - 1;

        /// <summary>Initial size of the lookahead buffer.</summary>
        private const int ZZ_BUFFERSIZE = 16384;

        /// <summary>Lexical states.</summary>
        public const int YYINITIAL = 0;

        /// <summary>
        /// Translates characters to character classes.
        /// Stored as (run length, class) character pairs.
        /// </summary>
        private const System.String ZZ_CMAP_PACKED =
            "\x0009\x0000\x0001\x0000\x0001\x000E\x0001\x0000\x0001\x0000\x0001\x000D\x0012\x0000\x0001\x0000\x0005\x0000\x0001\x0003" +
            "\x0001\x0001\x0004\x0000\x0001\x0007\x0001\x0005\x0001\x0002\x0001\x0007\x000A\x0009\x0006\x0000\x0001\x0004\x001A\x0008" +
            "\x0004\x0000\x0001\x0006\x0001\x0000\x001A\x0008\x0045\x0000\x0017\x0008\x0001\x0000\x001F\x0008\x0001\x0000\u0568\x0008" +
            "\x000A\x000A\x0086\x0008\x000A\x000A\u026c\x0008\x000A\x000A\x0076\x0008\x000A\x000A\x0076\x0008\x000A\x000A\x0076\x0008" +
            "\x000A\x000A\x0076\x0008\x000A\x000A\x0077\x0008\x0009\x000A\x0076\x0008\x000A\x000A\x0076\x0008\x000A\x000A\x0076\x0008" +
            "\x000A\x000A\x00E0\x0008\x000A\x000A\x0076\x0008\x000A\x000A\u0166\x0008\x000A\x000A\x00B6\x0008\u0100\x0008\u0e00\x0008" +
            "\u1040\x0000\u0150\x000C\x0060\x0000\x0010\x000C\u0100\x0000\x0080\x000C\x0080\x0000\u19c0\x000C\x0040\x0000\u5200\x000C" +
            "\u0c00\x0000\u2bb0\x000B\u2150\x0000\u0200\x000C\u0465\x0000\x003B\x000C\x003D\x0008\x0023\x0000";

        /// <summary>Translates characters to character classes (unpacked form).</summary>
        private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);

        /// <summary>Translates DFA states to action switch labels.</summary>
        private static readonly int[] ZZ_ACTION = ZzUnpackAction();

        /// <summary>Packed form of the action table: (run length, action) character pairs.</summary>
        private const System.String ZZ_ACTION_PACKED_0 =
            "\x0001\x0000\x0001\x0001\x0004\x0002\x0001\x0003\x0001\x0001\x0006\x0000\x0002\x0002\x0006\x0000" +
            "\x0001\x0004\x0004\x0005\x0002\x0006\x0002\x0000\x0001\x0007\x0001\x0000\x0001\x0007\x0003\x0005" +
            "\x0006\x0007\x0003\x0005\x0001\x0008\x0001\x0000\x0001\x0009\x0002\x0000\x0001\x0008\x0001\x0009" +
            "\x0001\x0000\x0002\x0009\x0002\x0008\x0002\x0005\x0001\x000A";

        /// <summary>Builds the 61-entry action table from its packed form.</summary>
        private static int[] ZzUnpackAction()
        {
            int[] table = new int[61];
            ZzUnpackAction(ZZ_ACTION_PACKED_0, 0, table);
            return table;
        }

        /// <summary>
        /// Expands (run length, value) pairs from <paramref name="packed"/> into
        /// <paramref name="result"/>, starting at <paramref name="offset"/>.
        /// </summary>
        /// <param name="packed">the run-length encoded table</param>
        /// <param name="offset">first index of <paramref name="result"/> to write</param>
        /// <param name="result">the array receiving the expanded values</param>
        /// <returns>the index just past the last element written</returns>
        private static int ZzUnpackAction(System.String packed, int offset, int[] result)
        {
            int outPos = offset; /* index in unpacked array */
            for (int inPos = 0; inPos < packed.Length; inPos += 2) /* inPos: index in packed string */
            {
                int runLength = packed[inPos];
                int entry = packed[inPos + 1];
                // Every pair yields at least one element, even for a run length of zero.
                result[outPos++] = entry;
                for (int k = 1; k < runLength; k++)
                {
                    result[outPos++] = entry;
                }
            }
            return outPos;
        }

        /// <summary>Translates a state to a row index in the transition table.</summary>
        private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();

        /// <summary>Packed form of the row map: each entry is a (high 16 bits, low 16 bits) character pair.</summary>
        private const System.String ZZ_ROWMAP_PACKED_0 =
            "\x0000\x0000\x0000\x000F\x0000\x001E\x0000\x002D\x0000\x003C\x0000\x004B\x0000\x000F\x0000\x005A" +
            "\x0000\x0069\x0000\x0078\x0000\x0087\x0000\x0096\x0000\x00A5\x0000\x00B4\x0000\x00C3\x0000\x00D2" +
            "\x0000\x00E1\x0000\x00F0\x0000\x00FF\x0000\u010e\x0000\u011d\x0000\u012c\x0000\u013b\x0000\u014a" +
            "\x0000\u0159\x0000\u0168\x0000\u0177\x0000\x0087\x0000\u0186\x0000\u0195\x0000\u01a4\x0000\u01b3" +
            "\x0000\u01c2\x0000\u01d1\x0000\u01e0\x0000\u01ef\x0000\u01fe\x0000\u020d\x0000\u021c\x0000\u022b" +
            "\x0000\u023a\x0000\u0249\x0000\u0258\x0000\u0267\x0000\u0276\x0000\u0285\x0000\u0294\x0000\u02a3" +
            "\x0000\u02b2\x0000\u02c1\x0000\u02d0\x0000\u02df\x0000\u02ee\x0000\u02fd\x0000\u012c\x0000\x00E1" +
            "\x0000\x0078\x0000\u011d\x0000\u030c\x0000\u031b\x0000\u032a";

        /// <summary>Builds the 61-entry row map from its packed form.</summary>
        private static int[] ZzUnpackRowMap()
        {
            int[] table = new int[61];
            ZzUnpackRowMap(ZZ_ROWMAP_PACKED_0, 0, table);
            return table;
        }

        /// <summary>
        /// Combines consecutive character pairs of <paramref name="packed"/> into 32-bit values
        /// (first character is the high half) and stores them in <paramref name="result"/>.
        /// </summary>
        /// <param name="packed">the packed row map</param>
        /// <param name="offset">first index of <paramref name="result"/> to write</param>
        /// <param name="result">the array receiving the row indices</param>
        /// <returns>the index just past the last element written</returns>
        private static int ZzUnpackRowMap(System.String packed, int offset, int[] result)
        {
            int outPos = offset; /* index in unpacked array */
            for (int inPos = 0; inPos < packed.Length; inPos += 2) /* inPos: index in packed string */
            {
                int high = packed[inPos] << 16;
                result[outPos++] = high | packed[inPos + 1];
            }
            return outPos;
        }

        /// <summary>The transition table of the DFA.</summary>
        private static readonly int[] ZZ_TRANS = ZzUnpackTrans();

        /// <summary>
        /// Packed form of the transition table: (run length, target + 1) character pairs,
        /// so a stored 0 unpacks to -1.
        /// </summary>
        private const System.String ZZ_TRANS_PACKED_0 =
            "\x0008\x0002\x0001\x0003\x0001\x0004\x0001\x0005\x0001\x0006\x0001\x0007\x0001\x0008\x0001\x0002" +
            "\x0010\x0000\x0001\x0009\x0001\x000A\x0001\x000B\x0001\x000C\x0002\x000D\x0001\x000E\x0001\x000F" +
            "\x0001\x0004\x0001\x0010\x0001\x0006\x0005\x0000\x0001\x0011\x0001\x0000\x0001\x0012\x0002\x0013" +
            "\x0001\x0014\x0003\x0004\x0001\x0006\x0004\x0000\x0001\x0009\x0001\x0015\x0001\x000B\x0001\x000C" +
            "\x0002\x0013\x0001\x0014\x0001\x0010\x0001\x0004\x0001\x0010\x0001\x0006\x0005\x0000\x0001\x0016" +
            "\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0006\x0011\x0000\x0001\x0002\x0008\x0000" +
            "\x0001\x0017\x0001\x0000\x0001\x0017\x000C\x0000\x0001\x0018\x0001\x0019\x0001\x001A\x0001\x001B" +
            "\x000B\x0000\x0001\x001C\x0001\x0000\x0001\x001C\x000C\x0000\x0001\x001D\x0001\x001E\x0001\x001D" +
            "\x0001\x001E\x000B\x0000\x0001\x001F\x0002\x0020\x0001\x0021\x000B\x0000\x0001\x000E\x0002\x0022" +
            "\x0005\x0000\x0001\x0009\x0001\x0016\x0001\x000B\x0001\x000C\x0002\x000D\x0001\x000E\x0001\x000F" +
            "\x0001\x0004\x0001\x0010\x0001\x0006\x0004\x0000\x0001\x0009\x0001\x0011\x0001\x000B\x0001\x000C" +
            "\x0002\x0013\x0001\x0014\x0001\x0010\x0001\x0004\x0001\x0010\x0001\x0006\x000B\x0000\x0001\x0023" +
            "\x0002\x0024\x0001\x0025\x000B\x0000\x0004\x001E\x000B\x0000\x0001\x0026\x0002\x0027\x0001\x0028" +
            "\x000B\x0000\x0001\x0029\x0002\x002A\x0001\x002B\x000B\x0000\x0001\x002C\x0001\x0024\x0001\x002D" +
            "\x0001\x0025\x000B\x0000\x0001\x002E\x0002\x0019\x0001\x001B\x0004\x0000\x0001\x0009\x0006\x0000" +
            "\x0001\x0017\x0001\x0000\x0001\x0017\x0006\x0000\x0001\x002F\x0001\x0000\x0001\x0012\x0002\x0030" +
            "\x0001\x0000\x0001\x002E\x0002\x0019\x0001\x001B\x0005\x0000\x0001\x0031\x0001\x0000\x0001\x0012" +
            "\x0002\x0032\x0001\x0033\x0003\x0019\x0001\x001B\x0005\x0000\x0001\x0034\x0001\x0000\x0001\x0012" +
            "\x0002\x0032\x0001\x0033\x0003\x0019\x0001\x001B\x0005\x0000\x0001\x0035\x0001\x0000\x0001\x0012" +
            "\x0002\x0030\x0001\x0000\x0004\x001B\x0005\x0000\x0001\x0036\x0002\x0000\x0001\x0036\x0002\x0000" +
            "\x0001\x001D\x0001\x001E\x0001\x001D\x0001\x001E\x0005\x0000\x0001\x0036\x0002\x0000\x0001\x0036" +
            "\x0002\x0000\x0004\x001E\x0005\x0000\x0001\x0030\x0001\x0000\x0001\x0012\x0002\x0030\x0001\x0000" +
            "\x0001\x001F\x0002\x0020\x0001\x0021\x0005\x0000\x0001\x0032\x0001\x0000\x0001\x0012\x0002\x0032" +
            "\x0001\x0033\x0003\x0020\x0001\x0021\x0005\x0000\x0001\x0030\x0001\x0000\x0001\x0012\x0002\x0030" +
            "\x0001\x0000\x0004\x0021\x0005\x0000\x0001\x0033\x0002\x0000\x0003\x0033\x0003\x0022\x0006\x0000" +
            "\x0001\x0037\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0001\x0023\x0002\x0024\x0001\x0025" +
            "\x0005\x0000\x0001\x0038\x0001\x0000\x0001\x0012\x0002\x0013\x0001\x0014\x0003\x0024\x0001\x0025" +
            "\x0005\x0000\x0001\x0037\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0025\x0005\x0000" +
            "\x0001\x000D\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0001\x0026\x0002\x0027\x0001\x0028" +
            "\x0005\x0000\x0001\x0013\x0001\x0000\x0001\x0012\x0002\x0013\x0001\x0014\x0003\x0027\x0001\x0028" +
            "\x0005\x0000\x0001\x000D\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0028\x0005\x0000" +
            "\x0001\x000E\x0002\x0000\x0003\x000E\x0001\x0029\x0002\x002A\x0001\x002B\x0005\x0000\x0001\x0014" +
            "\x0002\x0000\x0003\x0014\x0003\x002A\x0001\x002B\x0005\x0000\x0001\x000E\x0002\x0000\x0003\x000E" +
            "\x0004\x002B\x0005\x0000\x0001\x0039\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0001\x0023" +
            "\x0002\x0024\x0001\x0025\x0005\x0000\x0001\x003A\x0001\x0000\x0001\x0012\x0002\x0013\x0001\x0014" +
            "\x0003\x0024\x0001\x0025\x0005\x0000\x0001\x0035\x0001\x0000\x0001\x0012\x0002\x0030\x0001\x0000" +
            "\x0001\x002E\x0002\x0019\x0001\x001B\x000B\x0000\x0001\x003B\x0001\x001B\x0001\x003B\x0001\x001B" +
            "\x000B\x0000\x0004\x0021\x000B\x0000\x0004\x0025\x000B\x0000\x0004\x0028\x000B\x0000\x0004\x002B" +
            "\x000B\x0000\x0001\x003C\x0001\x0025\x0001\x003C\x0001\x0025\x000B\x0000\x0004\x001B\x000B\x0000" +
            "\x0004\x003D\x0005\x0000\x0001\x002F\x0001\x0000\x0001\x0012\x0002\x0030\x0001\x0000\x0004\x001B" +
            "\x0005\x0000\x0001\x0039\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0025\x0005\x0000" +
            "\x0001\x0036\x0002\x0000\x0001\x0036\x0002\x0000\x0004\x003D\x0003\x0000";

        /// <summary>Builds the 825-entry transition table from its packed form.</summary>
        private static int[] ZzUnpackTrans()
        {
            int[] table = new int[825];
            ZzUnpackTrans(ZZ_TRANS_PACKED_0, 0, table);
            return table;
        }

        /// <summary>
        /// Expands (run length, value) pairs from <paramref name="packed"/> into
        /// <paramref name="result"/>, subtracting one from every value.
        /// </summary>
        /// <param name="packed">the run-length encoded transition table</param>
        /// <param name="offset">first index of <paramref name="result"/> to write</param>
        /// <param name="result">the array receiving the expanded transitions</param>
        /// <returns>the index just past the last element written</returns>
        private static int ZzUnpackTrans(System.String packed, int offset, int[] result)
        {
            int outPos = offset; /* index in unpacked array */
            for (int inPos = 0; inPos < packed.Length; inPos += 2) /* inPos: index in packed string */
            {
                int runLength = packed[inPos];
                // Values are stored shifted up by one so that -1 fits in a char.
                int target = packed[inPos + 1] - 1;
                // Every pair yields at least one element, even for a run length of zero.
                result[outPos++] = target;
                for (int k = 1; k < runLength; k++)
                {
                    result[outPos++] = target;
                }
            }
            return outPos;
        }

        /* error codes */
        private const int ZZ_UNKNOWN_ERROR = 0;
        private const int ZZ_NO_MATCH = 1;
        private const int ZZ_PUSHBACK_2BIG = 2;

        /* error messages for the codes above ("Unkown" [sic] is kept as-is: it is runtime text) */
        private static readonly System.String[] ZZ_ERROR_MSG = new System.String[]{"Unkown internal scanner error", "Error: could not match input", "Error: pushback value was too large"};

        /// <summary>ZZ_ATTRIBUTE[aState] contains the attributes of state aState.</summary>
        private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();

        /// <summary>Packed form of the attribute table: (run length, attribute) character pairs.</summary>
        private const System.String ZZ_ATTRIBUTE_PACKED_0 =
            "\x0001\x0000\x0001\x0009\x0004\x0001\x0001\x0009\x0001\x0001\x0006\x0000\x0002\x0001\x0006\x0000" +
            "\x0007\x0001\x0002\x0000\x0001\x0001\x0001\x0000\x000E\x0001\x0001\x0000\x0001\x0001\x0002\x0000" +
            "\x0002\x0001\x0001\x0000\x0007\x0001";

        /// <summary>Builds the 61-entry attribute table from its packed form.</summary>
        private static int[] ZzUnpackAttribute()
        {
            int[] table = new int[61];
            ZzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, 0, table);
            return table;
        }

        /// <summary>
        /// Expands (run length, value) pairs from <paramref name="packed"/> into
        /// <paramref name="result"/>, starting at <paramref name="offset"/>.
        /// </summary>
        /// <param name="packed">the run-length encoded attribute table</param>
        /// <param name="offset">first index of <paramref name="result"/> to write</param>
        /// <param name="result">the array receiving the expanded attributes</param>
        /// <returns>the index just past the last element written</returns>
        private static int ZzUnpackAttribute(System.String packed, int offset, int[] result)
        {
            int outPos = offset; /* index in unpacked array */
            for (int inPos = 0; inPos < packed.Length; inPos += 2) /* inPos: index in packed string */
            {
                int runLength = packed[inPos];
                int entry = packed[inPos + 1];
                // Every pair yields at least one element, even for a run length of zero.
                result[outPos++] = entry;
                for (int k = 1; k < runLength; k++)
                {
                    result[outPos++] = entry;
                }
            }
            return outPos;
        }

        /// <summary>The input device.</summary>
        private System.IO.TextReader zzReader;

        /// <summary>The current state of the DFA.</summary>
        private int zzState;

        /// <summary>The current lexical state.</summary>
        private int zzLexicalState = YYINITIAL;

        /// <summary>
        /// This buffer contains the current text to be matched and is
        /// the source of the Yytext() string.
        /// </summary>
        private char[] zzBuffer = new char[ZZ_BUFFERSIZE];

        /// <summary>The text position at the last accepting state.</summary>
        private int zzMarkedPos;

        /// <summary>The text position at the last state to be included in yytext.</summary>
        private int zzPushbackPos;
/// <summary>The current text position in the buffer.</summary>
private int zzCurrentPos;

/// <summary>startRead marks the beginning of the Yytext() string in the buffer.</summary>
private int zzStartRead;

/// <summary>
/// endRead marks the last character in the buffer, that has been read
/// from input.
/// </summary>
private int zzEndRead;

/// <summary>Number of newlines encountered up to the start of the matched text.</summary>
private int yyline;

/// <summary>The number of characters up to the start of the matched text.</summary>
private int yychar;

/// <summary>
/// The number of characters from the last newline up to the start of the
/// matched text.
/// </summary>
private int yycolumn;

/// <summary>True iff the scanner is currently at the beginning of a line.</summary>
private bool zzAtBOL = true;

/// <summary>True iff the scanner is at the EOF.</summary>
private bool zzAtEOF;

/* user code: */

public const int ALPHANUM = 0;
public const int APOSTROPHE = 1;
public const int ACRONYM = 2;
public const int COMPANY = 3;
public const int EMAIL = 4;
public const int HOST = 5;
public const int NUM = 6;
public const int CJ = 7;

/// <summary>
/// This solves a bug where HOSTs that end with '.' are identified
/// as ACRONYMs. It is deprecated and will be removed in the next
/// release.
/// </summary>
public const int ACRONYM_DEP = 8;

/// <summary>
/// Label for each token type, indexed by the token-type constants above
/// (ALPHANUM = 0 through ACRONYM_DEP = 8).
/// </summary>
/// <remarks>
/// Every entry used to be an empty string, which made all token types
/// indistinguishable by label; each slot now carries the name of the
/// constant that indexes it, wrapped in angle brackets.
/// </remarks>
public static readonly System.String[] TOKEN_TYPES = new System.String[]
{
    "<ALPHANUM>",
    "<APOSTROPHE>",
    "<ACRONYM>",
    "<COMPANY>",
    "<EMAIL>",
    "<HOST>",
    "<NUM>",
    "<CJ>",
    "<ACRONYM_DEP>"
};

/// <summary>Returns the number of characters up to the start of the matched text.</summary>
public int Yychar()
{
    return yychar;
}

/// <summary>Fills Lucene token with the current token text.</summary>
/// <param name="t">the token whose term buffer receives the matched text</param>
internal void GetText(Token t)
{
    // The matched region is [zzStartRead, zzMarkedPos) inside zzBuffer.
    int matchedLength = zzMarkedPos - zzStartRead;
    t.SetTermBuffer(zzBuffer, zzStartRead, matchedLength);
}

/// <summary>
/// Creates a new scanner.
/// There is also a System.IO.Stream version of this constructor.
/// </summary>
/// <param name="in_Renamed">the reader to read input from.</param>
internal StandardTokenizerImpl(System.IO.TextReader in_Renamed)
{
    this.zzReader = in_Renamed;
}

/// <summary>
/// Creates a new scanner.
/// There is also a System.IO.TextReader version of this constructor.
/// </summary>
/// <param name="in_Renamed">the stream to read input from.</param>
/// <remarks>The stream is wrapped in a StreamReader that decodes with System.Text.Encoding.Default.</remarks>
internal StandardTokenizerImpl(System.IO.Stream in_Renamed)
    : this(new System.IO.StreamReader(in_Renamed, System.Text.Encoding.Default))
{
}

/// <summary>Unpacks the compressed character translation table.</summary>
/// <param name="packed">the packed character translation table, as (run length, class) pairs</param>
/// <returns>the unpacked character translation table (0x10000 entries)</returns>
private static char[] ZzUnpackCMap(System.String packed)
{
    char[] map = new char[0x10000];
    int i = 0; /* index in packed string */
    int j = 0; /* index in unpacked array */
    // Use the real length instead of a hard-coded 156 so this stays in step
    // with the packed table, like the other ZzUnpack* helpers.
    int l = packed.Length;
    while (i < l)
    {
        int count = packed[i++];
        char value_Renamed = packed[i++];
        // Every pair yields at least one element, even for a run length of zero.
        do
        {
            map[j++] = value_Renamed;
        }
        while (--count > 0);
    }
    return map;
}

/// <summary>Refills the input buffer.</summary>
/// <returns>false iff there was new input; true when the reader delivered no characters.</returns>
/// <remarks>Any I/O error raised by the underlying reader propagates to the caller.</remarks>
private bool ZzRefill()
{
    /* first: make room (if you can) by sliding the unread tail to the front */
    int consumed = zzStartRead;
    if (consumed > 0)
    {
        Array.Copy(zzBuffer, consumed, zzBuffer, 0, zzEndRead - consumed);

        /* translate stored positions */
        zzEndRead -= consumed;
        zzCurrentPos -= consumed;
        zzMarkedPos -= consumed;
        zzPushbackPos -= consumed;
        zzStartRead = 0;
    }

    /* is the buffer big enough? if not: blow it up */
    if (zzCurrentPos >= zzBuffer.Length)
    {
        char[] newBuffer = new char[zzCurrentPos * 2];
        Array.Copy(zzBuffer, 0, newBuffer, 0, zzBuffer.Length);
        zzBuffer = newBuffer;
    }

    /* finally: fill the buffer with new input */
    int numRead = zzReader.Read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead);

    // A count below one (zero or negative) is treated as end of input.
    if (numRead < 1)
    {
        return true;
    }

    zzEndRead += numRead;
    return false;
}

/// <summary>Closes the input stream.</summary>
public void Yyclose()
{
    zzAtEOF = true;          /* indicate end of file */
    zzEndRead = zzStartRead; /* invalidate buffer */

    if (zzReader != null)
    {
        zzReader.Close();
    }
}

/// <summary>
/// Resets the scanner to read from a new input stream.
/// Does not close the old reader.
/// </summary>
/// <remarks>
/// All internal variables are reset, the old input stream
/// cannot be reused (internal buffer is discarded and lost).
/// Lexical state is set to YYINITIAL.
/// </remarks>
/// <param name="reader">the new input stream</param>
public void Yyreset(System.IO.TextReader reader)
{
    zzReader = reader;

    zzAtBOL = true;
    zzAtEOF = false;

    // Rewind every buffer position; the buffer contents are simply abandoned.
    zzStartRead = 0;
    zzEndRead = 0;
    zzPushbackPos = 0;
    zzMarkedPos = 0;
    zzCurrentPos = 0;

    // Restart the position counters.
    yycolumn = 0;
    yychar = 0;
    yyline = 0;

    zzLexicalState = YYINITIAL;
}

/// <summary>Returns the current lexical state.</summary>
public int Yystate()
{
    return zzLexicalState;
}

/// <summary>Enters a new lexical state.</summary>
/// <param name="newState">the new lexical state</param>
public void Yybegin(int newState)
{
    zzLexicalState = newState;
}

/// <summary>Returns the text matched by the current regular expression.</summary>
public System.String Yytext()
{
    int matchedLength = zzMarkedPos - zzStartRead;
    return new System.String(zzBuffer, zzStartRead, matchedLength);
}

/// <summary>
/// Returns the character at position <paramref name="pos"/> from the
/// matched text. It is equivalent to Yytext()[pos], but faster.
/// </summary>
/// <param name="pos">the position of the character to fetch.
/// A value from 0 to Yylength()-1.</param>
/// <returns>the character at position pos</returns>
public char Yycharat(int pos)
{
    int absoluteIndex = zzStartRead + pos;
    return zzBuffer[absoluteIndex];
}

/// <summary>Returns the length of the matched text region.</summary>
public int Yylength()
{
    return zzMarkedPos - zzStartRead;
}

/// <summary>Reports an error that occurred while scanning.</summary>
/// <remarks>
/// In a wellformed scanner (no or only correct usage of
/// Yypushback(int) and a match-all fallback rule) this method
/// will only be called with things that "Can't Possibly Happen".
/// If this method is called, something is seriously wrong
/// (e.g. a JFlex bug producing a faulty scanner etc.).
///
/// Usual syntax/scanner level error handling should be done
/// in error fallback rules.
/// </remarks>
/// <param name="errorCode">the code of the error message to display</param>
private void ZzScanError(int errorCode)
{
    // Codes outside the message table fall back to the generic message.
    bool known = errorCode >= 0 && errorCode < ZZ_ERROR_MSG.Length;
    System.String message = known ? ZZ_ERROR_MSG[errorCode] : ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];

    throw new System.ApplicationException(message);
}

/// <summary>Pushes the specified amount of characters back into the input stream.</summary>
/// <remarks>They will be read again by the next call of the scanning method.</remarks>
/// <param name="number">the number of characters to be read again.
/// This number must not be greater than Yylength()!</param>
public virtual void Yypushback(int number)
{
    int matchedLength = zzMarkedPos - zzStartRead;
    if (number > matchedLength)
    {
        ZzScanError(ZZ_PUSHBACK_2BIG);
    }

    zzMarkedPos -= number;
}

/// <summary>
/// Resumes scanning until the next regular expression is matched,
/// the end of input is encountered or an I/O-Error occurs.
/// </summary>
/// <returns>the next token type (one of the token-type constants), or YYEOF at end of input</returns>
/// <remarks>
/// Input that matches no rule is reported through ZzScanError, which throws
/// System.ApplicationException. I/O errors from the reader propagate unchanged.
/// </remarks>
public virtual int GetNextToken()
{
    int zzInput;
    int zzAction;

    // Local copies of the hot fields and tables used by the matching loop.
    int currentPos;
    int markedPos;
    int endRead = zzEndRead;
    char[] buffer = zzBuffer;
    char[] charClasses = ZZ_CMAP;
    int[] transitions = ZZ_TRANS;
    int[] rowMap = ZZ_ROWMAP;
    int[] stateAttributes = ZZ_ATTRIBUTE;

    while (true)
    {
        markedPos = zzMarkedPos;

        // Account for the previous match before moving the start marker.
        yychar += markedPos - zzStartRead;

        zzAction = - 1;

        // The new match starts where the previous one ended.
        zzStartRead = markedPos;
        zzCurrentPos = markedPos;
        currentPos = markedPos;

        zzState = zzLexicalState;

        // Run the DFA until it has no transition, hits a final state, or input ends.
        while (true)
        {
            if (currentPos < endRead)
            {
                zzInput = buffer[currentPos++];
            }
            else if (zzAtEOF)
            {
                zzInput = YYEOF;
                break;
            }
            else
            {
                // store back cached positions
                zzCurrentPos = currentPos;
                zzMarkedPos = markedPos;

                bool eof = ZzRefill();

                // get translated positions and possibly new buffer
                currentPos = zzCurrentPos;
                markedPos = zzMarkedPos;
                buffer = zzBuffer;
                endRead = zzEndRead;

                if (eof)
                {
                    zzInput = YYEOF;
                    break;
                }

                zzInput = buffer[currentPos++];
            }

            int next = transitions[rowMap[zzState] + charClasses[zzInput]];
            if (next == - 1)
            {
                break;
            }
            zzState = next;

            int attr = stateAttributes[zzState];
            if ((attr & 1) == 1)
            {
                // Attribute bit 1: remember this state as the latest match.
                zzAction = zzState;
                markedPos = currentPos;

                // Attribute bit 8: stop scanning right here.
                if ((attr & 8) == 8)
                {
                    break;
                }
            }
        }

        // store back cached position
        zzMarkedPos = markedPos;

        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction])
        {
            case 5:
                return HOST;

            case 9:
                return ACRONYM_DEP;

            case 8:
                return ACRONYM;

            case 7:
                return NUM;

            case 3:
                return CJ;

            case 2:
                return ALPHANUM;

            case 6:
                return COMPANY;

            case 4:
                return APOSTROPHE;

            case 10:
                return EMAIL;

            case 1:  /* ignore */
            case 11:
            case 12:
            case 13:
            case 14:
            case 15:
            case 16:
            case 17:
            case 18:
            case 19:
            case 20:
                // Nothing to return: go round again for the next match.
                break;

            default:
                if (zzInput == YYEOF && zzStartRead == zzCurrentPos)
                {
                    zzAtEOF = true;
                    return YYEOF;
                }
                else
                {
                    ZzScanError(ZZ_NO_MATCH);
                }
                break;
        }
    }
}
}
}