/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */ using System; namespace Lucene.Net.Analysis.Standard { ///

/// <summary>
/// A grammar-based tokenizer constructed with JavaCC.
/// </summary>
/// <remarks>
/// <para>This should be a good tokenizer for most European-language documents:</para>
/// <list type="bullet">
/// <item><description>Splits words at punctuation characters, removing punctuation. However, a
/// dot that's not followed by whitespace is considered part of a token.</description></item>
/// <item><description>Splits words at hyphens, unless there's a number in the token, in which case
/// the whole token is interpreted as a product number and is not split.</description></item>
/// <item><description>Recognizes email addresses and internet hostnames as one token.</description></item>
/// </list>
/// <para>Many applications have specific tokenizer needs. If this tokenizer does
/// not suit your application, please consider copying this source code
/// directory to your project and maintaining your own grammar-based tokenizer.</para>
/// </remarks>

public class StandardTokenizer : Lucene.Net.Analysis.Tokenizer { ///

/// <summary>Constructs a tokenizer for this Reader.</summary>
/// <param name="reader">
/// Character source. It is wrapped in a <c>FastCharStream</c> for the
/// stream-based constructor and also stored in the inherited <c>input</c> field.
/// </param>
public StandardTokenizer(System.IO.TextReader reader)
    : this(new FastCharStream(reader))
{
    input = reader;
}

/// <summary>Returns the next token in the stream, or null at EOS.</summary>
/// <remarks>
/// The returned token's type is set to an element of
/// <see cref="StandardTokenizerConstants.tokenImage"/>.
/// </remarks>
/// <returns>
/// A new <c>Lucene.Net.Analysis.Token</c> built from the consumed grammar token's
/// image, <c>beginColumn</c>, <c>endColumn</c> and token-image entry, or null when
/// the consumed token's kind is <c>EOF</c>.
/// </returns>
/// <exception cref="ParseException">
/// Thrown when the lookahead token kind is not one of the kinds accepted below.
/// </exception>
public override Lucene.Net.Analysis.Token Next()
{
    // Reuse the cached lookahead kind when present; otherwise compute it.
    int kind = (jj_ntk == -1) ? Jj_ntk() : jj_ntk;

    Token consumed;
    switch (kind)
    {
        case StandardTokenizerConstants.ALPHANUM:
        case StandardTokenizerConstants.APOSTROPHE:
        case StandardTokenizerConstants.ACRONYM:
        case StandardTokenizerConstants.COMPANY:
        case StandardTokenizerConstants.EMAIL:
        case StandardTokenizerConstants.HOST:
        case StandardTokenizerConstants.NUM:
        case StandardTokenizerConstants.CJ:
        case 0:
            // Every accepted kind is consumed the same way; 'kind' equals the matched label.
            consumed = Jj_consume_token(kind);
            break;

        default:
            // Record the generation at which this choice point failed so that
            // GenerateParseException can report the expected token kinds.
            jj_la1[0] = jj_gen;
            Jj_consume_token(-1);
            throw new ParseException();
    }

    if (consumed.kind == StandardTokenizerConstants.EOF)
    {
        return null;
    }

    return new Lucene.Net.Analysis.Token(
        consumed.image,
        consumed.beginColumn,
        consumed.endColumn,
        StandardTokenizerConstants.tokenImage[consumed.kind]);
}

/// <summary>By default, closes the input Reader.</summary>
/// <remarks>Closes the token manager first, then calls the base implementation.</remarks>
public override void Close()
{
    token_source.Close();
    base.Close();
}

// Token manager that supplies the grammar tokens consumed by this parser.
public StandardTokenizerTokenManager token_source;

// 'token' is the most recently consumed token; 'jj_nt' receives the value of
// token.next whenever Jj_ntk() runs.
public Token token, jj_nt;

// Kind of the lookahead token, or -1 when it has not been computed yet.
private int jj_ntk;

// Generation counter, incremented each time a token is consumed.
private int jj_gen;

// Generation recorded at each choice point (this grammar has exactly one).
private int[] jj_la1 = new int[1];

// Bit mask per choice point: bit j set means token kind j is reported as expected there.
private static int[] jj_la1_0_Renamed_Field;

private static void jj_la1_0()
{
    jj_la1_0_Renamed_Field = new int[] { 0x10ff };
}

/// <summary>Constructs a tokenizer that reads from the given character stream.</summary>
/// <param name="stream">Stream handed to a newly created token manager.</param>
public StandardTokenizer(CharStream stream)
{
    token_source = new StandardTokenizerTokenManager(stream);
    ResetParserState();
}

/// <summary>Re-initializes the token manager with a new stream and resets parser state.</summary>
/// <param name="stream">Stream passed to the existing token manager's ReInit.</param>
public virtual void ReInit(CharStream stream)
{
    token_source.ReInit(stream);
    ResetParserState();
}

/// <summary>Constructs a tokenizer on top of an existing token manager.</summary>
/// <param name="tm">Token manager to pull tokens from.</param>
public StandardTokenizer(StandardTokenizerTokenManager tm)
{
    token_source = tm;
    ResetParserState();
}

/// <summary>Replaces the token manager and resets parser state.</summary>
/// <param name="tm">Token manager to pull tokens from.</param>
public virtual void ReInit(StandardTokenizerTokenManager tm)
{
    token_source = tm;
    ResetParserState();
}

// Shared by the constructors and ReInit overloads: installs a fresh head token,
// clears the cached lookahead kind, zeroes the generation counter, and marks
// every choice-point generation as unset (-1).
private void ResetParserState()
{
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < jj_la1.Length; i++)
    {
        jj_la1[i] = -1;
    }
}

// Advances to the next token and checks that it has the requested kind.
// On a match the generation counter is bumped and the token returned; on a
// mismatch the previous token is restored, the requested kind is remembered
// in jj_kind, and the exception built by GenerateParseException is thrown.
private Token Jj_consume_token(int kind)
{
    Token previous = token;
    if (previous.next == null)
    {
        previous.next = token_source.GetNextToken();
    }
    token = previous.next;
    jj_ntk = -1;

    if (token.kind != kind)
    {
        token = previous;
        jj_kind = kind;
        throw GenerateParseException();
    }

    jj_gen++;
    return token;
}

/// <summary>
/// Advances to the next token (fetching it from the token manager when it is not
/// already linked), clears the cached lookahead kind, and increments the generation counter.
/// </summary>
/// <returns>The token that is now current.</returns>
public Token GetNextToken()
{
    Token current = token;
    if (current.next == null)
    {
        current.next = token_source.GetNextToken();
    }
    token = current.next;
    jj_ntk = -1;
    jj_gen++;
    return token;
}

/// <summary>
/// Returns the token <paramref name="index"/> positions after the current one without
/// changing the current token; tokens not yet linked are fetched from the token manager.
/// </summary>
/// <param name="index">Number of links to follow; 0 (or less) yields the current token.</param>
public Token GetToken(int index)
{
    Token cursor = token;
    for (int step = 0; step < index; step++)
    {
        if (cursor.next == null)
        {
            cursor.next = token_source.GetNextToken();
        }
        cursor = cursor.next;
    }
    return cursor;
}

// Computes, caches in jj_ntk, and returns the kind of the token after the
// current one, fetching that token from the token manager if it is not linked yet.
private int Jj_ntk()
{
    jj_nt = token.next;
    if (jj_nt == null)
    {
        token.next = token_source.GetNextToken();
        jj_ntk = token.next.kind;
    }
    else
    {
        jj_ntk = jj_nt.kind;
    }
    return jj_ntk;
}

// Scratch list of expected-token entries, rebuilt by GenerateParseException.
private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

// Most recently created single-element entry added to jj_expentries.
private int[] jj_expentry;

// Kind requested by the last failed Jj_consume_token call, or -1 when none is pending.
private int jj_kind = -1;

/// <summary>
/// Builds a <see cref="ParseException"/> for the current token, listing the token kinds
/// that were expected: the kind of a pending failed consume (if any) plus the kinds
/// flagged in the mask of each choice point recorded at the current generation.
/// </summary>
/// <returns>The exception to throw; this method does not throw it itself.</returns>
public virtual ParseException GenerateParseException()
{
    jj_expentries.Clear();

    // Elements of a freshly allocated bool[] are already false.
    bool[] expected = new bool[16];

    if (jj_kind >= 0)
    {
        expected[jj_kind] = true;
        jj_kind = -1;
    }

    for (int choice = 0; choice < jj_la1.Length; choice++)
    {
        if (jj_la1[choice] != jj_gen)
        {
            continue;
        }

        int mask = jj_la1_0_Renamed_Field[choice];
        for (int bit = 0; bit < 32; bit++)
        {
            if ((mask & (1 << bit)) != 0)
            {
                expected[bit] = true;
            }
        }
    }

    for (int kind = 0; kind < expected.Length; kind++)
    {
        if (!expected[kind])
        {
            continue;
        }

        jj_expentry = new int[] { kind };
        jj_expentries.Add(jj_expentry);
    }

    int[][] exptokseq = new int[jj_expentries.Count][];
    for (int n = 0; n < exptokseq.Length; n++)
    {
        exptokseq[n] = (int[]) jj_expentries[n];
    }

    return new ParseException(token, exptokseq, StandardTokenizerConstants.tokenImage);
}

/// <summary>No-op; the body is empty.</summary>
public void Enable_tracing()
{
}

/// <summary>No-op; the body is empty.</summary>
public void Disable_tracing()
{
}

// Populates the static choice-point mask table once per type.
static StandardTokenizer()
{
    jj_la1_0();
}
}
}