/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Lucene.Net.Search.Highlight.Test
{
    /// <summary>
    /// Splits a string into tokens separated by a configurable set of delimiter characters.
    /// </summary>
    /// <remarks>
    /// The enumerator members do NOT follow the usual <see cref="IEnumerator{T}"/> contract:
    /// <see cref="MoveNext"/> only reports whether a token is available (it does not advance),
    /// and every read of <see cref="Current"/> consumes and returns the next token.
    /// Read <see cref="Current"/> exactly once per successful <see cref="MoveNext"/>.
    /// </remarks>
    public class Tokenizer : IEnumerator<string>
    {
        /// Position over the string
        private int currentPos = 0;

        /// Include delimiters in the results.
        private bool includeDelims = false;

        /// Char representation of the String to tokenize.
        private char[] chars = null;

        //The tokenizer uses the default delimiter set: the space character, the tab character,
        //the newline character, the carriage-return character and the form-feed character
        private string delimiters = " \t\n\r\f";

        /// <summary>
        /// Initializes a new class instance with a specified string to process
        /// </summary>
        /// <param name="source">String to tokenize</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="source"/> is null.</exception>
        public Tokenizer(System.String source)
        {
            if (source == null)
            {
                throw new System.ArgumentNullException("source");
            }

            this.chars = source.ToCharArray();
        }

        /// <summary>
        /// Initializes a new class instance with a specified string to process
        /// and the specified token delimiters to use
        /// </summary>
        /// <param name="source">String to tokenize</param>
        /// <param name="delimiters">String containing the delimiters</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="delimiters"/> is null.
        /// </exception>
        public Tokenizer(System.String source, System.String delimiters)
            : this(source)
        {
            if (delimiters == null)
            {
                throw new System.ArgumentNullException("delimiters");
            }

            this.delimiters = delimiters;
        }

        /// <summary>
        /// Initializes a new class instance with a specified string to process, the specified token
        /// delimiters to use, and whether the delimiters must be included in the results.
        /// </summary>
        /// <param name="source">String to tokenize</param>
        /// <param name="delimiters">String containing the delimiters</param>
        /// <param name="includeDelims">Determines if delimiters are included in the results.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="delimiters"/> is null.
        /// </exception>
        public Tokenizer(System.String source, System.String delimiters, bool includeDelims)
            : this(source, delimiters)
        {
            this.includeDelims = includeDelims;
        }

        /// <summary>
        /// Returns the next token from the token list
        /// </summary>
        /// <returns>The string value of the token</returns>
        /// <exception cref="System.ArgumentOutOfRangeException">There are no more tokens.</exception>
        public System.String NextToken()
        {
            return NextToken(this.delimiters);
        }

        /// <summary>
        /// Returns the next token from the source string, using the provided
        /// token delimiters
        /// </summary>
        /// <remarks>
        /// The supplied delimiters replace the tokenizer's delimiter set permanently, not just
        /// for this call; later calls to <see cref="NextToken()"/>, <see cref="HasMoreTokens"/>
        /// and <see cref="Count"/> use them too.
        /// </remarks>
        /// <param name="delimiters">String containing the delimiters to use</param>
        /// <returns>The string value of the token</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="delimiters"/> is null.</exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// There are no more tokens; the current position is left unchanged.
        /// </exception>
        public System.String NextToken(System.String delimiters)
        {
            // Validate before touching state so a bad argument cannot corrupt the tokenizer.
            if (delimiters == null)
            {
                throw new System.ArgumentNullException("delimiters");
            }

            //According to documentation, the usage of the received delimiters should be temporary (only for this call).
            //However, it seems it is not true, so the following line is necessary.
            this.delimiters = delimiters;

            int position = this.currentPos;
            int start;
            int length;
            if (!TryScanToken(ref position, out start, out length))
            {
                throw new System.ArgumentOutOfRangeException();
            }

            this.currentPos = position;

            // Build the token in one allocation instead of appending char by char.
            return new string(this.chars, start, length);
        }

        /// <summary>
        /// Determines if there are more tokens to return from the source string
        /// </summary>
        /// <returns>True or false, depending if there are more tokens</returns>
        public bool HasMoreTokens()
        {
            // Scan on a copy of the position so the tokenizer itself does not move.
            int position = this.currentPos;
            int start;
            int length;
            return TryScanToken(ref position, out start, out length);
        }

        /// <summary>
        /// Remaining tokens count
        /// </summary>
        /// <remarks>Counting does not move the current position.</remarks>
        public int Count
        {
            get
            {
                int position = this.currentPos;
                int start;
                int length;
                int count = 0;
                while (TryScanToken(ref position, out start, out length))
                {
                    count++;
                }

                return count;
            }
        }

        /// <summary>
        /// Performs the same action as NextToken: every read consumes and returns the next token.
        /// </summary>
        /// <exception cref="System.ArgumentOutOfRangeException">There are no more tokens.</exception>
        public string Current
        {
            get { return this.NextToken(); }
        }

        /// <summary>
        /// Performs the same action as NextToken.
        /// </summary>
        object IEnumerator.Current
        {
            get { return Current; }
        }

        /// <summary>
        /// Performs the same action as HasMoreTokens. Does not advance the tokenizer.
        /// </summary>
        /// <returns>True or false, depending if there are more tokens</returns>
        public bool MoveNext()
        {
            return this.HasMoreTokens();
        }

        /// <summary>
        /// Does nothing.
        /// </summary>
        public void Reset()
        {
        }

        /// <summary>
        /// Does nothing.
        /// </summary>
        public void Dispose()
        {
        }

        /// <summary>
        /// Tells whether <paramref name="c"/> belongs to the current delimiter set.
        /// </summary>
        private bool IsDelimiter(char c)
        {
            return this.delimiters.IndexOf(c) != -1;
        }

        /// <summary>
        /// Locates the next token at or after <paramref name="position"/> without allocating
        /// and without touching the tokenizer's own position.
        /// </summary>
        /// <param name="position">
        /// Where to start scanning. On success it is advanced past the token;
        /// on failure it is left unchanged.
        /// </param>
        /// <param name="start">Index of the token's first character in the source.</param>
        /// <param name="length">Number of characters in the token.</param>
        /// <returns>True if a token was found; false if no tokens remain.</returns>
        private bool TryScanToken(ref int position, out int start, out int length)
        {
            start = 0;
            length = 0;

            int end = this.chars.Length;
            int index = position;

            //at the end
            if (index >= end)
            {
                return false;
            }

            //if over a delimiter and delimiters must be returned
            if (this.includeDelims && IsDelimiter(this.chars[index]))
            {
                start = index;
                length = 1;
                position = index + 1;
                return true;
            }

            //skip possible delimiters
            while (index < end && IsDelimiter(this.chars[index]))
            {
                index++;
            }

            //only delimiters were left (i.e. there are no more tokens)
            if (index == end)
            {
                return false;
            }

            //getting the token: runs up to the next delimiter or the end of the source
            start = index;
            while (index < end && !IsDelimiter(this.chars[index]))
            {
                index++;
            }

            length = index - start;
            position = index;
            return true;
        }
    }
}