/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

namespace Lucene.Net.Analysis
{
    /// <summary>
    /// A Token is an occurrence of a term from the text of a field. It consists of
    /// a term's text, the start and end offset of the term in the text of the field,
    /// and a type string.
    /// </summary>
    /// <remarks>
    /// <para>
    /// The start and end offsets permit applications to re-associate a token with
    /// its source text, e.g., to display highlighted query terms in a document
    /// browser, or to show matching text fragments in a KWIC (KeyWord In Context)
    /// display, etc.
    /// </para>
    /// <para>
    /// The type is an interned string, assigned by a lexical analyzer
    /// (a.k.a. tokenizer), naming the lexical or syntactic class that the token
    /// belongs to. For example an end of sentence marker token might be implemented
    /// with type "eos". The default token type is "word".
    /// </para>
    /// </remarks>
    public sealed class Token
    {
        internal System.String termText;        // the text of the term
        internal int startOffset;               // start in source text
        internal int endOffset;                 // end in source text
        internal System.String type = "word";   // lexical type

        private int positionIncrement = 1;

        /// <summary>
        /// Constructs a Token with the given term text, and start and end offsets.
        /// The type defaults to "word".
        /// </summary>
        /// <param name="text">The term text.</param>
        /// <param name="start">Start offset of the term in the source text.</param>
        /// <param name="end">End offset of the term in the source text.</param>
        public Token(System.String text, int start, int end)
        {
            termText = text;
            startOffset = start;
            endOffset = end;
        }

        /// <summary>
        /// Constructs a Token with the given text, start and end offsets, and type.
        /// </summary>
        /// <param name="text">The term text.</param>
        /// <param name="start">Start offset of the term in the source text.</param>
        /// <param name="end">End offset of the term in the source text.</param>
        /// <param name="typ">The lexical type; stored exactly as given.</param>
        public Token(System.String text, int start, int end, System.String typ)
        {
            termText = text;
            startOffset = start;
            endOffset = end;
            type = typ;
        }

        /// <summary>
        /// Set the position increment. This determines the position of this token
        /// relative to the previous Token in a <c>TokenStream</c>, used in phrase
        /// searching.
        /// </summary>
        /// <remarks>
        /// <para>The default value is one.</para>
        /// <para>Some common uses for this are:</para>
        /// <list type="bullet">
        /// <item><description>
        /// Set it to zero to give this token the same position as the previous token.
        /// </description></item>
        /// <item><description>
        /// Set it to a value greater than one to leave a gap between this token and
        /// the previous token.
        /// </description></item>
        /// </list>
        /// </remarks>
        /// <param name="positionIncrement">The new increment; must be zero or greater.</param>
        /// <exception cref="System.ArgumentException">
        /// Thrown when <paramref name="positionIncrement"/> is negative.
        /// </exception>
        public void SetPositionIncrement(int positionIncrement)
        {
            if (positionIncrement < 0)
            {
                throw new System.ArgumentException("Increment must be zero or greater: " + positionIncrement);
            }

            this.positionIncrement = positionIncrement;
        }

        /// <summary>Returns the position increment of this Token.</summary>
        /// <returns>The current position increment (one unless changed).</returns>
        public int GetPositionIncrement()
        {
            return positionIncrement;
        }

        /// <summary>Returns the Token's term text.</summary>
        public System.String TermText()
        {
            return termText;
        }

        /// <summary>
        /// Returns this Token's starting offset, the position of the first character
        /// corresponding to this token in the source text.
        /// </summary>
        /// <remarks>
        /// Note that the difference between <c>EndOffset()</c> and <c>StartOffset()</c>
        /// may not be equal to <c>termText.Length</c>, as the term text may have been
        /// altered by a stemmer or some other filter.
        /// </remarks>
        public int StartOffset()
        {
            return startOffset;
        }

        /// <summary>
        /// Returns this Token's ending offset, one greater than the position of the
        /// last character corresponding to this token in the source text.
        /// </summary>
        public int EndOffset()
        {
            return endOffset;
        }

        /// <summary>Returns this Token's lexical type. Defaults to "word".</summary>
        public System.String Type()
        {
            return type;
        }

        /// <summary>
        /// Formats the token as <c>(termText,startOffset,endOffset)</c>, followed by
        /// <c>,type=...</c> when the type is not "word" and <c>,posIncr=...</c> when
        /// the position increment is not one.
        /// </summary>
        /// <returns>The formatted description of this token.</returns>
        public override System.String ToString()
        {
            System.Text.StringBuilder sb = new System.Text.StringBuilder();
            sb.Append("(" + termText + "," + startOffset + "," + endOffset);

            // The static String.Equals is null-safe: a token constructed with a
            // null type is rendered with an empty "type=" value instead of
            // throwing a NullReferenceException.
            if (!System.String.Equals(type, "word"))
            {
                sb.Append(",type=" + type);
            }

            if (positionIncrement != 1)
            {
                sb.Append(",posIncr=" + positionIncrement);
            }

            sb.Append(")");
            return sb.ToString();
        }
    }
}