/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ /* Generated By:JavaCC: Do not edit this line. QueryParser.java */ using System; using System.Collections.Generic; using System.Diagnostics; using System.Globalization; using System.IO; using System.Text; using Lucene.Net.Analysis; using Lucene.Net.Analysis.Tokenattributes; using Lucene.Net.Search; using Lucene.Net.Support; using Lucene.Net.Util; using Analyzer = Lucene.Net.Analysis.Analyzer; using CachingTokenFilter = Lucene.Net.Analysis.CachingTokenFilter; using TokenStream = Lucene.Net.Analysis.TokenStream; using DateField = Lucene.Net.Documents.DateField; using DateTools = Lucene.Net.Documents.DateTools; using Term = Lucene.Net.Index.Term; using BooleanClause = Lucene.Net.Search.BooleanClause; using BooleanQuery = Lucene.Net.Search.BooleanQuery; using FuzzyQuery = Lucene.Net.Search.FuzzyQuery; using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery; using MultiPhraseQuery = Lucene.Net.Search.MultiPhraseQuery; using MultiTermQuery = Lucene.Net.Search.MultiTermQuery; using PhraseQuery = Lucene.Net.Search.PhraseQuery; using PrefixQuery = Lucene.Net.Search.PrefixQuery; using Query = Lucene.Net.Search.Query; using Single = Lucene.Net.Support.Single; using TermQuery = Lucene.Net.Search.TermQuery; using TermRangeQuery = Lucene.Net.Search.TermRangeQuery; using WildcardQuery = Lucene.Net.Search.WildcardQuery; using Version = Lucene.Net.Util.Version; namespace Lucene.Net.QueryParsers { /// This class is generated by JavaCC. The most important method is /// . /// /// The syntax for query strings is as follows: /// A Query is a series of clauses. /// A clause may be prefixed by: /// /// a plus (+) or a minus (-) sign, indicating /// that the clause is required or prohibited respectively; or /// a term followed by a colon, indicating the field to be searched. /// This enables one to construct queries which search multiple fields. /// /// /// A clause may be either: /// /// a term, indicating all the documents that contain this term; or /// a nested query, enclosed in parentheses. Note that this may be used /// with a +/- prefix to require any of a set of /// terms. /// /// /// Thus, in BNF, the query grammar is: /// /// Query ::= ( Clause )* /// Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) /// /// ///

/// Examples of appropriately formatted queries can be found in the query syntax /// documentation. ///

/// ///

/// In s, QueryParser tries to detect date values, e.g. /// date:[6/1/2005 TO 6/4/2005] produces a range query that searches /// for "date" fields between 2005-06-01 and 2005-06-04. Note that the format /// of the accepted input depends on the . /// By default a date is converted into a search term using the deprecated /// for compatibility reasons. /// To use the new to convert dates, a /// has to be set. ///

///

/// The date resolution that shall be used for RangeQueries can be set /// using /// or . The former /// sets the default date resolution for all fields, whereas the latter can /// be used to set field specific date resolutions. Field specific date /// resolutions take, if set, precedence over the default date resolution. ///

///

/// If you use neither nor in your /// index, you can create your own /// query parser that inherits QueryParser and overwrites /// to /// use a different method for date conversion. ///

/// ///

Note that QueryParser is not thread-safe.

/// ///

NOTE: there is a new QueryParser in contrib, which matches /// the same syntax as this class, but is more modular, /// enabling substantial customization to how a query is created. /// ///

NOTE: there is a new QueryParser in contrib, which matches /// the same syntax as this class, but is more modular, /// enabling substantial customization to how a query is created. /// NOTE: You must specify the required compatibility when /// creating QueryParser: /// /// As of 2.9, is true by default. /// ///

public class QueryParser : QueryParserConstants { private static int CONJ_NONE = 0; private static int CONJ_AND = 1; private static int CONJ_OR = 2; private static int MOD_NONE = 0; private static int MOD_NOT = 10; private static int MOD_REQ = 11; // make it possible to call setDefaultOperator() without accessing // the nested class: /// Alternative form of QueryParser.Operator.AND public static Operator AND_OPERATOR = Operator.AND; /// Alternative form of QueryParser.Operator.OR public static Operator OR_OPERATOR = Operator.OR; /// The actual operator that parser uses to combine query terms private Operator operator_Renamed = OR_OPERATOR; private bool lowercaseExpandedTerms = true; private RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; private bool allowLeadingWildcard = false; private bool enablePositionIncrements = true; // LUCENENET-423 - DateRange differences with Java and .NET private bool _useJavaStyleDateRangeParsing = false; private Analyzer analyzer; private String field; private int phraseSlop = 0; private float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; private System.Globalization.CultureInfo locale = System.Globalization.CultureInfo.CurrentCulture; // the default date resolution private DateTools.Resolution dateResolution = null; // maps field names to date resolutions private IDictionary fieldToDateResolution = null; // The collator to use when determining range inclusion, // for use when constructing RangeQuerys. private System.Globalization.CompareInfo rangeCollator = null; /* The default operator_Renamed for parsing queries. * Use {@link QueryParser#setDefaultOperator} to change it. */ public enum Operator { OR, AND } /* Constructs a query parser. * @param matchVersion Lucene version to match. See above) * @param f the default field for query terms. * @param a used to find terms in the query text. */ public QueryParser(Version matchVersion, String f, Analyzer a) : this(new FastCharStream(new StringReader(""))) { analyzer = a; field = f; if (matchVersion.OnOrAfter(Version.LUCENE_29)) { enablePositionIncrements = true; } else { enablePositionIncrements = false; } // LUCENENET-423 - DateRange differences with Java and .NET if (matchVersion.OnOrAfter(Version.LUCENE_30)) { _useJavaStyleDateRangeParsing = true; } } /// Parses a query string, returning a {@link Lucene.Net.Search.Query}. /// the query string to be parsed. /// /// ParseException if the parsing fails public virtual Query Parse(String query) { ReInit(new FastCharStream(new StringReader(query))); try { // TopLevelQuery is a Query followed by the end-of-input (EOF) Query res = TopLevelQuery(field); return res ?? NewBooleanQuery(false); } catch (ParseException tme) { // rethrow to include the original query: throw new ParseException("Cannot parse '" + query + "': " + tme.Message, tme); } catch (TokenMgrError tme) { throw new ParseException("Cannot parse '" + query + "': " + tme.Message, tme); } catch (BooleanQuery.TooManyClauses tmc) { throw new ParseException("Cannot parse '" + query + "': too many bool clauses", tmc); } } /// Returns the analyzer. public virtual Analyzer Analyzer { get { return analyzer; } } /// Returns the field. public virtual string Field { get { return field; } } /// /// Gets or sets the minimal similarity for fuzzy queries. /// Default is 0.5f. /// public virtual float FuzzyMinSim { get { return fuzzyMinSim; } set { this.fuzzyMinSim = value; } } /// Gets or sets the prefix length for fuzzy queries. /// Returns the fuzzyPrefixLength. public virtual int FuzzyPrefixLength { get { return fuzzyPrefixLength; } set { this.fuzzyPrefixLength = value; } } /// Gets or sets the default slop for phrases. If zero, then exact phrase matches /// are required. Default value is zero. /// public virtual int PhraseSlop { set { this.phraseSlop = value; } get { return phraseSlop; } } /// Set to true to allow leading wildcard characters. ///

/// When set, * or ? are allowed as /// the first character of a PrefixQuery and WildcardQuery. /// Note that this can produce very slow /// queries on big indexes. ///

/// Default: false. ///

public virtual bool AllowLeadingWildcard { set { this.allowLeadingWildcard = value; } get { return allowLeadingWildcard; } } /// Set to true to enable position increments in result query. ///

/// When set, result phrase and multi-phrase queries will /// be aware of position increments. /// Useful when e.g. a StopFilter increases the position increment of /// the token that follows an omitted token. ///

/// Default: false. ///

public virtual bool EnablePositionIncrements { set { this.enablePositionIncrements = value; } get { return enablePositionIncrements; } } /// Gets or sets the boolean operator of the QueryParser. /// In default mode (OR_OPERATOR) terms without any modifiers /// are considered optional: for example capital of Hungary is equal to /// capital OR of OR Hungary.
/// In AND_OPERATOR mode terms are considered to be in conjunction: the /// above mentioned query is parsed as capital AND of AND Hungary ///
public virtual Operator DefaultOperator { set { this.operator_Renamed = value; } get { return operator_Renamed; } } /// Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically /// lower-cased or not. Default is true. /// public virtual bool LowercaseExpandedTerms { set { this.lowercaseExpandedTerms = value; } get { return lowercaseExpandedTerms; } } /// By default QueryParser uses /// when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it /// a) Runs faster b) Does not have the scarcity of terms unduly influence score /// c) avoids any "TooManyBooleanClauses" exception. /// However, if your application really needs to use the /// old-fashioned BooleanQuery expansion rewriting and the above /// points are not relevant then use this to change /// the rewrite method. /// public virtual RewriteMethod MultiTermRewriteMethod { set { multiTermRewriteMethod = value; } get { return multiTermRewriteMethod; } } /// Gets or sets locale used by date range parsing. public virtual CultureInfo Locale { set { this.locale = value; } get { return locale; } } /// Sets the default date resolution used by RangeQueries for fields for which no /// specific date resolutions has been set. Field specific resolutions can be set /// with {@link #SetDateResolution(String, DateTools.Resolution)}. /// /// /// the default date resolution to set /// public virtual void SetDateResolution(DateTools.Resolution dateResolution) { this.dateResolution = dateResolution; } /// Sets the date resolution used by RangeQueries for a specific field. /// /// /// field for which the date resolution is to be set /// /// date resolution to set /// public virtual void SetDateResolution(String fieldName, DateTools.Resolution dateResolution) { if (fieldName == null) { throw new ArgumentException("Field cannot be null."); } if (fieldToDateResolution == null) { // lazily initialize HashMap fieldToDateResolution = new HashMap(); } fieldToDateResolution.Add(fieldName, dateResolution); } /// Returns the date resolution that is used by RangeQueries for the given field. /// Returns null, if no default or field specific date resolution has been set /// for the given field. /// public virtual DateTools.Resolution getDateResolution(String fieldName) { if (fieldName == null) { throw new ArgumentException("Field cannot be null."); } if (fieldToDateResolution == null) { // no field specific date resolutions set; return default date resolution instead return this.dateResolution; } DateTools.Resolution resolution = fieldToDateResolution[fieldName]; if (resolution == null) { // no date resolutions set for the given field; return default date resolution instead resolution = this.dateResolution; } return resolution; } /// Gets or sets the collator used to determine index term inclusion in ranges /// for RangeQuerys. ///

/// WARNING: Setting the rangeCollator to a non-null /// collator using this method will cause every single index Term in the /// Field referenced by lowerTerm and/or upperTerm to be examined. /// Depending on the number of index Terms in this Field, the operation could /// be very slow. /// ///

/// the collator to use when constructing RangeQuerys public virtual CompareInfo RangeCollator { set { rangeCollator = value; } get { return rangeCollator; } } protected internal virtual void AddClause(List clauses, int conj, int mods, Query q) { bool required, prohibited; // If this term is introduced by AND, make the preceding term required, // unless it's already prohibited if (clauses.Count > 0 && conj == CONJ_AND) { BooleanClause c = clauses[clauses.Count - 1]; if (!c.IsProhibited) c.Occur = Occur.MUST; } if (clauses.Count > 0 && operator_Renamed == AND_OPERATOR && conj == CONJ_OR) { // If this term is introduced by OR, make the preceding term optional, // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) // notice if the input is a OR b, first term is parsed as required; without // this modification a OR b would parsed as +a OR b BooleanClause c = clauses[clauses.Count - 1]; if (!c.IsProhibited) c.Occur = Occur.SHOULD; } // We might have been passed a null query; the term might have been // filtered away by the analyzer. if (q == null) return; if (operator_Renamed == OR_OPERATOR) { // We set REQUIRED if we're introduced by AND or +; PROHIBITED if // introduced by NOT or -; make sure not to set both. prohibited = (mods == MOD_NOT); required = (mods == MOD_REQ); if (conj == CONJ_AND && !prohibited) { required = true; } } else { // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED // if not PROHIBITED and not introduced by OR prohibited = (mods == MOD_NOT); required = (!prohibited && conj != CONJ_OR); } if (required && !prohibited) clauses.Add(NewBooleanClause(q, Occur.MUST)); else if (!required && !prohibited) clauses.Add(NewBooleanClause(q, Occur.SHOULD)); else if (!required && prohibited) clauses.Add(NewBooleanClause(q, Occur.MUST_NOT)); else throw new SystemException("Clause cannot be both required and prohibited"); } /// throw in overridden method to disallow /// protected internal virtual Query GetFieldQuery(String field, String queryText) { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count TokenStream source; try { source = analyzer.ReusableTokenStream(field, new StringReader(queryText)); source.Reset(); } catch (IOException) { source = analyzer.TokenStream(field, new StringReader(queryText)); } CachingTokenFilter buffer = new CachingTokenFilter(source); ITermAttribute termAtt = null; IPositionIncrementAttribute posIncrAtt = null; int numTokens = 0; bool success = false; try { buffer.Reset(); success = true; } catch (IOException) { // success==false if we hit an exception } if (success) { if (buffer.HasAttribute()) { termAtt = buffer.GetAttribute(); } if (buffer.HasAttribute()) { posIncrAtt = buffer.GetAttribute(); } } int positionCount = 0; bool severalTokensAtSamePosition = false; bool hasMoreTokens = false; if (termAtt != null) { try { hasMoreTokens = buffer.IncrementToken(); while (hasMoreTokens) { numTokens++; int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1; if (positionIncrement != 0) { positionCount += positionIncrement; } else { severalTokensAtSamePosition = true; } hasMoreTokens = buffer.IncrementToken(); } } catch (IOException) { // ignore } } try { // rewind the buffer stream buffer.Reset(); // close original stream - all tokens buffered source.Close(); } catch (IOException) { // ignore } if (numTokens == 0) return null; else if (numTokens == 1) { String term = null; try { bool hasNext = buffer.IncrementToken(); Debug.Assert(hasNext); term = termAtt.Term; } catch (IOException) { // safe to ignore, because we know the number of tokens } return NewTermQuery(new Term(field, term)); } else { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: BooleanQuery q = NewBooleanQuery(true); for (int i = 0; i < numTokens; i++) { String term = null; try { bool hasNext = buffer.IncrementToken(); Debug.Assert(hasNext); term = termAtt.Term; } catch (IOException) { // safe to ignore, because we know the number of tokens } Query currentQuery = NewTermQuery( new Term(field, term)); q.Add(currentQuery, Occur.SHOULD); } return q; } else { // phrase query: MultiPhraseQuery mpq = NewMultiPhraseQuery(); mpq.Slop = phraseSlop; List multiTerms = new List(); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; int positionIncrement = 1; try { bool hasNext = buffer.IncrementToken(); Debug.Assert(hasNext == true); term = termAtt.Term; if (posIncrAtt != null) { positionIncrement = posIncrAtt.PositionIncrement; } } catch (IOException) { // safe to ignore, because we know the number of tokens } if (positionIncrement > 0 && multiTerms.Count > 0) { if (enablePositionIncrements) { mpq.Add(multiTerms.ToArray(), position); } else { mpq.Add(multiTerms.ToArray()); } multiTerms.Clear(); } position += positionIncrement; multiTerms.Add(new Term(field, term)); } if (enablePositionIncrements) { mpq.Add(multiTerms.ToArray(), position); } else { mpq.Add(multiTerms.ToArray()); } return mpq; } } else { PhraseQuery pq = NewPhraseQuery(); pq.Slop = phraseSlop; int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; int positionIncrement = 1; try { bool hasNext = buffer.IncrementToken(); Debug.Assert(hasNext == true); term = termAtt.Term; if (posIncrAtt != null) { positionIncrement = posIncrAtt.PositionIncrement; } } catch (IOException) { // safe to ignore, because we know the number of tokens } if (enablePositionIncrements) { position += positionIncrement; pq.Add(new Term(field, term), position); } else { pq.Add(new Term(field, term)); } } return pq; } } } /// Base implementation delegates to {@link #GetFieldQuery(String,String)}. /// This method may be overridden, for example, to return /// a SpanNearQuery instead of a PhraseQuery. /// /// /// throw in overridden method to disallow /// protected internal virtual Query GetFieldQuery(String field, String queryText, int slop) { Query query = GetFieldQuery(field, queryText); if (query is PhraseQuery) { ((PhraseQuery)query).Slop = slop; } if (query is MultiPhraseQuery) { ((MultiPhraseQuery)query).Slop = slop; } return query; } /// throw in overridden method to disallow /// protected internal virtual Query GetRangeQuery(String field, String part1, String part2, bool inclusive) { if (lowercaseExpandedTerms) { part1 = part1.ToLower(); part2 = part2.ToLower(); } try { DateTime d1, d2; if (_useJavaStyleDateRangeParsing) { // TODO: This doesn't emulate java perfectly. // Java allows parsing of the string up to the end of the pattern // and then ignores everything else. .NET will throw an exception, // so this will fail in those cases, though the code below is clear // that users can only specify the date, not the time. var shortFormat = locale.DateTimeFormat.ShortDatePattern; d1 = DateTime.ParseExact(part1, shortFormat, locale); d2 = DateTime.ParseExact(part2, shortFormat, locale); } else { d1 = DateTime.Parse(part1, locale); d2 = DateTime.Parse(part2, locale); } if (inclusive) { // The user can only specify the date, not the time, so make sure // the time is set to the latest possible time of that date to really // include all documents: var cal = locale.Calendar; d2 = cal.AddHours(d2, 23); d2 = cal.AddMinutes(d2, 59); d2 = cal.AddSeconds(d2, 59); d2 = cal.AddMilliseconds(d2, 999); } DateTools.Resolution resolution = getDateResolution(field); if (resolution == null) { // no default or field specific date resolution has been set, // use deprecated DateField to maintain compatibility with // pre-1.9 Lucene versions. part1 = DateField.DateToString(d1); part2 = DateField.DateToString(d2); } else { part1 = DateTools.DateToString(d1, resolution); part2 = DateTools.DateToString(d2, resolution); } } catch (Exception) { } return NewRangeQuery(field, part1, part2, inclusive); } /// Builds a new BooleanQuery instance /// disable coord /// /// new BooleanQuery instance /// protected internal virtual BooleanQuery NewBooleanQuery(bool disableCoord) { return new BooleanQuery(disableCoord); } /// Builds a new BooleanClause instance /// sub query /// /// how this clause should occur when matching documents /// /// new BooleanClause instance /// protected internal virtual BooleanClause NewBooleanClause(Query q, Occur occur) { return new BooleanClause(q, occur); } /// Builds a new TermQuery instance /// term /// /// new TermQuery instance /// protected internal virtual Query NewTermQuery(Term term) { return new TermQuery(term); } /// Builds a new PhraseQuery instance /// new PhraseQuery instance /// protected internal virtual PhraseQuery NewPhraseQuery() { return new PhraseQuery(); } /// Builds a new MultiPhraseQuery instance /// new MultiPhraseQuery instance /// protected internal virtual MultiPhraseQuery NewMultiPhraseQuery() { return new MultiPhraseQuery(); } /// Builds a new PrefixQuery instance /// Prefix term /// /// new PrefixQuery instance /// protected internal virtual Query NewPrefixQuery(Term prefix) { return new PrefixQuery(prefix) { RewriteMethod = multiTermRewriteMethod }; } /// Builds a new FuzzyQuery instance /// Term /// /// minimum similarity /// /// prefix length /// /// new FuzzyQuery Instance /// protected internal virtual Query NewFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { // FuzzyQuery doesn't yet allow constant score rewrite return new FuzzyQuery(term, minimumSimilarity, prefixLength); } /// Builds a new TermRangeQuery instance /// Field /// /// min /// /// max /// /// true if range is inclusive /// /// new TermRangeQuery instance /// protected internal virtual Query NewRangeQuery(String field, String part1, String part2, bool inclusive) { return new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator) { RewriteMethod = multiTermRewriteMethod }; } /// Builds a new MatchAllDocsQuery instance /// new MatchAllDocsQuery instance /// protected internal virtual Query NewMatchAllDocsQuery() { return new MatchAllDocsQuery(); } /// Builds a new WildcardQuery instance /// wildcard term /// /// new WildcardQuery instance /// protected internal virtual Query NewWildcardQuery(Term t) { return new WildcardQuery(t) { RewriteMethod = multiTermRewriteMethod }; } /// Factory method for generating query, given a set of clauses. /// By default creates a boolean query composed of clauses passed in. /// /// Can be overridden by extending classes, to modify query being /// returned. /// /// /// List that contains {@link BooleanClause} instances /// to join. /// /// /// Resulting {@link Query} object. /// /// throw in overridden method to disallow /// protected internal virtual Query GetBooleanQuery(IList clauses) { return GetBooleanQuery(clauses, false); } /// Factory method for generating query, given a set of clauses. /// By default creates a boolean query composed of clauses passed in. /// /// Can be overridden by extending classes, to modify query being /// returned. /// /// /// List that contains {@link BooleanClause} instances /// to join. /// /// true if coord scoring should be disabled. /// /// /// Resulting {@link Query} object. /// /// throw in overridden method to disallow /// protected internal virtual Query GetBooleanQuery(IList clauses, bool disableCoord) { if (clauses.Count == 0) { return null; // all clause words were filtered away by the analyzer. } BooleanQuery query = NewBooleanQuery(disableCoord); foreach (var clause in clauses) { query.Add(clause); } return query; } /// Factory method for generating a query. Called when parser /// parses an input term token that contains one or more wildcard /// characters (? and *), but is not a prefix term token (one /// that has just a single * character at the end) ///

/// Depending on settings, prefix term may be lower-cased /// automatically. It will not go through the default Analyzer, /// however, since normal Analyzers are unlikely to work properly /// with wildcard templates. ///

/// Can be overridden by extending classes, to provide custom handling for /// wildcard queries, which may be necessary due to missing analyzer calls. /// ///

/// Name of the field query will use. /// /// Term token that contains one or more wild card /// characters (? or *), but is not simple prefix term /// /// /// Resulting {@link Query} built for the term /// /// throw in overridden method to disallow /// protected internal virtual Query GetWildcardQuery(String field, String termStr) { if ("*".Equals(field)) { if ("*".Equals(termStr)) return NewMatchAllDocsQuery(); } if (!allowLeadingWildcard && (termStr.StartsWith("*") || termStr.StartsWith("?"))) throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"); if (lowercaseExpandedTerms) { termStr = termStr.ToLower(); } Term t = new Term(field, termStr); return NewWildcardQuery(t); } /// Factory method for generating a query (similar to /// {@link #getWildcardQuery}). Called when parser parses an input term /// token that uses prefix notation; that is, contains a single '*' wildcard /// character as its last character. Since this is a special case /// of generic wildcard term, and such a query can be optimized easily, /// this usually results in a different query object. ///

/// Depending on settings, a prefix term may be lower-cased /// automatically. It will not go through the default Analyzer, /// however, since normal Analyzers are unlikely to work properly /// with wildcard templates. ///

/// Can be overridden by extending classes, to provide custom handling for /// wild card queries, which may be necessary due to missing analyzer calls. /// ///

/// Name of the field query will use. /// /// Term token to use for building term for the query /// (without trailing '*' character!) /// /// /// Resulting {@link Query} built for the term /// /// throw in overridden method to disallow /// protected internal virtual Query GetPrefixQuery(String field, String termStr) { if (!allowLeadingWildcard && termStr.StartsWith("*")) throw new ParseException("'*' not allowed as first character in PrefixQuery"); if (lowercaseExpandedTerms) { termStr = termStr.ToLower(); } Term t = new Term(field, termStr); return NewPrefixQuery(t); } /// Factory method for generating a query (similar to /// {@link #getWildcardQuery}). Called when parser parses /// an input term token that has the fuzzy suffix (~) appended. /// /// /// Name of the field query will use. /// /// Term token to use for building term for the query /// /// /// Resulting {@link Query} built for the term /// /// throw in overridden method to disallow /// protected internal virtual Query GetFuzzyQuery(String field, String termStr, float minSimilarity) { if (lowercaseExpandedTerms) { termStr = termStr.ToLower(); } Term t = new Term(field, termStr); return NewFuzzyQuery(t, minSimilarity, fuzzyPrefixLength); } /// Returns a String where the escape char has been /// removed, or kept only once if there was a double escape. /// /// Supports escaped unicode characters, e. g. translates /// \\u0041 to A. /// /// private String DiscardEscapeChar(String input) { // Create char array to hold unescaped char sequence char[] output = new char[input.Length]; // The Length of the output can be less than the input // due to discarded escape chars. This variable holds // the actual Length of the output int Length = 0; // We remember whether the last processed character was // an escape character bool lastCharWasEscapeChar = false; // The multiplier the current unicode digit must be multiplied with. // E. g. the first digit must be multiplied with 16^3, the second with 16^2... int codePointMultiplier = 0; // Used to calculate the codepoint of the escaped unicode character int codePoint = 0; for (int i = 0; i < input.Length; i++) { char curChar = input[i]; if (codePointMultiplier > 0) { codePoint += HexToInt(curChar) * codePointMultiplier; codePointMultiplier = Number.URShift(codePointMultiplier, 4); if (codePointMultiplier == 0) { output[Length++] = (char)codePoint; codePoint = 0; } } else if (lastCharWasEscapeChar) { if (curChar == 'u') { // found an escaped unicode character codePointMultiplier = 16 * 16 * 16; } else { // this character was escaped output[Length] = curChar; Length++; } lastCharWasEscapeChar = false; } else { if (curChar == '\\') { lastCharWasEscapeChar = true; } else { output[Length] = curChar; Length++; } } } if (codePointMultiplier > 0) { throw new ParseException("Truncated unicode escape sequence."); } if (lastCharWasEscapeChar) { throw new ParseException("Term can not end with escape character."); } return new String(output, 0, Length); } /// Returns the numeric value of the hexadecimal character private static int HexToInt(char c) { if ('0' <= c && c <= '9') { return c - '0'; } else if ('a' <= c && c <= 'f') { return c - 'a' + 10; } else if ('A' <= c && c <= 'F') { return c - 'A' + 10; } else { throw new ParseException("None-hex character in unicode escape sequence: " + c); } } /// Returns a String where those characters that QueryParser /// expects to be escaped are escaped by a preceding \. /// public static String Escape(String s) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < s.Length; i++) { char c = s[i]; // These characters are part of the query syntax and must be escaped if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&') { sb.Append('\\'); } sb.Append(c); } return sb.ToString(); } /// Command line tool to test QueryParser, using {@link Lucene.Net.Analysis.SimpleAnalyzer}. /// Usage:
/// java Lucene.Net.QueryParsers.QueryParser <input> ///
[STAThread] public static void Main(String[] args) { if (args.Length == 0) { Console.WriteLine("Usage: java org.apache.lucene.queryParser.QueryParser "); Environment.Exit(0); } QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer()); Query q = qp.Parse(args[0]); Console.WriteLine(q.ToString("field")); } // * Query ::= ( Clause )* // * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) public int Conjunction() { int ret = CONJ_NONE; switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case AndToken: case OrToken: switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case AndToken: Jj_consume_token(AndToken); ret = CONJ_AND; break; case OrToken: Jj_consume_token(OrToken); ret = CONJ_OR; break; default: jj_la1[0] = jj_gen; Jj_consume_token(-1); throw new ParseException(); } break; default: jj_la1[1] = jj_gen; break; } { if (true) return ret; } throw new ApplicationException("Missing return statement in function"); } public int Modifiers() { int ret = MOD_NONE; switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case NotToken: case PlusToken: case MinusToken: switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case PlusToken: Jj_consume_token(PlusToken); ret = MOD_REQ; break; case MinusToken: Jj_consume_token(MinusToken); ret = MOD_NOT; break; case NotToken: Jj_consume_token(NotToken); ret = MOD_NOT; break; default: jj_la1[2] = jj_gen; Jj_consume_token(-1); throw new ParseException(); } break; default: jj_la1[3] = jj_gen; break; } { if (true) return ret; } throw new Exception("Missing return statement in function"); } // This makes sure that there is no garbage after the query string public Query TopLevelQuery(String field) { Query q; q = Query(field); Jj_consume_token(0); { if (true) return q; } throw new Exception("Missing return statement in function"); } public Query Query(String field) { List clauses = new List(); Query q, firstQuery = null; int conj, mods; mods = Modifiers(); q = Clause(field); AddClause(clauses, CONJ_NONE, mods, q); if (mods == MOD_NONE) firstQuery = q; while (true) { switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case AndToken: case OrToken: case NotToken: case PlusToken: case MinusToken: case LParanToken: case StarToken: case QuotedToken: case TermToken: case PrefixTermToken: case WildTermToken: case RangeInStartToken: case RangeExStartToken: case NumberToken: break; default: jj_la1[4] = jj_gen; goto label_1; } conj = Conjunction(); mods = Modifiers(); q = Clause(field); AddClause(clauses, conj, mods, q); } label_1: if (clauses.Count == 1 && firstQuery != null) { if (true) return firstQuery; } return GetBooleanQuery(clauses); } public Query Clause(String field) { Query q; Token fieldToken = null, boost = null; if (Jj_2_1(2)) { switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case TermToken: fieldToken = Jj_consume_token(TermToken); Jj_consume_token(ColonToken); field = DiscardEscapeChar(fieldToken.image); break; case StarToken: Jj_consume_token(StarToken); Jj_consume_token(ColonToken); field = "*"; break; default: jj_la1[5] = jj_gen; Jj_consume_token(-1); throw new ParseException(); } } else { ; } switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case StarToken: case QuotedToken: case TermToken: case PrefixTermToken: case WildTermToken: case RangeInStartToken: case RangeExStartToken: case NumberToken: q = Term(field); break; case LParanToken: Jj_consume_token(LParanToken); q = Query(field); Jj_consume_token(RParenToken); switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case CaratToken: Jj_consume_token(CaratToken); boost = Jj_consume_token(NumberToken); break; default: jj_la1[6] = jj_gen; break; } break; default: jj_la1[7] = jj_gen; Jj_consume_token(-1); throw new ParseException(); } if (boost != null) { try { float f = Single.Parse(boost.image); q.Boost = f; } catch (Exception) { } } { if (true) return q; } throw new Exception("Missing return statement in function"); } public Query Term(String field) { Token term, boost = null, fuzzySlop = null, goop1, goop2; bool prefix = false; bool wildcard = false; bool fuzzy = false; Query q; switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case StarToken: case TermToken: case PrefixTermToken: case WildTermToken: case NumberToken: switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case TermToken: term = Jj_consume_token(TermToken); break; case StarToken: term = Jj_consume_token(StarToken); wildcard = true; break; case PrefixTermToken: term = Jj_consume_token(PrefixTermToken); prefix = true; break; case WildTermToken: term = Jj_consume_token(WildTermToken); wildcard = true; break; case NumberToken: term = Jj_consume_token(NumberToken); break; default: jj_la1[8] = jj_gen; Jj_consume_token(-1); throw new ParseException(); } switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case FuzzySlopToken: fuzzySlop = Jj_consume_token(FuzzySlopToken); fuzzy = true; break; default: jj_la1[9] = jj_gen; break; } switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case CaratToken: Jj_consume_token(CaratToken); boost = Jj_consume_token(NumberToken); switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case FuzzySlopToken: fuzzySlop = Jj_consume_token(FuzzySlopToken); fuzzy = true; break; default: jj_la1[10] = jj_gen; break; } break; default: jj_la1[11] = jj_gen; break; } String termImage = DiscardEscapeChar(term.image); if (wildcard) { q = GetWildcardQuery(field, termImage); } else if (prefix) { q = GetPrefixQuery(field, DiscardEscapeChar(term.image.Substring(0, (term.image.Length - 1) - (0)))); } else if (fuzzy) { float fms = fuzzyMinSim; try { fms = Single.Parse(fuzzySlop.image.Substring(1)); } catch (Exception) { } if (fms < 0.0f || fms > 1.0f) { { if (true) throw new ParseException( "Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); } } q = GetFuzzyQuery(field, termImage, fms); } else { q = GetFieldQuery(field, termImage); } break; case RangeInStartToken: Jj_consume_token(RangeInStartToken); switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case RangeInGoopToken: goop1 = Jj_consume_token(RangeInGoopToken); break; case RangeInQuotedToken: goop1 = Jj_consume_token(RangeInQuotedToken); break; default: jj_la1[12] = jj_gen; Jj_consume_token(-1); throw new ParseException(); } switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case RangeInToToken: Jj_consume_token(RangeInToToken); break; default: jj_la1[13] = jj_gen; break; } switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case RangeInGoopToken: goop2 = Jj_consume_token(RangeInGoopToken); break; case RangeInQuotedToken: goop2 = Jj_consume_token(RangeInQuotedToken); break; default: jj_la1[14] = jj_gen; Jj_consume_token(-1); throw new ParseException(); } Jj_consume_token(RangeInEndToken); switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case CaratToken: Jj_consume_token(CaratToken); boost = Jj_consume_token(NumberToken); break; default: jj_la1[15] = jj_gen; break; } if (goop1.kind == RangeInQuotedToken) { goop1.image = goop1.image.Substring(1, (goop1.image.Length - 1) - (1)); } if (goop2.kind == RangeInQuotedToken) { goop2.image = goop2.image.Substring(1, (goop2.image.Length - 1) - (1)); } q = GetRangeQuery(field, DiscardEscapeChar(goop1.image), DiscardEscapeChar(goop2.image), true); break; case RangeExStartToken: Jj_consume_token(RangeExStartToken); switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case RangeExGoopToken: goop1 = Jj_consume_token(RangeExGoopToken); break; case RangeExQuotedToken: goop1 = Jj_consume_token(RangeExQuotedToken); break; default: jj_la1[16] = jj_gen; Jj_consume_token(-1); throw new ParseException(); } switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case RangeExToToken: Jj_consume_token(RangeExToToken); break; default: jj_la1[17] = jj_gen; break; } switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case RangeExGoopToken: goop2 = Jj_consume_token(RangeExGoopToken); break; case RangeExQuotedToken: goop2 = Jj_consume_token(RangeExQuotedToken); break; default: jj_la1[18] = jj_gen; Jj_consume_token(-1); throw new ParseException(); } Jj_consume_token(RangeExEndToken); switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case CaratToken: Jj_consume_token(CaratToken); boost = Jj_consume_token(NumberToken); break; default: jj_la1[19] = jj_gen; break; } if (goop1.kind == RangeExQuotedToken) { goop1.image = goop1.image.Substring(1, (goop1.image.Length - 1) - (1)); } if (goop2.kind == RangeExQuotedToken) { goop2.image = goop2.image.Substring(1, (goop2.image.Length - 1) - (1)); } q = GetRangeQuery(field, DiscardEscapeChar(goop1.image), DiscardEscapeChar(goop2.image), false); break; case QuotedToken: term = Jj_consume_token(QuotedToken); switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case FuzzySlopToken: fuzzySlop = Jj_consume_token(FuzzySlopToken); break; default: jj_la1[20] = jj_gen; break; } switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) { case CaratToken: Jj_consume_token(CaratToken); boost = Jj_consume_token(NumberToken); break; default: jj_la1[21] = jj_gen; break; } int s = phraseSlop; if (fuzzySlop != null) { try { s = (int)Single.Parse(fuzzySlop.image.Substring(1)); } catch (Exception) { } } q = GetFieldQuery(field, DiscardEscapeChar(term.image.Substring(1, (term.image.Length - 1) - (1))), s); break; default: jj_la1[22] = jj_gen; Jj_consume_token(-1); throw new ParseException(); } if (boost != null) { float f = (float)1.0; try { f = Single.Parse(boost.image); } catch (Exception) { /* Should this be handled somehow? (defaults to "no boost", if * boost number is invalid) */ } // avoid boosting null queries, such as those caused by stop words if (q != null) { q.Boost = f; } } { if (true) return q; } throw new Exception("Missing return statement in function"); } private bool Jj_2_1(int xla) { jj_la = xla; jj_lastpos = jj_scanpos = token; try { return !Jj_3_1(); } catch (LookaheadSuccess) { return true; } finally { Jj_save(0, xla); } } private bool Jj_3R_2() { if (jj_scan_token(TermToken)) return true; if (jj_scan_token(ColonToken)) return true; return false; } private bool Jj_3_1() { Token xsp; xsp = jj_scanpos; if (Jj_3R_2()) { jj_scanpos = xsp; if (Jj_3R_3()) return true; } return false; } private bool Jj_3R_3() { if (jj_scan_token(StarToken)) return true; if (jj_scan_token(ColonToken)) return true; return false; } /* Generated Token Manager. */ public QueryParserTokenManager token_source; /* Current token. */ public Token token; /* Next token. */ public Token jj_nt; private int jj_ntk; private Token jj_scanpos, jj_lastpos; private int jj_la; private int jj_gen; private int[] jj_la1 = new int[23]; private static int[] jj_la1_0; private static int[] jj_la1_1; private static void Jj_la1_init_0() { jj_la1_0 = new int[] { 0x300, 0x300, 0x1c00, 0x1c00, 0x3ed3f00, 0x90000, 0x20000, 0x3ed2000, 0x2690000, 0x100000, 0x100000, 0x20000, 0x30000000, 0x4000000, 0x30000000, 0x20000, 0x0, 0x40000000, 0x0, 0x20000 , 0x100000, 0x20000, 0x3ed0000, }; } private static void Jj_la1_init_1() { jj_la1_1 = new int[] { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, }; } private JJCalls[] jj_2_rtns = new JJCalls[1]; private bool jj_rescan = false; private int jj_gc = 0; /// Constructor with user supplied CharStream. protected internal QueryParser(ICharStream stream) { token_source = new QueryParserTokenManager(stream); token = new Token(); jj_ntk = -1; jj_gen = 0; for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls(); } /// Reinitialise. public void ReInit(ICharStream stream) { token_source.ReInit(stream); token = new Token(); jj_ntk = -1; jj_gen = 0; for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls(); } /// Constructor with generated Token Manager. protected QueryParser(QueryParserTokenManager tm) { token_source = tm; token = new Token(); jj_ntk = -1; jj_gen = 0; for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls(); } /// Reinitialise. public void ReInit(QueryParserTokenManager tm) { token_source = tm; token = new Token(); jj_ntk = -1; jj_gen = 0; for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls(); } private Token Jj_consume_token(int kind) { Token oldToken; if ((oldToken = token).next != null) token = token.next; else token = token.next = token_source.GetNextToken(); jj_ntk = -1; if (token.kind == kind) { jj_gen++; if (++jj_gc > 100) { jj_gc = 0; for (int i = 0; i < jj_2_rtns.Length; i++) { JJCalls c = jj_2_rtns[i]; while (c != null) { if (c.gen < jj_gen) c.first = null; c = c.next; } } } return token; } token = oldToken; jj_kind = kind; throw GenerateParseException(); } [Serializable] private sealed class LookaheadSuccess : System.Exception { } private LookaheadSuccess jj_ls = new LookaheadSuccess(); private bool jj_scan_token(int kind) { if (jj_scanpos == jj_lastpos) { jj_la--; if (jj_scanpos.next == null) { jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.GetNextToken(); } else { jj_lastpos = jj_scanpos = jj_scanpos.next; } } else { jj_scanpos = jj_scanpos.next; } if (jj_rescan) { int i = 0; Token tok = token; while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; } if (tok != null) Jj_add_error_token(kind, i); } if (jj_scanpos.kind != kind) return true; if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls; return false; } /// Get the next Token. public Token GetNextToken() { if (token.next != null) token = token.next; else token = token.next = token_source.GetNextToken(); jj_ntk = -1; jj_gen++; return token; } /// Get the specific Token. public Token getToken(int index) { Token t = token; for (int i = 0; i < index; i++) { if (t.next != null) t = t.next; else t = t.next = token_source.GetNextToken(); } return t; } private int Jj_ntk() { if ((jj_nt = token.next) == null) return (jj_ntk = (token.next = token_source.GetNextToken()).kind); else return (jj_ntk = jj_nt.kind); } private List jj_expentries = new List(); private int[] jj_expentry; private int jj_kind = -1; private int[] jj_lasttokens = new int[100]; private int jj_endpos; private void Jj_add_error_token(int kind, int pos) { if (pos >= 100) return; if (pos == jj_endpos + 1) { jj_lasttokens[jj_endpos++] = kind; } else if (jj_endpos != 0) { jj_expentry = new int[jj_endpos]; for (int i = 0; i < jj_endpos; i++) { jj_expentry[i] = jj_lasttokens[i]; } foreach (var oldentry in jj_expentries) { if (oldentry.Length == jj_expentry.Length) { for (int i = 0; i < jj_expentry.Length; i++) { if (oldentry[i] != jj_expentry[i]) { continue; } } jj_expentries.Add(jj_expentry); break; } } if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind; } } /// Generate ParseException. public virtual ParseException GenerateParseException() { jj_expentries.Clear(); bool[] la1tokens = new bool[34]; if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; } for (int i = 0; i < 23; i++) { if (jj_la1[i] == jj_gen) { for (int j = 0; j < 32; j++) { if ((jj_la1_0[i] & (1 << j)) != 0) { la1tokens[j] = true; } if ((jj_la1_1[i] & (1 << j)) != 0) { la1tokens[32 + j] = true; } } } } for (int i = 0; i < 34; i++) { if (la1tokens[i]) { jj_expentry = new int[1]; jj_expentry[0] = i; jj_expentries.Add(jj_expentry); } } jj_endpos = 0; Jj_rescan_token(); Jj_add_error_token(0, 0); int[][] exptokseq = new int[jj_expentries.Count][]; for (int i = 0; i < jj_expentries.Count; i++) { exptokseq[i] = jj_expentries[i]; } return new ParseException(token, exptokseq, tokenImage); } /// Enable tracing. public void Enable_tracing() { } /// Disable tracing. public void Disable_tracing() { } private void Jj_rescan_token() { jj_rescan = true; for (int i = 0; i < 1; i++) { try { JJCalls p = jj_2_rtns[i]; do { if (p.gen > jj_gen) { jj_la = p.arg; jj_lastpos = jj_scanpos = p.first; switch (i) { case 0: Jj_3_1(); break; } } p = p.next; } while (p != null); } catch (LookaheadSuccess) { } } jj_rescan = false; } private void Jj_save(int index, int xla) { JJCalls p = jj_2_rtns[index]; while (p.gen > jj_gen) { if (p.next == null) { p = p.next = new JJCalls(); break; } p = p.next; } p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla; } internal sealed class JJCalls { internal int gen; internal Token first; internal int arg; internal JJCalls next; } static QueryParser() { { Jj_la1_init_0(); Jj_la1_init_1(); } } } }