Coverage Report - org.apache.maven.index.DefaultQueryCreator
 
Classes in this File Line Coverage Branch Coverage Complexity
DefaultQueryCreator
57 %
77/135
47 %
53/112
11,25
 
 1  
 package org.apache.maven.index;
 2  
 
 3  
 /*
 4  
  * Licensed to the Apache Software Foundation (ASF) under one
 5  
  * or more contributor license agreements.  See the NOTICE file
 6  
  * distributed with this work for additional information
 7  
  * regarding copyright ownership.  The ASF licenses this file
 8  
  * to you under the Apache License, Version 2.0 (the
 9  
  * "License"); you may not use this file except in compliance
 10  
  * with the License.  You may obtain a copy of the License at
 11  
  *
 12  
  *   http://www.apache.org/licenses/LICENSE-2.0    
 13  
  *
 14  
  * Unless required by applicable law or agreed to in writing,
 15  
  * software distributed under the License is distributed on an
 16  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 17  
  * KIND, either express or implied.  See the License for the
 18  
  * specific language governing permissions and limitations
 19  
  * under the License.
 20  
  */
 21  
 
 22  
 import java.io.IOException;
 23  
 import java.io.StringReader;
 24  
 
 25  
 import org.apache.lucene.analysis.TokenStream;
 26  
 import org.apache.lucene.index.Term;
 27  
 import org.apache.lucene.queryParser.ParseException;
 28  
 import org.apache.lucene.queryParser.QueryParser;
 29  
 import org.apache.lucene.queryParser.QueryParser.Operator;
 30  
 import org.apache.lucene.search.BooleanClause.Occur;
 31  
 import org.apache.lucene.search.BooleanQuery;
 32  
 import org.apache.lucene.search.PrefixQuery;
 33  
 import org.apache.lucene.search.Query;
 34  
 import org.apache.lucene.search.TermQuery;
 35  
 import org.apache.lucene.search.WildcardQuery;
 36  
 import org.apache.lucene.util.Version;
 37  
 import org.apache.maven.index.context.NexusAnalyzer;
 38  
 import org.apache.maven.index.creator.JarFileContentsIndexCreator;
 39  
 import org.apache.maven.index.creator.MinimalArtifactInfoIndexCreator;
 40  
 import org.apache.maven.index.expr.SearchExpression;
 41  
 import org.apache.maven.index.expr.SearchTyped;
 42  
 import org.codehaus.plexus.component.annotations.Component;
 43  
 import org.codehaus.plexus.component.annotations.Requirement;
 44  
 import org.codehaus.plexus.logging.Logger;
 45  
 
 46  
 /**
 47  
  * The default {@link QueryCreator}, which constructs a Lucene query for the provided query text.
 48  
  * <p>
 49  
  * By default, wildcards are created so that the query text matches the beginning of the field value or the beginning of the
 50  
  * class/package name segment for {@link ArtifactInfo#NAMES NAMES} field. But it can be controlled by using special
 51  
  * markers:
 52  
  * <ul>
 53  
  * <li>* - any character</li>
 54  
  * <li>'^' - beginning of the text</li>
 55  
  * <li>'$' or '&lt;' or ' ' - end of the text</li>
 56  
  * </ul>
 57  
  * For example:
 58  
  * <ul>
 59  
  * <li>junit - matches junit and junit-foo, but not foo-junit</li>
 60  
  * <li>*junit - matches junit, junit-foo and foo-junit</li>
 61  
  * <li>^junit$ - matches junit, but not junit-foo, nor foo-junit</li>
 62  
  * </ul>
 63  
  * 
 64  
  * @author Eugene Kuleshov
 65  
  */
 66  
 @Component( role = QueryCreator.class )
 67  208
 public class DefaultQueryCreator
 68  
     implements QueryCreator
 69  
 {
 70  
     @Requirement
 71  
     private Logger logger;
 72  
 
 73  
     protected Logger getLogger()
 74  
     {
 75  1
         return logger;
 76  
     }
 77  
 
 78  
     // ==
 79  
 
 80  
     public IndexerField selectIndexerField( final Field field, final SearchType type )
 81  
     {
 82  4841
         IndexerField lastField = null;
 83  
 
 84  4841
         for ( IndexerField indexerField : field.getIndexerFields() )
 85  
         {
 86  7199
             lastField = indexerField;
 87  
 
 88  7199
             if ( type.matchesIndexerField( indexerField ) )
 89  
             {
 90  4827
                 return indexerField;
 91  
             }
 92  
         }
 93  
 
 94  14
         return lastField;
 95  
     }
 96  
 
 97  
     public Query constructQuery( final Field field, final SearchExpression expression )
 98  
         throws ParseException
 99  
     {
 100  2364
         SearchType searchType = SearchType.SCORED;
 101  
 
 102  2364
         if ( expression instanceof SearchTyped )
 103  
         {
 104  2348
             searchType = ( (SearchTyped) expression ).getSearchType();
 105  
         }
 106  
 
 107  2364
         return constructQuery( field, expression.getStringValue(), searchType );
 108  
     }
 109  
 
 110  
     public Query constructQuery( final Field field, final String query, final SearchType type )
 111  
         throws ParseException
 112  
     {
 113  2552
         if ( type == null )
 114  
         {
 115  0
             throw new NullPointerException( "Cannot construct query with type of \"null\"!" );
 116  
         }
 117  
 
 118  2552
         if ( field == null )
 119  
         {
 120  0
             throw new NullPointerException( "Cannot construct query for field \"null\"!" );
 121  
         }
 122  
         else
 123  
         {
 124  2552
             return constructQuery( field, selectIndexerField( field, type ), query, type );
 125  
         }
 126  
     }
 127  
 
 128  
     @Deprecated
 129  
     public Query constructQuery( String field, String query )
 130  
     {
 131  0
         Query result = null;
 132  
 
 133  0
         if ( MinimalArtifactInfoIndexCreator.FLD_GROUP_ID_KW.getKey().equals( field )
 134  
             || MinimalArtifactInfoIndexCreator.FLD_ARTIFACT_ID_KW.getKey().equals( field )
 135  
             || MinimalArtifactInfoIndexCreator.FLD_VERSION_KW.getKey().equals( field )
 136  
             || JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals( field ) )
 137  
         {
 138  
             // these are special untokenized fields, kept for use cases like TreeView is (exact matching).
 139  0
             result = legacyConstructQuery( field, query );
 140  
         }
 141  
         else
 142  
         {
 143  0
             QueryParser qp = new QueryParser( Version.LUCENE_24, field, new NexusAnalyzer() );
 144  
 
 145  
             // small cheap trick
 146  
             // if a query is not "expert" (does not contain field:val kind of expression)
 147  
             // but it contains star and/or punctuation chars, example: "common-log*"
 148  0
             if ( !query.contains( ":" ) )
 149  
             {
 150  0
                 if ( query.contains( "*" ) && query.matches( ".*(\\.|-|_).*" ) )
 151  
                 {
 152  0
                     query =
 153  
                         query.toLowerCase().replaceAll( "\\*", "X" ).replaceAll( "\\.|-|_", " " ).replaceAll( "X", "*" );
 154  
                 }
 155  
             }
 156  
 
 157  
             try
 158  
             {
 159  0
                 result = qp.parse( query );
 160  
             }
 161  0
             catch ( ParseException e )
 162  
             {
 163  0
                 getLogger().debug(
 164  
                     "Query parsing with \"legacy\" method, we got ParseException from QueryParser: " + e.getMessage() );
 165  
 
 166  0
                 result = legacyConstructQuery( field, query );
 167  0
             }
 168  
         }
 169  
 
 170  0
         if ( getLogger().isDebugEnabled() )
 171  
         {
 172  0
             getLogger().debug( "Query parsed as: " + result.toString() );
 173  
         }
 174  
 
 175  0
         return result;
 176  
     }
 177  
 
 178  
     // ==
 179  
 
 180  
     public Query constructQuery( final Field field, final IndexerField indexerField, final String query,
 181  
                                  final SearchType type )
 182  
         throws ParseException
 183  
     {
 184  4833
         if ( indexerField == null )
 185  
         {
 186  0
             getLogger().warn(
 187  
                 "Querying for field \""
 188  
                     + field.toString()
 189  
                     + "\" without any indexer field was tried. Please review your code, and consider adding this field to index!" );
 190  
 
 191  0
             return null;
 192  
         }
 193  4833
         if ( !indexerField.isIndexed() )
 194  
         {
 195  0
             getLogger().warn(
 196  
                 "Querying for non-indexed field " + field.toString()
 197  
                     + " was tried. Please review your code or consider adding this field to index!" );
 198  
 
 199  0
             return null;
 200  
         }
 201  
 
 202  4833
         if ( query.startsWith( "*" ) || query.startsWith( "?" ) )
 203  
         {
 204  16
             throw new ParseException( "Query cannot start with '*' or '?'!" );
 205  
         }
 206  
 
 207  4817
         if ( Field.NOT_PRESENT.equals( query ) )
 208  
         {
 209  2
             return new WildcardQuery( new Term( indexerField.getKey(), "*" ) );
 210  
         }
 211  
 
 212  4815
         if ( SearchType.EXACT.equals( type ) )
 213  
         {
 214  163
             if ( indexerField.isKeyword() )
 215  
             {
 216  
                 // no tokenization should happen against the field!
 217  161
                 if ( query.contains( "*" ) || query.contains( "?" ) )
 218  
                 {
 219  1
                     return new WildcardQuery( new Term( indexerField.getKey(), query ) );
 220  
                 }
 221  
                 else
 222  
                 {
 223  
                     // exactly what callee wants
 224  160
                     return new TermQuery( new Term( indexerField.getKey(), query ) );
 225  
                 }
 226  
             }
 227  2
             else if ( !indexerField.isKeyword() && indexerField.isStored() )
 228  
             {
 229  
                 // TODO: resolve this better! Decouple QueryCreator and IndexCreators!
 230  
                 // This is a hack/workaround here
 231  2
                 if ( JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.equals( indexerField ) )
 232  
                 {
 233  1
                     if ( query.startsWith( "/" ) )
 234  
                     {
 235  0
                         return new TermQuery( new Term( indexerField.getKey(), query.toLowerCase().replaceAll( "\\.",
 236  
                             "/" ) ) );
 237  
                     }
 238  
                     else
 239  
                     {
 240  1
                         return new TermQuery( new Term( indexerField.getKey(), "/"
 241  
                             + query.toLowerCase().replaceAll( "\\.", "/" ) ) );
 242  
                     }
 243  
                 }
 244  
                 else
 245  
                 {
 246  1
                     getLogger().warn(
 247  
                         type.toString()
 248  
                             + " type of querying for non-keyword (but stored) field "
 249  
                             + indexerField.getOntology().toString()
 250  
                             + " was tried. Please review your code, or indexCreator involved, since this type of querying of this field is currently unsupported." );
 251  
 
 252  
                     // will never succeed (unless we supply him "filter" too, but that would kill performance)
 253  
                     // and is possible with stored fields only
 254  1
                     return null;
 255  
                 }
 256  
             }
 257  
             else
 258  
             {
 259  0
                 getLogger().warn(
 260  
                     type.toString()
 261  
                         + " type of querying for non-keyword (and not stored) field "
 262  
                         + indexerField.getOntology().toString()
 263  
                         + " was tried. Please review your code, or indexCreator involved, since this type of querying of this field is impossible." );
 264  
 
 265  
                 // not a keyword indexerField, nor stored. No hope at all. Impossible even with "filtering"
 266  0
                 return null;
 267  
             }
 268  
         }
 269  4652
         else if ( SearchType.SCORED.equals( type ) )
 270  
         {
 271  4652
             if ( JarFileContentsIndexCreator.FLD_CLASSNAMES.equals( indexerField ) )
 272  
             {
 273  17
                 String qpQuery = query.toLowerCase().replaceAll( "\\.", " " ).replaceAll( "/", " " );
 274  
                 // tokenization should happen against the field!
 275  17
                 QueryParser qp = new QueryParser( Version.LUCENE_30, indexerField.getKey(), new NexusAnalyzer() );
 276  17
                 qp.setDefaultOperator( Operator.AND );
 277  17
                 return qp.parse( qpQuery );
 278  
             }
 279  4635
             else if ( indexerField.isKeyword() )
 280  
             {
 281  
                 // no tokenization should happen against the field!
 282  2285
                 if ( query.contains( "*" ) || query.contains( "?" ) )
 283  
                 {
 284  20
                     return new WildcardQuery( new Term( indexerField.getKey(), query ) );
 285  
                 }
 286  
                 else
 287  
                 {
 288  2265
                     BooleanQuery bq = new BooleanQuery();
 289  
 
 290  2265
                     Term t = new Term( indexerField.getKey(), query );
 291  
 
 292  2265
                     bq.add( new TermQuery( t ), Occur.SHOULD );
 293  
 
 294  2265
                     PrefixQuery pq = new PrefixQuery( t );
 295  2265
                     pq.setBoost( 0.8f );
 296  
 
 297  2265
                     bq.add( pq, Occur.SHOULD );
 298  
 
 299  2265
                     return bq;
 300  
                 }
 301  
             }
 302  
             else
 303  
             {
 304  
                 // to save "original" query
 305  2350
                 String qpQuery = query;
 306  
 
 307  
                 // tokenization should happen against the field!
 308  2350
                 QueryParser qp = new QueryParser( Version.LUCENE_30, indexerField.getKey(), new NexusAnalyzer() );
 309  2350
                 qp.setDefaultOperator( Operator.AND );
 310  
 
 311  
                 // small cheap trick
 312  
                 // if a query is not "expert" (does not contain field:val kind of expression)
 313  
                 // but it contains star and/or punctuation chars, example: "common-log*"
 314  
                 // since Lucene does not support multi-terms WITH wildcards.
 315  
                 // So, here, we "mimic" NexusAnalyzer (this should be fixed!)
 316  
                 // but do this with PRESERVING original query!
 317  2350
                 if ( qpQuery.matches( ".*(\\.|-|_|/).*" ) )
 318  
                 {
 319  2293
                     qpQuery =
 320  
                         qpQuery.toLowerCase().replaceAll( "\\*", "X" ).replaceAll( "\\.|-|_|/", " " ).replaceAll( "X",
 321  
                             "*" ).replaceAll( " \\* ", "" ).replaceAll( "^\\* ", "" ).replaceAll( " \\*$", "" );
 322  
                 }
 323  
 
 324  
                 // "fix" it with trailing "*" if not there, but only if it not ends with a space
 325  2350
                 if ( !qpQuery.endsWith( "*" ) && !qpQuery.endsWith( " " ) )
 326  
                 {
 327  2322
                     qpQuery += "*";
 328  
                 }
 329  
 
 330  
                 try
 331  
                 {
 332  
                     // qpQuery = "\"" + qpQuery + "\"";
 333  
 
 334  2350
                     BooleanQuery q1 = new BooleanQuery();
 335  
 
 336  2350
                     q1.add( qp.parse( qpQuery ), Occur.SHOULD );
 337  
 
 338  2350
                     if ( qpQuery.contains( " " ) )
 339  
                     {
 340  2296
                         q1.add( qp.parse( "\"" + qpQuery + "\"" ), Occur.SHOULD );
 341  
                     }
 342  
 
 343  2350
                     Query q2 = null;
 344  
 
 345  2350
                     int termCount = countTerms( indexerField, query );
 346  
 
 347  
                     // try with KW only if the processed query in qpQuery does not have spaces!
 348  2350
                     if ( !query.contains( " " ) && termCount > 1 )
 349  
                     {
 350  
                         // get the KW field
 351  2289
                         IndexerField keywordField = selectIndexerField( indexerField.getOntology(), SearchType.EXACT );
 352  
 
 353  2289
                         if ( keywordField.isKeyword() )
 354  
                         {
 355  2281
                             q2 = constructQuery( indexerField.getOntology(), keywordField, query, type );
 356  
                         }
 357  
                     }
 358  
 
 359  2350
                     if ( q2 == null )
 360  
                     {
 361  69
                         return q1;
 362  
                     }
 363  
                     else
 364  
                     {
 365  2281
                         BooleanQuery bq = new BooleanQuery();
 366  
 
 367  
                         // trick with order
 368  2281
                         bq.add( q2, Occur.SHOULD );
 369  2281
                         bq.add( q1, Occur.SHOULD );
 370  
 
 371  2281
                         return bq;
 372  
                     }
 373  
                 }
 374  0
                 catch ( ParseException e )
 375  
                 {
 376  
                     // TODO: we are not falling back anymore to legacy!
 377  0
                     throw e;
 378  
 
 379  
                     // getLogger().debug(
 380  
                     // "Query parsing with \"legacy\" method, we got ParseException from QueryParser: "
 381  
                     // + e.getMessage() );
 382  
                     //
 383  
                     // return legacyConstructQuery( indexerField.getKey(), query );
 384  
                 }
 385  
             }
 386  
         }
 387  
         else
 388  
         {
 389  
             // what search type is this?
 390  0
             return null;
 391  
         }
 392  
     }
 393  
 
 394  
     public Query legacyConstructQuery( String field, String query )
 395  
     {
 396  0
         if ( query == null || query.length() == 0 )
 397  
         {
 398  0
             getLogger().info( "Empty or null query for field:" + field );
 399  
 
 400  0
             return null;
 401  
         }
 402  
 
 403  0
         String q = query.toLowerCase();
 404  
 
 405  0
         char h = query.charAt( 0 );
 406  
 
 407  0
         if ( JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals( field )
 408  
             || JarFileContentsIndexCreator.FLD_CLASSNAMES.getKey().equals( field ) )
 409  
         {
 410  0
             q = q.replaceAll( "\\.", "/" );
 411  
 
 412  0
             if ( h == '^' )
 413  
             {
 414  0
                 q = q.substring( 1 );
 415  
 
 416  0
                 if ( q.charAt( 0 ) != '/' )
 417  
                 {
 418  0
                     q = '/' + q;
 419  
                 }
 420  
             }
 421  0
             else if ( h != '*' )
 422  
             {
 423  0
                 q = "*/" + q;
 424  
             }
 425  
         }
 426  
         else
 427  
         {
 428  0
             if ( h == '^' )
 429  
             {
 430  0
                 q = q.substring( 1 );
 431  
             }
 432  0
             else if ( h != '*' )
 433  
             {
 434  0
                 q = "*" + q;
 435  
             }
 436  
         }
 437  
 
 438  0
         int l = q.length() - 1;
 439  0
         char c = q.charAt( l );
 440  0
         if ( c == ' ' || c == '<' || c == '$' )
 441  
         {
 442  0
             q = q.substring( 0, q.length() - 1 );
 443  
         }
 444  0
         else if ( c != '*' )
 445  
         {
 446  0
             q += "*";
 447  
         }
 448  
 
 449  0
         int n = q.indexOf( '*' );
 450  0
         if ( n == -1 )
 451  
         {
 452  0
             return new TermQuery( new Term( field, q ) );
 453  
         }
 454  0
         else if ( n > 0 && n == q.length() - 1 )
 455  
         {
 456  0
             return new PrefixQuery( new Term( field, q.substring( 0, q.length() - 1 ) ) );
 457  
         }
 458  
 
 459  0
         return new WildcardQuery( new Term( field, q ) );
 460  
     }
 461  
 
 462  
     // ==
 463  
 
 464  208
     private NexusAnalyzer nexusAnalyzer = new NexusAnalyzer();
 465  
 
 466  
     protected int countTerms( final IndexerField indexerField, final String query )
 467  
     {
 468  
         try
 469  
         {
 470  2350
             TokenStream ts = nexusAnalyzer.reusableTokenStream( indexerField.getKey(), new StringReader( query ) );
 471  
 
 472  2350
             int result = 0;
 473  
 
 474  11512
             while ( ts.incrementToken() )
 475  
             {
 476  9162
                 result++;
 477  
             }
 478  
 
 479  2350
             return result;
 480  
         }
 481  0
         catch ( IOException e )
 482  
         {
 483  
             // will not happen
 484  0
             return 1;
 485  
         }
 486  
     }
 487  
 }