1 | |
package org.apache.maven.index; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import java.io.IOException; |
23 | |
import java.io.StringReader; |
24 | |
|
25 | |
import org.apache.lucene.analysis.TokenStream; |
26 | |
import org.apache.lucene.index.Term; |
27 | |
import org.apache.lucene.queryParser.ParseException; |
28 | |
import org.apache.lucene.queryParser.QueryParser; |
29 | |
import org.apache.lucene.queryParser.QueryParser.Operator; |
30 | |
import org.apache.lucene.search.BooleanClause.Occur; |
31 | |
import org.apache.lucene.search.BooleanQuery; |
32 | |
import org.apache.lucene.search.PrefixQuery; |
33 | |
import org.apache.lucene.search.Query; |
34 | |
import org.apache.lucene.search.TermQuery; |
35 | |
import org.apache.lucene.search.WildcardQuery; |
36 | |
import org.apache.lucene.util.Version; |
37 | |
import org.apache.maven.index.context.NexusAnalyzer; |
38 | |
import org.apache.maven.index.creator.JarFileContentsIndexCreator; |
39 | |
import org.apache.maven.index.creator.MinimalArtifactInfoIndexCreator; |
40 | |
import org.apache.maven.index.expr.SearchExpression; |
41 | |
import org.apache.maven.index.expr.SearchTyped; |
42 | |
import org.codehaus.plexus.component.annotations.Component; |
43 | |
import org.codehaus.plexus.component.annotations.Requirement; |
44 | |
import org.codehaus.plexus.logging.Logger; |
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | |
|
60 | |
|
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | |
|
66 | |
@Component( role = QueryCreator.class ) |
67 | 208 | public class DefaultQueryCreator |
68 | |
implements QueryCreator |
69 | |
{ |
70 | |
@Requirement |
71 | |
private Logger logger; |
72 | |
|
73 | |
protected Logger getLogger() |
74 | |
{ |
75 | 1 | return logger; |
76 | |
} |
77 | |
|
78 | |
|
79 | |
|
80 | |
public IndexerField selectIndexerField( final Field field, final SearchType type ) |
81 | |
{ |
82 | 4841 | IndexerField lastField = null; |
83 | |
|
84 | 4841 | for ( IndexerField indexerField : field.getIndexerFields() ) |
85 | |
{ |
86 | 7199 | lastField = indexerField; |
87 | |
|
88 | 7199 | if ( type.matchesIndexerField( indexerField ) ) |
89 | |
{ |
90 | 4827 | return indexerField; |
91 | |
} |
92 | |
} |
93 | |
|
94 | 14 | return lastField; |
95 | |
} |
96 | |
|
97 | |
public Query constructQuery( final Field field, final SearchExpression expression ) |
98 | |
throws ParseException |
99 | |
{ |
100 | 2364 | SearchType searchType = SearchType.SCORED; |
101 | |
|
102 | 2364 | if ( expression instanceof SearchTyped ) |
103 | |
{ |
104 | 2348 | searchType = ( (SearchTyped) expression ).getSearchType(); |
105 | |
} |
106 | |
|
107 | 2364 | return constructQuery( field, expression.getStringValue(), searchType ); |
108 | |
} |
109 | |
|
110 | |
public Query constructQuery( final Field field, final String query, final SearchType type ) |
111 | |
throws ParseException |
112 | |
{ |
113 | 2552 | if ( type == null ) |
114 | |
{ |
115 | 0 | throw new NullPointerException( "Cannot construct query with type of \"null\"!" ); |
116 | |
} |
117 | |
|
118 | 2552 | if ( field == null ) |
119 | |
{ |
120 | 0 | throw new NullPointerException( "Cannot construct query for field \"null\"!" ); |
121 | |
} |
122 | |
else |
123 | |
{ |
124 | 2552 | return constructQuery( field, selectIndexerField( field, type ), query, type ); |
125 | |
} |
126 | |
} |
127 | |
|
128 | |
@Deprecated |
129 | |
public Query constructQuery( String field, String query ) |
130 | |
{ |
131 | 0 | Query result = null; |
132 | |
|
133 | 0 | if ( MinimalArtifactInfoIndexCreator.FLD_GROUP_ID_KW.getKey().equals( field ) |
134 | |
|| MinimalArtifactInfoIndexCreator.FLD_ARTIFACT_ID_KW.getKey().equals( field ) |
135 | |
|| MinimalArtifactInfoIndexCreator.FLD_VERSION_KW.getKey().equals( field ) |
136 | |
|| JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals( field ) ) |
137 | |
{ |
138 | |
|
139 | 0 | result = legacyConstructQuery( field, query ); |
140 | |
} |
141 | |
else |
142 | |
{ |
143 | 0 | QueryParser qp = new QueryParser( Version.LUCENE_24, field, new NexusAnalyzer() ); |
144 | |
|
145 | |
|
146 | |
|
147 | |
|
148 | 0 | if ( !query.contains( ":" ) ) |
149 | |
{ |
150 | 0 | if ( query.contains( "*" ) && query.matches( ".*(\\.|-|_).*" ) ) |
151 | |
{ |
152 | 0 | query = |
153 | |
query.toLowerCase().replaceAll( "\\*", "X" ).replaceAll( "\\.|-|_", " " ).replaceAll( "X", "*" ); |
154 | |
} |
155 | |
} |
156 | |
|
157 | |
try |
158 | |
{ |
159 | 0 | result = qp.parse( query ); |
160 | |
} |
161 | 0 | catch ( ParseException e ) |
162 | |
{ |
163 | 0 | getLogger().debug( |
164 | |
"Query parsing with \"legacy\" method, we got ParseException from QueryParser: " + e.getMessage() ); |
165 | |
|
166 | 0 | result = legacyConstructQuery( field, query ); |
167 | 0 | } |
168 | |
} |
169 | |
|
170 | 0 | if ( getLogger().isDebugEnabled() ) |
171 | |
{ |
172 | 0 | getLogger().debug( "Query parsed as: " + result.toString() ); |
173 | |
} |
174 | |
|
175 | 0 | return result; |
176 | |
} |
177 | |
|
178 | |
|
179 | |
|
180 | |
public Query constructQuery( final Field field, final IndexerField indexerField, final String query, |
181 | |
final SearchType type ) |
182 | |
throws ParseException |
183 | |
{ |
184 | 4833 | if ( indexerField == null ) |
185 | |
{ |
186 | 0 | getLogger().warn( |
187 | |
"Querying for field \"" |
188 | |
+ field.toString() |
189 | |
+ "\" without any indexer field was tried. Please review your code, and consider adding this field to index!" ); |
190 | |
|
191 | 0 | return null; |
192 | |
} |
193 | 4833 | if ( !indexerField.isIndexed() ) |
194 | |
{ |
195 | 0 | getLogger().warn( |
196 | |
"Querying for non-indexed field " + field.toString() |
197 | |
+ " was tried. Please review your code or consider adding this field to index!" ); |
198 | |
|
199 | 0 | return null; |
200 | |
} |
201 | |
|
202 | 4833 | if ( query.startsWith( "*" ) || query.startsWith( "?" ) ) |
203 | |
{ |
204 | 16 | throw new ParseException( "Query cannot start with '*' or '?'!" ); |
205 | |
} |
206 | |
|
207 | 4817 | if ( Field.NOT_PRESENT.equals( query ) ) |
208 | |
{ |
209 | 2 | return new WildcardQuery( new Term( indexerField.getKey(), "*" ) ); |
210 | |
} |
211 | |
|
212 | 4815 | if ( SearchType.EXACT.equals( type ) ) |
213 | |
{ |
214 | 163 | if ( indexerField.isKeyword() ) |
215 | |
{ |
216 | |
|
217 | 161 | if ( query.contains( "*" ) || query.contains( "?" ) ) |
218 | |
{ |
219 | 1 | return new WildcardQuery( new Term( indexerField.getKey(), query ) ); |
220 | |
} |
221 | |
else |
222 | |
{ |
223 | |
|
224 | 160 | return new TermQuery( new Term( indexerField.getKey(), query ) ); |
225 | |
} |
226 | |
} |
227 | 2 | else if ( !indexerField.isKeyword() && indexerField.isStored() ) |
228 | |
{ |
229 | |
|
230 | |
|
231 | 2 | if ( JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.equals( indexerField ) ) |
232 | |
{ |
233 | 1 | if ( query.startsWith( "/" ) ) |
234 | |
{ |
235 | 0 | return new TermQuery( new Term( indexerField.getKey(), query.toLowerCase().replaceAll( "\\.", |
236 | |
"/" ) ) ); |
237 | |
} |
238 | |
else |
239 | |
{ |
240 | 1 | return new TermQuery( new Term( indexerField.getKey(), "/" |
241 | |
+ query.toLowerCase().replaceAll( "\\.", "/" ) ) ); |
242 | |
} |
243 | |
} |
244 | |
else |
245 | |
{ |
246 | 1 | getLogger().warn( |
247 | |
type.toString() |
248 | |
+ " type of querying for non-keyword (but stored) field " |
249 | |
+ indexerField.getOntology().toString() |
250 | |
+ " was tried. Please review your code, or indexCreator involved, since this type of querying of this field is currently unsupported." ); |
251 | |
|
252 | |
|
253 | |
|
254 | 1 | return null; |
255 | |
} |
256 | |
} |
257 | |
else |
258 | |
{ |
259 | 0 | getLogger().warn( |
260 | |
type.toString() |
261 | |
+ " type of querying for non-keyword (and not stored) field " |
262 | |
+ indexerField.getOntology().toString() |
263 | |
+ " was tried. Please review your code, or indexCreator involved, since this type of querying of this field is impossible." ); |
264 | |
|
265 | |
|
266 | 0 | return null; |
267 | |
} |
268 | |
} |
269 | 4652 | else if ( SearchType.SCORED.equals( type ) ) |
270 | |
{ |
271 | 4652 | if ( JarFileContentsIndexCreator.FLD_CLASSNAMES.equals( indexerField ) ) |
272 | |
{ |
273 | 17 | String qpQuery = query.toLowerCase().replaceAll( "\\.", " " ).replaceAll( "/", " " ); |
274 | |
|
275 | 17 | QueryParser qp = new QueryParser( Version.LUCENE_30, indexerField.getKey(), new NexusAnalyzer() ); |
276 | 17 | qp.setDefaultOperator( Operator.AND ); |
277 | 17 | return qp.parse( qpQuery ); |
278 | |
} |
279 | 4635 | else if ( indexerField.isKeyword() ) |
280 | |
{ |
281 | |
|
282 | 2285 | if ( query.contains( "*" ) || query.contains( "?" ) ) |
283 | |
{ |
284 | 20 | return new WildcardQuery( new Term( indexerField.getKey(), query ) ); |
285 | |
} |
286 | |
else |
287 | |
{ |
288 | 2265 | BooleanQuery bq = new BooleanQuery(); |
289 | |
|
290 | 2265 | Term t = new Term( indexerField.getKey(), query ); |
291 | |
|
292 | 2265 | bq.add( new TermQuery( t ), Occur.SHOULD ); |
293 | |
|
294 | 2265 | PrefixQuery pq = new PrefixQuery( t ); |
295 | 2265 | pq.setBoost( 0.8f ); |
296 | |
|
297 | 2265 | bq.add( pq, Occur.SHOULD ); |
298 | |
|
299 | 2265 | return bq; |
300 | |
} |
301 | |
} |
302 | |
else |
303 | |
{ |
304 | |
|
305 | 2350 | String qpQuery = query; |
306 | |
|
307 | |
|
308 | 2350 | QueryParser qp = new QueryParser( Version.LUCENE_30, indexerField.getKey(), new NexusAnalyzer() ); |
309 | 2350 | qp.setDefaultOperator( Operator.AND ); |
310 | |
|
311 | |
|
312 | |
|
313 | |
|
314 | |
|
315 | |
|
316 | |
|
317 | 2350 | if ( qpQuery.matches( ".*(\\.|-|_|/).*" ) ) |
318 | |
{ |
319 | 2293 | qpQuery = |
320 | |
qpQuery.toLowerCase().replaceAll( "\\*", "X" ).replaceAll( "\\.|-|_|/", " " ).replaceAll( "X", |
321 | |
"*" ).replaceAll( " \\* ", "" ).replaceAll( "^\\* ", "" ).replaceAll( " \\*$", "" ); |
322 | |
} |
323 | |
|
324 | |
|
325 | 2350 | if ( !qpQuery.endsWith( "*" ) && !qpQuery.endsWith( " " ) ) |
326 | |
{ |
327 | 2322 | qpQuery += "*"; |
328 | |
} |
329 | |
|
330 | |
try |
331 | |
{ |
332 | |
|
333 | |
|
334 | 2350 | BooleanQuery q1 = new BooleanQuery(); |
335 | |
|
336 | 2350 | q1.add( qp.parse( qpQuery ), Occur.SHOULD ); |
337 | |
|
338 | 2350 | if ( qpQuery.contains( " " ) ) |
339 | |
{ |
340 | 2296 | q1.add( qp.parse( "\"" + qpQuery + "\"" ), Occur.SHOULD ); |
341 | |
} |
342 | |
|
343 | 2350 | Query q2 = null; |
344 | |
|
345 | 2350 | int termCount = countTerms( indexerField, query ); |
346 | |
|
347 | |
|
348 | 2350 | if ( !query.contains( " " ) && termCount > 1 ) |
349 | |
{ |
350 | |
|
351 | 2289 | IndexerField keywordField = selectIndexerField( indexerField.getOntology(), SearchType.EXACT ); |
352 | |
|
353 | 2289 | if ( keywordField.isKeyword() ) |
354 | |
{ |
355 | 2281 | q2 = constructQuery( indexerField.getOntology(), keywordField, query, type ); |
356 | |
} |
357 | |
} |
358 | |
|
359 | 2350 | if ( q2 == null ) |
360 | |
{ |
361 | 69 | return q1; |
362 | |
} |
363 | |
else |
364 | |
{ |
365 | 2281 | BooleanQuery bq = new BooleanQuery(); |
366 | |
|
367 | |
|
368 | 2281 | bq.add( q2, Occur.SHOULD ); |
369 | 2281 | bq.add( q1, Occur.SHOULD ); |
370 | |
|
371 | 2281 | return bq; |
372 | |
} |
373 | |
} |
374 | 0 | catch ( ParseException e ) |
375 | |
{ |
376 | |
|
377 | 0 | throw e; |
378 | |
|
379 | |
|
380 | |
|
381 | |
|
382 | |
|
383 | |
|
384 | |
} |
385 | |
} |
386 | |
} |
387 | |
else |
388 | |
{ |
389 | |
|
390 | 0 | return null; |
391 | |
} |
392 | |
} |
393 | |
|
394 | |
public Query legacyConstructQuery( String field, String query ) |
395 | |
{ |
396 | 0 | if ( query == null || query.length() == 0 ) |
397 | |
{ |
398 | 0 | getLogger().info( "Empty or null query for field:" + field ); |
399 | |
|
400 | 0 | return null; |
401 | |
} |
402 | |
|
403 | 0 | String q = query.toLowerCase(); |
404 | |
|
405 | 0 | char h = query.charAt( 0 ); |
406 | |
|
407 | 0 | if ( JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals( field ) |
408 | |
|| JarFileContentsIndexCreator.FLD_CLASSNAMES.getKey().equals( field ) ) |
409 | |
{ |
410 | 0 | q = q.replaceAll( "\\.", "/" ); |
411 | |
|
412 | 0 | if ( h == '^' ) |
413 | |
{ |
414 | 0 | q = q.substring( 1 ); |
415 | |
|
416 | 0 | if ( q.charAt( 0 ) != '/' ) |
417 | |
{ |
418 | 0 | q = '/' + q; |
419 | |
} |
420 | |
} |
421 | 0 | else if ( h != '*' ) |
422 | |
{ |
423 | 0 | q = "*/" + q; |
424 | |
} |
425 | |
} |
426 | |
else |
427 | |
{ |
428 | 0 | if ( h == '^' ) |
429 | |
{ |
430 | 0 | q = q.substring( 1 ); |
431 | |
} |
432 | 0 | else if ( h != '*' ) |
433 | |
{ |
434 | 0 | q = "*" + q; |
435 | |
} |
436 | |
} |
437 | |
|
438 | 0 | int l = q.length() - 1; |
439 | 0 | char c = q.charAt( l ); |
440 | 0 | if ( c == ' ' || c == '<' || c == '$' ) |
441 | |
{ |
442 | 0 | q = q.substring( 0, q.length() - 1 ); |
443 | |
} |
444 | 0 | else if ( c != '*' ) |
445 | |
{ |
446 | 0 | q += "*"; |
447 | |
} |
448 | |
|
449 | 0 | int n = q.indexOf( '*' ); |
450 | 0 | if ( n == -1 ) |
451 | |
{ |
452 | 0 | return new TermQuery( new Term( field, q ) ); |
453 | |
} |
454 | 0 | else if ( n > 0 && n == q.length() - 1 ) |
455 | |
{ |
456 | 0 | return new PrefixQuery( new Term( field, q.substring( 0, q.length() - 1 ) ) ); |
457 | |
} |
458 | |
|
459 | 0 | return new WildcardQuery( new Term( field, q ) ); |
460 | |
} |
461 | |
|
462 | |
|
463 | |
|
464 | 208 | private NexusAnalyzer nexusAnalyzer = new NexusAnalyzer(); |
465 | |
|
466 | |
protected int countTerms( final IndexerField indexerField, final String query ) |
467 | |
{ |
468 | |
try |
469 | |
{ |
470 | 2350 | TokenStream ts = nexusAnalyzer.reusableTokenStream( indexerField.getKey(), new StringReader( query ) ); |
471 | |
|
472 | 2350 | int result = 0; |
473 | |
|
474 | 11512 | while ( ts.incrementToken() ) |
475 | |
{ |
476 | 9162 | result++; |
477 | |
} |
478 | |
|
479 | 2350 | return result; |
480 | |
} |
481 | 0 | catch ( IOException e ) |
482 | |
{ |
483 | |
|
484 | 0 | return 1; |
485 | |
} |
486 | |
} |
487 | |
} |