1 | |
package org.apache.maven.index; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import java.io.IOException; |
23 | |
import java.io.StringReader; |
24 | |
import java.util.ArrayList; |
25 | |
import java.util.Iterator; |
26 | |
import java.util.List; |
27 | |
|
28 | |
import org.apache.lucene.analysis.CachingTokenFilter; |
29 | |
import org.apache.lucene.analysis.TokenStream; |
30 | |
import org.apache.lucene.document.Document; |
31 | |
import org.apache.lucene.search.Explanation; |
32 | |
import org.apache.lucene.search.IndexSearcher; |
33 | |
import org.apache.lucene.search.Query; |
34 | |
import org.apache.lucene.search.TopDocs; |
35 | |
import org.apache.lucene.search.highlight.Formatter; |
36 | |
import org.apache.lucene.search.highlight.Highlighter; |
37 | |
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; |
38 | |
import org.apache.lucene.search.highlight.QueryScorer; |
39 | |
import org.apache.lucene.search.highlight.SimpleHTMLFormatter; |
40 | |
import org.apache.lucene.search.highlight.TextFragment; |
41 | |
import org.apache.maven.index.context.IndexUtils; |
42 | |
import org.apache.maven.index.context.IndexingContext; |
43 | |
import org.apache.maven.index.creator.JarFileContentsIndexCreator; |
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | 334 | public class DefaultIteratorResultSet |
51 | |
implements IteratorResultSet |
52 | |
{ |
53 | |
private final IteratorSearchRequest searchRequest; |
54 | |
|
55 | |
private final IndexSearcher indexSearcher; |
56 | |
|
57 | |
private final List<IndexingContext> contexts; |
58 | |
|
59 | |
private final int[] starts; |
60 | |
|
61 | |
private final ArtifactInfoFilter filter; |
62 | |
|
63 | |
private final ArtifactInfoPostprocessor postprocessor; |
64 | |
|
65 | |
private final List<MatchHighlightRequest> matchHighlightRequests; |
66 | |
|
67 | |
private final TopDocs hits; |
68 | |
|
69 | |
private final int from; |
70 | |
|
71 | |
private final int count; |
72 | |
|
73 | |
private final int maxRecPointer; |
74 | |
|
75 | |
private int pointer; |
76 | |
|
77 | |
private int processedArtifactInfoCount; |
78 | |
|
79 | |
private ArtifactInfo ai; |
80 | |
|
81 | |
protected DefaultIteratorResultSet( final IteratorSearchRequest request, final IndexSearcher indexSearcher, |
82 | |
final List<IndexingContext> contexts, final TopDocs hits ) |
83 | |
throws IOException |
84 | 115 | { |
85 | 115 | this.searchRequest = request; |
86 | |
|
87 | 115 | this.indexSearcher = indexSearcher; |
88 | |
|
89 | 115 | this.contexts = contexts; |
90 | |
|
91 | |
{ |
92 | 115 | int maxDoc = 0; |
93 | 115 | this.starts = new int[contexts.size() + 1]; |
94 | 232 | for ( int i = 0; i < contexts.size(); i++ ) |
95 | |
{ |
96 | 117 | starts[i] = maxDoc; |
97 | 117 | maxDoc += contexts.get( i ).getIndexReader().maxDoc(); |
98 | |
} |
99 | 115 | starts[contexts.size()] = maxDoc; |
100 | |
} |
101 | |
|
102 | 115 | this.filter = request.getArtifactInfoFilter(); |
103 | |
|
104 | 115 | this.postprocessor = request.getArtifactInfoPostprocessor(); |
105 | |
|
106 | 115 | this.matchHighlightRequests = request.getMatchHighlightRequests(); |
107 | |
|
108 | 115 | this.hits = hits; |
109 | |
|
110 | 115 | this.from = request.getStart(); |
111 | |
|
112 | 115 | this.count = |
113 | |
( request.getCount() == AbstractSearchRequest.UNDEFINED ? hits.scoreDocs.length : Math.min( |
114 | |
request.getCount(), hits.scoreDocs.length ) ); |
115 | |
|
116 | 115 | this.pointer = from; |
117 | |
|
118 | 115 | this.processedArtifactInfoCount = 0; |
119 | |
|
120 | 115 | this.maxRecPointer = from + count; |
121 | |
|
122 | 115 | ai = createNextAi(); |
123 | |
|
124 | 115 | if ( ai == null ) |
125 | |
{ |
126 | 45 | cleanUp(); |
127 | |
} |
128 | 115 | } |
129 | |
|
130 | |
public boolean hasNext() |
131 | |
{ |
132 | 400 | return ai != null; |
133 | |
} |
134 | |
|
135 | |
public ArtifactInfo next() |
136 | |
{ |
137 | 334 | ArtifactInfo result = ai; |
138 | |
|
139 | |
try |
140 | |
{ |
141 | 334 | ai = createNextAi(); |
142 | |
} |
143 | 0 | catch ( IOException e ) |
144 | |
{ |
145 | 0 | ai = null; |
146 | |
|
147 | 0 | throw new IllegalStateException( "Cannot fetch next ArtifactInfo!", e ); |
148 | |
} |
149 | |
finally |
150 | |
{ |
151 | 334 | if ( ai == null ) |
152 | |
{ |
153 | 69 | cleanUp(); |
154 | |
} |
155 | |
} |
156 | |
|
157 | 334 | return result; |
158 | |
} |
159 | |
|
160 | |
public void remove() |
161 | |
{ |
162 | 0 | throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() ); |
163 | |
} |
164 | |
|
165 | |
public Iterator<ArtifactInfo> iterator() |
166 | |
{ |
167 | 2 | return this; |
168 | |
} |
169 | |
|
170 | |
public void close() |
171 | |
{ |
172 | 93 | cleanUp(); |
173 | 93 | } |
174 | |
|
175 | |
public int getTotalProcessedArtifactInfoCount() |
176 | |
{ |
177 | 0 | return processedArtifactInfoCount; |
178 | |
} |
179 | |
|
180 | |
@Override |
181 | |
public void finalize() |
182 | |
throws Throwable |
183 | |
{ |
184 | 115 | super.finalize(); |
185 | |
|
186 | 115 | if ( !cleanedUp ) |
187 | |
{ |
188 | 0 | System.err.println( "#WARNING: Lock leaking from " + getClass().getName() + " for query " |
189 | |
+ searchRequest.getQuery().toString() ); |
190 | |
|
191 | 0 | cleanUp(); |
192 | |
} |
193 | 115 | } |
194 | |
|
195 | |
|
196 | |
|
197 | |
protected ArtifactInfo createNextAi() |
198 | |
throws IOException |
199 | |
{ |
200 | 449 | ArtifactInfo result = null; |
201 | |
|
202 | |
|
203 | |
|
204 | |
|
205 | |
|
206 | |
|
207 | 825 | while ( ( result == null ) && ( pointer < maxRecPointer ) && ( pointer < hits.scoreDocs.length ) ) |
208 | |
{ |
209 | 376 | Document doc = indexSearcher.doc( hits.scoreDocs[pointer].doc ); |
210 | |
|
211 | 376 | IndexingContext context = getIndexingContextForPointer( doc, hits.scoreDocs[pointer].doc ); |
212 | |
|
213 | 376 | result = IndexUtils.constructArtifactInfo( doc, context ); |
214 | |
|
215 | 376 | if ( result != null ) |
216 | |
{ |
217 | |
|
218 | |
|
219 | 376 | if ( searchRequest.isLuceneExplain() ) |
220 | |
{ |
221 | 4 | result.getAttributes().put( Explanation.class.getName(), |
222 | |
indexSearcher.explain( searchRequest.getQuery(), hits.scoreDocs[pointer].doc ).toString() ); |
223 | |
} |
224 | |
|
225 | 376 | result.setLuceneScore( hits.scoreDocs[pointer].score ); |
226 | |
|
227 | 376 | result.repository = context.getRepositoryId(); |
228 | |
|
229 | 376 | result.context = context.getId(); |
230 | |
|
231 | 376 | if ( filter != null ) |
232 | |
{ |
233 | 47 | if ( !filter.accepts( context, result ) ) |
234 | |
{ |
235 | 41 | result = null; |
236 | |
} |
237 | |
} |
238 | |
|
239 | 376 | if ( result != null && postprocessor != null ) |
240 | |
{ |
241 | 0 | postprocessor.postprocess( context, result ); |
242 | |
} |
243 | |
|
244 | 376 | if ( result != null && matchHighlightRequests.size() > 0 ) |
245 | |
{ |
246 | 20 | calculateHighlights( context, doc, result ); |
247 | |
} |
248 | |
} |
249 | |
|
250 | 376 | pointer++; |
251 | 376 | processedArtifactInfoCount++; |
252 | 376 | } |
253 | |
|
254 | 449 | return result; |
255 | |
} |
256 | |
|
257 | 115 | private volatile boolean cleanedUp = false; |
258 | |
|
259 | |
protected synchronized void cleanUp() |
260 | |
{ |
261 | 207 | if ( cleanedUp ) |
262 | |
{ |
263 | 92 | return; |
264 | |
} |
265 | |
|
266 | 115 | for ( IndexingContext ctx : contexts ) |
267 | |
{ |
268 | 117 | ctx.unlock(); |
269 | |
} |
270 | |
|
271 | 115 | this.cleanedUp = true; |
272 | 115 | } |
273 | |
|
274 | |
|
275 | |
|
276 | |
|
277 | |
|
278 | |
|
279 | |
|
280 | |
|
281 | |
protected void calculateHighlights( IndexingContext context, Document d, ArtifactInfo ai ) |
282 | |
throws IOException |
283 | |
{ |
284 | 20 | IndexerField field = null; |
285 | |
|
286 | 20 | String text = null; |
287 | |
|
288 | 20 | List<String> highlightFragment = null; |
289 | |
|
290 | 20 | for ( MatchHighlightRequest hr : matchHighlightRequests ) |
291 | |
{ |
292 | 20 | field = selectStoredIndexerField( hr.getField() ); |
293 | |
|
294 | 20 | if ( field != null ) |
295 | |
{ |
296 | 20 | text = ai.getFieldValue( field.getOntology() ); |
297 | |
|
298 | 20 | if ( text != null ) |
299 | |
{ |
300 | 20 | highlightFragment = highlightField( context, hr, field, text ); |
301 | |
|
302 | 20 | if ( highlightFragment != null && highlightFragment.size() > 0 ) |
303 | |
{ |
304 | 5 | MatchHighlight matchHighlight = new MatchHighlight( hr.getField(), highlightFragment ); |
305 | |
|
306 | 5 | ai.getMatchHighlights().add( matchHighlight ); |
307 | 20 | } |
308 | |
} |
309 | |
} |
310 | |
} |
311 | 20 | } |
312 | |
|
313 | |
|
314 | |
|
315 | |
|
316 | |
|
317 | |
|
318 | |
|
319 | |
protected IndexerField selectStoredIndexerField( Field field ) |
320 | |
{ |
321 | |
|
322 | 20 | if ( MAVEN.CLASSNAMES.equals( field ) ) |
323 | |
{ |
324 | 5 | return JarFileContentsIndexCreator.FLD_CLASSNAMES; |
325 | |
} |
326 | |
else |
327 | |
{ |
328 | 15 | return field.getIndexerFields().isEmpty() ? null : field.getIndexerFields().iterator().next(); |
329 | |
} |
330 | |
} |
331 | |
|
332 | |
|
333 | |
|
334 | |
|
335 | |
|
336 | |
|
337 | |
|
338 | |
|
339 | |
|
340 | |
|
341 | |
|
342 | |
protected List<String> highlightField( IndexingContext context, MatchHighlightRequest hr, IndexerField field, |
343 | |
String text ) |
344 | |
throws IOException |
345 | |
{ |
346 | |
|
347 | 20 | if ( MAVEN.CLASSNAMES.equals( field.getOntology() ) ) |
348 | |
{ |
349 | 5 | text = text.replace( '/', '.' ).replaceAll( "^\\.", "" ).replaceAll( "\n\\.", "\n" ); |
350 | |
} |
351 | |
|
352 | 20 | Query rewrittenQuery = hr.getQuery().rewrite( context.getIndexReader() ); |
353 | |
|
354 | 20 | CachingTokenFilter tokenStream = |
355 | |
new CachingTokenFilter( context.getAnalyzer().tokenStream( field.getKey(), new StringReader( text ) ) ); |
356 | |
|
357 | 20 | Formatter formatter = null; |
358 | |
|
359 | 20 | if ( MatchHighlightMode.HTML.equals( hr.getHighlightMode() ) ) |
360 | |
{ |
361 | 20 | formatter = new SimpleHTMLFormatter(); |
362 | |
} |
363 | |
else |
364 | |
{ |
365 | 0 | throw new UnsupportedOperationException( "Hightlight more \"" + hr.getHighlightMode().toString() |
366 | |
+ "\" is not supported!" ); |
367 | |
} |
368 | |
|
369 | 20 | return getBestFragments( rewrittenQuery, formatter, tokenStream, text, 3 ); |
370 | |
} |
371 | |
|
372 | |
protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream, |
373 | |
String text, int maxNumFragments ) |
374 | |
throws IOException |
375 | |
{ |
376 | 20 | Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) ); |
377 | |
|
378 | 20 | highlighter.setTextFragmenter( new OneLineFragmenter() ); |
379 | |
|
380 | 20 | tokenStream.reset(); |
381 | |
|
382 | 20 | maxNumFragments = Math.max( 1, maxNumFragments ); |
383 | |
|
384 | |
TextFragment[] frag; |
385 | |
|
386 | 20 | ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments ); |
387 | |
|
388 | |
try |
389 | |
{ |
390 | 20 | frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments ); |
391 | |
|
392 | 40 | for ( int i = 0; i < frag.length; i++ ) |
393 | |
{ |
394 | 20 | if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) ) |
395 | |
{ |
396 | 5 | fragTexts.add( frag[i].toString() ); |
397 | |
} |
398 | |
} |
399 | |
} |
400 | 0 | catch ( InvalidTokenOffsetsException e ) |
401 | |
{ |
402 | |
|
403 | 20 | } |
404 | |
|
405 | 20 | return fragTexts; |
406 | |
} |
407 | |
|
408 | |
protected IndexingContext getIndexingContextForPointer( Document doc, int docPtr ) |
409 | |
{ |
410 | 376 | return contexts.get( readerIndex( docPtr, this.starts, this.contexts.size() ) ); |
411 | |
} |
412 | |
|
413 | |
private static int readerIndex( int n, int[] starts, int numSubReaders ) |
414 | |
{ |
415 | 376 | int lo = 0; |
416 | 376 | int hi = numSubReaders - 1; |
417 | |
|
418 | 775 | while ( hi >= lo ) |
419 | |
{ |
420 | 406 | int mid = ( lo + hi ) >>> 1; |
421 | 406 | int midValue = starts[mid]; |
422 | 406 | if ( n < midValue ) |
423 | |
{ |
424 | 20 | hi = mid - 1; |
425 | |
} |
426 | 386 | else if ( n > midValue ) |
427 | |
{ |
428 | 379 | lo = mid + 1; |
429 | |
} |
430 | |
else |
431 | |
{ |
432 | 7 | while ( mid + 1 < numSubReaders && starts[mid + 1] == midValue ) |
433 | |
{ |
434 | 0 | mid++; |
435 | |
} |
436 | 7 | return mid; |
437 | |
} |
438 | 399 | } |
439 | 369 | return hi; |
440 | |
} |
441 | |
} |