View Javadoc

1   package org.apache.maven.index;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.StringReader;
24  import java.util.ArrayList;
25  import java.util.Iterator;
26  import java.util.List;
27  
28  import org.apache.lucene.analysis.CachingTokenFilter;
29  import org.apache.lucene.analysis.TokenStream;
30  import org.apache.lucene.document.Document;
31  import org.apache.lucene.search.Explanation;
32  import org.apache.lucene.search.IndexSearcher;
33  import org.apache.lucene.search.Query;
34  import org.apache.lucene.search.TopDocs;
35  import org.apache.lucene.search.highlight.Formatter;
36  import org.apache.lucene.search.highlight.Highlighter;
37  import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
38  import org.apache.lucene.search.highlight.QueryScorer;
39  import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
40  import org.apache.lucene.search.highlight.TextFragment;
41  import org.apache.maven.index.context.IndexUtils;
42  import org.apache.maven.index.context.IndexingContext;
43  import org.apache.maven.index.creator.JarFileContentsIndexCreator;
44  
45  /**
46   * Default implementation of IteratorResultSet. TODO: there is too much logic in this class; refactor it!
47   * 
48   * @author cstamas
49   */
50  public class DefaultIteratorResultSet
51      implements IteratorResultSet
52  {
53      private final IteratorSearchRequest searchRequest;
54  
55      private final IndexSearcher indexSearcher;
56  
57      private final List<IndexingContext> contexts;
58  
59      private final int[] starts;
60  
61      private final ArtifactInfoFilter filter;
62  
63      private final ArtifactInfoPostprocessor postprocessor;
64  
65      private final List<MatchHighlightRequest> matchHighlightRequests;
66  
67      private final TopDocs hits;
68  
69      private final int from;
70  
71      private final int count;
72  
73      private final int maxRecPointer;
74  
75      private int pointer;
76  
77      private int processedArtifactInfoCount;
78  
79      private ArtifactInfo ai;
80  
81      protected DefaultIteratorResultSet( final IteratorSearchRequest request, final IndexSearcher indexSearcher,
82                                          final List<IndexingContext> contexts, final TopDocs hits )
83          throws IOException
84      {
85          this.searchRequest = request;
86  
87          this.indexSearcher = indexSearcher;
88  
89          this.contexts = contexts;
90  
91          {
92              int maxDoc = 0;
93              this.starts = new int[contexts.size() + 1]; // build starts array
94              for ( int i = 0; i < contexts.size(); i++ )
95              {
96                  starts[i] = maxDoc;
97                  maxDoc += contexts.get( i ).getIndexReader().maxDoc(); // compute maxDocs
98              }
99              starts[contexts.size()] = maxDoc;
100         }
101 
102         this.filter = request.getArtifactInfoFilter();
103 
104         this.postprocessor = request.getArtifactInfoPostprocessor();
105 
106         this.matchHighlightRequests = request.getMatchHighlightRequests();
107 
108         this.hits = hits;
109 
110         this.from = request.getStart();
111 
112         this.count =
113             ( request.getCount() == AbstractSearchRequest.UNDEFINED ? hits.scoreDocs.length : Math.min(
114                 request.getCount(), hits.scoreDocs.length ) );
115 
116         this.pointer = from;
117 
118         this.processedArtifactInfoCount = 0;
119 
120         this.maxRecPointer = from + count;
121 
122         ai = createNextAi();
123 
124         if ( ai == null )
125         {
126             cleanUp();
127         }
128     }
129 
130     public boolean hasNext()
131     {
132         return ai != null;
133     }
134 
135     public ArtifactInfo next()
136     {
137         ArtifactInfo result = ai;
138 
139         try
140         {
141             ai = createNextAi();
142         }
143         catch ( IOException e )
144         {
145             ai = null;
146 
147             throw new IllegalStateException( "Cannot fetch next ArtifactInfo!", e );
148         }
149         finally
150         {
151             if ( ai == null )
152             {
153                 cleanUp();
154             }
155         }
156 
157         return result;
158     }
159 
160     public void remove()
161     {
162         throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() );
163     }
164 
165     public Iterator<ArtifactInfo> iterator()
166     {
167         return this;
168     }
169 
170     public void close()
171     {
172         cleanUp();
173     }
174 
175     public int getTotalProcessedArtifactInfoCount()
176     {
177         return processedArtifactInfoCount;
178     }
179 
180     @Override
181     public void finalize()
182         throws Throwable
183     {
184         super.finalize();
185 
186         if ( !cleanedUp )
187         {
188             System.err.println( "#WARNING: Lock leaking from " + getClass().getName() + " for query "
189                 + searchRequest.getQuery().toString() );
190 
191             cleanUp();
192         }
193     }
194 
195     // ==
196 
197     protected ArtifactInfo createNextAi()
198         throws IOException
199     {
200         ArtifactInfo result = null;
201 
202         // we should stop if:
203         // a) we found what we want
204         // b) pointer advanced over more documents that user requested
205         // c) pointer advanced over more documents that hits has
206         // or we found what we need
207         while ( ( result == null ) && ( pointer < maxRecPointer ) && ( pointer < hits.scoreDocs.length ) )
208         {
209             Document doc = indexSearcher.doc( hits.scoreDocs[pointer].doc );
210 
211             IndexingContext context = getIndexingContextForPointer( doc, hits.scoreDocs[pointer].doc );
212 
213             result = IndexUtils.constructArtifactInfo( doc, context );
214 
215             if ( result != null )
216             {
217                 // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS VERY COSTLY OPERATION
218                 // For debugging only!!!
219                 if ( searchRequest.isLuceneExplain() )
220                 {
221                     result.getAttributes().put( Explanation.class.getName(),
222                         indexSearcher.explain( searchRequest.getQuery(), hits.scoreDocs[pointer].doc ).toString() );
223                 }
224 
225                 result.setLuceneScore( hits.scoreDocs[pointer].score );
226 
227                 result.repository = context.getRepositoryId();
228 
229                 result.context = context.getId();
230 
231                 if ( filter != null )
232                 {
233                     if ( !filter.accepts( context, result ) )
234                     {
235                         result = null;
236                     }
237                 }
238 
239                 if ( result != null && postprocessor != null )
240                 {
241                     postprocessor.postprocess( context, result );
242                 }
243 
244                 if ( result != null && matchHighlightRequests.size() > 0 )
245                 {
246                     calculateHighlights( context, doc, result );
247                 }
248             }
249 
250             pointer++;
251             processedArtifactInfoCount++;
252         }
253 
254         return result;
255     }
256 
257     private volatile boolean cleanedUp = false;
258 
259     protected synchronized void cleanUp()
260     {
261         if ( cleanedUp )
262         {
263             return;
264         }
265 
266         for ( IndexingContext ctx : contexts )
267         {
268             ctx.unlock();
269         }
270 
271         this.cleanedUp = true;
272     }
273 
274     /**
275      * Creates the MatchHighlights and adds them to ArtifactInfo if found/can.
276      * 
277      * @param context
278      * @param d
279      * @param ai
280      */
281     protected void calculateHighlights( IndexingContext context, Document d, ArtifactInfo ai )
282         throws IOException
283     {
284         IndexerField field = null;
285 
286         String text = null;
287 
288         List<String> highlightFragment = null;
289 
290         for ( MatchHighlightRequest hr : matchHighlightRequests )
291         {
292             field = selectStoredIndexerField( hr.getField() );
293 
294             if ( field != null )
295             {
296                 text = ai.getFieldValue( field.getOntology() );
297 
298                 if ( text != null )
299                 {
300                     highlightFragment = highlightField( context, hr, field, text );
301 
302                     if ( highlightFragment != null && highlightFragment.size() > 0 )
303                     {
304                         MatchHighlight matchHighlight = new MatchHighlight( hr.getField(), highlightFragment );
305 
306                         ai.getMatchHighlights().add( matchHighlight );
307                     }
308                 }
309             }
310         }
311     }
312 
313     /**
314      * Select a STORED IndexerField assigned to passed in Field.
315      * 
316      * @param field
317      * @return
318      */
319     protected IndexerField selectStoredIndexerField( Field field )
320     {
321         // hack here
322         if ( MAVEN.CLASSNAMES.equals( field ) )
323         {
324             return JarFileContentsIndexCreator.FLD_CLASSNAMES;
325         }
326         else
327         {
328             return field.getIndexerFields().isEmpty() ? null : field.getIndexerFields().iterator().next();
329         }
330     }
331 
332     /**
333      * Returns a string that contains match fragment highlighted in style as user requested.
334      * 
335      * @param context
336      * @param hr
337      * @param field
338      * @param doc
339      * @return
340      * @throws IOException
341      */
342     protected List<String> highlightField( IndexingContext context, MatchHighlightRequest hr, IndexerField field,
343                                            String text )
344         throws IOException
345     {
346         // exception with classnames
347         if ( MAVEN.CLASSNAMES.equals( field.getOntology() ) )
348         {
349             text = text.replace( '/', '.' ).replaceAll( "^\\.", "" ).replaceAll( "\n\\.", "\n" );
350         }
351 
352         Query rewrittenQuery = hr.getQuery().rewrite( context.getIndexReader() );
353 
354         CachingTokenFilter tokenStream =
355             new CachingTokenFilter( context.getAnalyzer().tokenStream( field.getKey(), new StringReader( text ) ) );
356 
357         Formatter formatter = null;
358 
359         if ( MatchHighlightMode.HTML.equals( hr.getHighlightMode() ) )
360         {
361             formatter = new SimpleHTMLFormatter();
362         }
363         else
364         {
365             throw new UnsupportedOperationException( "Hightlight more \"" + hr.getHighlightMode().toString()
366                 + "\" is not supported!" );
367         }
368 
369         return getBestFragments( rewrittenQuery, formatter, tokenStream, text, 3 );
370     }
371 
372     protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream,
373                                                    String text, int maxNumFragments )
374         throws IOException
375     {
376         Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) );
377 
378         highlighter.setTextFragmenter( new OneLineFragmenter() );
379 
380         tokenStream.reset();
381 
382         maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check
383 
384         TextFragment[] frag;
385         // Get text
386         ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments );
387 
388         try
389         {
390             frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments );
391 
392             for ( int i = 0; i < frag.length; i++ )
393             {
394                 if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) )
395                 {
396                     fragTexts.add( frag[i].toString() );
397                 }
398             }
399         }
400         catch ( InvalidTokenOffsetsException e )
401         {
402             // empty?
403         }
404 
405         return fragTexts;
406     }
407 
408     protected IndexingContext getIndexingContextForPointer( Document doc, int docPtr )
409     {
410         return contexts.get( readerIndex( docPtr, this.starts, this.contexts.size() ) );
411     }
412 
413     private static int readerIndex( int n, int[] starts, int numSubReaders )
414     { // find reader for doc n:
415         int lo = 0; // search starts array
416         int hi = numSubReaders - 1; // for first element less
417 
418         while ( hi >= lo )
419         {
420             int mid = ( lo + hi ) >>> 1;
421             int midValue = starts[mid];
422             if ( n < midValue )
423             {
424                 hi = mid - 1;
425             }
426             else if ( n > midValue )
427             {
428                 lo = mid + 1;
429             }
430             else
431             { // found a match
432                 while ( mid + 1 < numSubReaders && starts[mid + 1] == midValue )
433                 {
434                     mid++; // scan to last match
435                 }
436                 return mid;
437             }
438         }
439         return hi;
440     }
441 }