View Javadoc

1   package org.apache.maven.index;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.Collection;
26  import java.util.Comparator;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Set;
30  import java.util.TreeMap;
31  import java.util.TreeSet;
32  
33  import org.apache.lucene.document.Document;
34  import org.apache.lucene.index.IndexReader;
35  import org.apache.lucene.index.MultiReader;
36  import org.apache.lucene.search.IndexSearcher;
37  import org.apache.lucene.search.Query;
38  import org.apache.lucene.search.ScoreDoc;
39  import org.apache.lucene.search.TopScoreDocCollector;
40  import org.apache.maven.index.context.IndexUtils;
41  import org.apache.maven.index.context.IndexingContext;
42  import org.apache.maven.index.context.NexusIndexSearcher;
43  import org.codehaus.plexus.component.annotations.Component;
44  import org.codehaus.plexus.logging.AbstractLogEnabled;
45  
46  /**
47   * A default search engine implementation
48   * 
49   * @author Eugene Kuleshov
50   * @author Tamas Cservenak
51   */
52  @Component( role = SearchEngine.class )
53  public class DefaultSearchEngine
54      extends AbstractLogEnabled
55      implements SearchEngine
56  {
57      @Deprecated
58      public Set<ArtifactInfo> searchFlat( Comparator<ArtifactInfo> artifactInfoComparator,
59                                           IndexingContext indexingContext, Query query )
60          throws IOException
61      {
62          return searchFlatPaged( new FlatSearchRequest( query, artifactInfoComparator, indexingContext ),
63              Arrays.asList( indexingContext ), true ).getResults();
64      }
65  
66      @Deprecated
67      public Set<ArtifactInfo> searchFlat( Comparator<ArtifactInfo> artifactInfoComparator,
68                                           Collection<IndexingContext> indexingContexts, Query query )
69          throws IOException
70      {
71          return searchFlatPaged( new FlatSearchRequest( query, artifactInfoComparator ), indexingContexts ).getResults();
72      }
73  
74      public FlatSearchResponse searchFlatPaged( FlatSearchRequest request, Collection<IndexingContext> indexingContexts )
75          throws IOException
76      {
77          return searchFlatPaged( request, indexingContexts, false );
78      }
79  
80      public FlatSearchResponse forceSearchFlatPaged( FlatSearchRequest request,
81                                                      Collection<IndexingContext> indexingContexts )
82          throws IOException
83      {
84          return searchFlatPaged( request, indexingContexts, true );
85      }
86  
87      protected FlatSearchResponse searchFlatPaged( FlatSearchRequest request,
88                                                    Collection<IndexingContext> indexingContexts, boolean ignoreContext )
89          throws IOException
90      {
91          List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
92  
93          try
94          {
95              final TreeSet<ArtifactInfo> result = new TreeSet<ArtifactInfo>( request.getArtifactInfoComparator() );
96  
97              for ( IndexingContext ctx : contexts )
98              {
99                  ctx.lock();
100             }
101 
102             return new FlatSearchResponse( request.getQuery(), searchFlat( request, result, contexts,
103                 request.getQuery() ), result );
104         }
105         finally
106         {
107             for ( IndexingContext ctx : contexts )
108             {
109                 ctx.unlock();
110             }
111         }
112     }
113 
114     // ==
115 
116     public GroupedSearchResponse searchGrouped( GroupedSearchRequest request,
117                                                 Collection<IndexingContext> indexingContexts )
118         throws IOException
119     {
120         return searchGrouped( request, indexingContexts, false );
121     }
122 
123     public GroupedSearchResponse forceSearchGrouped( GroupedSearchRequest request,
124                                                      Collection<IndexingContext> indexingContexts )
125         throws IOException
126     {
127         return searchGrouped( request, indexingContexts, true );
128     }
129 
130     protected GroupedSearchResponse searchGrouped( GroupedSearchRequest request,
131                                                    Collection<IndexingContext> indexingContexts, boolean ignoreContext )
132         throws IOException
133     {
134         List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
135 
136         try
137         {
138             final TreeMap<String, ArtifactInfoGroup> result =
139                 new TreeMap<String, ArtifactInfoGroup>( request.getGroupKeyComparator() );
140 
141             for ( IndexingContext ctx : contexts )
142             {
143                 ctx.lock();
144             }
145 
146             return new GroupedSearchResponse( request.getQuery(), searchGrouped( request, result,
147                 request.getGrouping(), contexts, request.getQuery() ), result );
148 
149         }
150         finally
151         {
152             for ( IndexingContext ctx : contexts )
153             {
154                 ctx.unlock();
155             }
156         }
157     }
158 
159     // ===
160 
161     protected int searchFlat( FlatSearchRequest req, Collection<ArtifactInfo> result,
162                               List<IndexingContext> participatingContexts, Query query )
163         throws IOException
164     {
165         int hitCount = 0;
166 
167         for ( IndexingContext context : participatingContexts )
168         {
169             final TopScoreDocCollector collector = doSearchWithCeiling( req, context.getIndexSearcher(), query );
170 
171             if ( collector.getTotalHits() == 0 )
172             {
173                 // context has no hits, just continue to next one
174                 continue;
175             }
176 
177             ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
178 
179             // uhm btw hitCount contains dups
180 
181             hitCount += collector.getTotalHits();
182 
183             int start = 0; // from == FlatSearchRequest.UNDEFINED ? 0 : from;
184 
185             // we have to pack the results as long: a) we have found aiCount ones b) we depleted hits
186             for ( int i = start; i < scoreDocs.length; i++ )
187             {
188                 Document doc = context.getIndexSearcher().doc( scoreDocs[i].doc );
189 
190                 ArtifactInfo artifactInfo = IndexUtils.constructArtifactInfo( doc, context );
191 
192                 if ( artifactInfo != null )
193                 {
194                     artifactInfo.repository = context.getRepositoryId();
195 
196                     artifactInfo.context = context.getId();
197 
198                     result.add( artifactInfo );
199                 }
200             }
201         }
202 
203         return hitCount;
204     }
205 
206     protected int searchGrouped( GroupedSearchRequest req, Map<String, ArtifactInfoGroup> result, Grouping grouping,
207                                  List<IndexingContext> participatingContexts, Query query )
208         throws IOException
209     {
210         int hitCount = 0;
211 
212         for ( IndexingContext context : participatingContexts )
213         {
214             final TopScoreDocCollector collector = doSearchWithCeiling( req, context.getIndexSearcher(), query );
215 
216             if ( collector.getTotalHits() > 0 )
217             {
218                 ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
219 
220                 hitCount += collector.getTotalHits();
221 
222                 for ( int i = 0; i < scoreDocs.length; i++ )
223                 {
224                     Document doc = context.getIndexSearcher().doc( scoreDocs[i].doc );
225 
226                     ArtifactInfo artifactInfo = IndexUtils.constructArtifactInfo( doc, context );
227 
228                     if ( artifactInfo != null )
229                     {
230                         artifactInfo.repository = context.getRepositoryId();
231 
232                         artifactInfo.context = context.getId();
233 
234                         if ( !grouping.addArtifactInfo( result, artifactInfo ) )
235                         {
236                             // fix the hitCount accordingly
237                             hitCount--;
238                         }
239                     }
240                 }
241             }
242         }
243 
244         return hitCount;
245     }
246 
247     // == NG Search
248 
249     public IteratorSearchResponse searchIteratorPaged( IteratorSearchRequest request,
250                                                        Collection<IndexingContext> indexingContexts )
251         throws IOException
252     {
253         return searchIteratorPaged( request, indexingContexts, false );
254     }
255 
256     public IteratorSearchResponse forceSearchIteratorPaged( IteratorSearchRequest request,
257                                                             Collection<IndexingContext> indexingContexts )
258         throws IOException
259     {
260         return searchIteratorPaged( request, indexingContexts, true );
261     }
262 
263     private IteratorSearchResponse searchIteratorPaged( IteratorSearchRequest request,
264                                                         Collection<IndexingContext> indexingContexts,
265                                                         boolean ignoreContext )
266         throws IOException
267     {
268         try
269         {
270             List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
271 
272             IndexReader multiReader = getMergedIndexReader( indexingContexts, ignoreContext );
273 
274             IndexSearcher indexSearcher = new NexusIndexSearcher( multiReader );
275 
276             TopScoreDocCollector hits = doSearchWithCeiling( request, indexSearcher, request.getQuery() );
277 
278             return new IteratorSearchResponse( request.getQuery(), hits.getTotalHits(), new DefaultIteratorResultSet(
279                 request, indexSearcher, contexts, hits.topDocs() ) );
280         }
281         catch ( Throwable e )
282         {
283             // perform cleaup, otherwise DefaultIteratorResultSet will do it
284             for ( IndexingContext ctx : indexingContexts )
285             {
286                 if ( ignoreContext || ctx.isSearchable() )
287                 {
288                     ctx.unlock();
289                 }
290             }
291 
292             if ( e instanceof IOException )
293             {
294                 throw (IOException) e;
295             }
296             else
297             {
298                 // wrap it
299                 IOException ex = new IOException( e.getMessage() );
300                 ex.initCause( e );
301                 throw ex;
302             }
303         }
304     }
305 
306     // ==
307 
308     protected TopScoreDocCollector doSearchWithCeiling( final AbstractSearchRequest request,
309                                                         final IndexSearcher indexSearcher, final Query query )
310         throws IOException
311     {
312         int topHitCount = getTopDocsCollectorHitNum( request, AbstractSearchRequest.UNDEFINED );
313 
314         if ( AbstractSearchRequest.UNDEFINED != topHitCount )
315         {
316             // count is set, simply just execute it as-is
317             final TopScoreDocCollector hits = TopScoreDocCollector.create( topHitCount, true );
318 
319             indexSearcher.search( query, hits );
320 
321             return hits;
322         }
323         else
324         {
325             // set something reasonable as 1k
326             topHitCount = 1000;
327 
328             // perform search
329             TopScoreDocCollector hits = TopScoreDocCollector.create( topHitCount, true );
330             indexSearcher.search( query, hits );
331 
332             // check total hits against, does it fit?
333             if ( topHitCount < hits.getTotalHits() )
334             {
335                 topHitCount = hits.getTotalHits();
336 
337                 if ( getLogger().isDebugEnabled() )
338                 {
339                     // warn the user and leave trace just before OOM might happen
340                     // the hits.getTotalHits() might be HUUGE
341                     getLogger().debug(
342                         "Executing unbounded search, and fitting topHitCounts to "
343                             + topHitCount
344                             + ", an OOMEx might follow. To avoid OOM use narrower queries or limit your expectancy with request.setCount() method where appropriate. See MINDEXER-14 for details." );
345                 }
346 
347                 // redo all, but this time with correct numbers
348                 hits = TopScoreDocCollector.create( topHitCount, true );
349                 indexSearcher.search( query, hits );
350             }
351 
352             return hits;
353         }
354     }
355 
356     /**
357      * Returns the list of participating contexts. Does not locks them, just builds a list of them.
358      */
359     protected List<IndexingContext> getParticipatingContexts( final Collection<IndexingContext> indexingContexts,
360                                                               final boolean ignoreContext )
361     {
362         // to not change the API all away, but we need stable ordering here
363         // filter for those 1st, that take part in here
364         final ArrayList<IndexingContext> contexts = new ArrayList<IndexingContext>( indexingContexts.size() );
365 
366         for ( IndexingContext ctx : indexingContexts )
367         {
368             if ( ignoreContext || ctx.isSearchable() )
369             {
370                 contexts.add( ctx );
371             }
372         }
373 
374         return contexts;
375     }
376 
377     /**
378      * Locks down participating contexts, and returns a "merged" reader of them. In case of error, unlocks as part of
379      * cleanup and re-throws exception. Without error, it is the duty of caller to unlock contexts!
380      * 
381      * @param indexingContexts
382      * @param ignoreContext
383      * @return
384      * @throws IOException
385      */
386     protected IndexReader getMergedIndexReader( final Collection<IndexingContext> indexingContexts,
387                                                 final boolean ignoreContext )
388         throws IOException
389     {
390         final List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
391 
392         try
393         {
394             final ArrayList<IndexReader> contextsToSearch = new ArrayList<IndexReader>( contexts.size() );
395 
396             for ( IndexingContext ctx : contexts )
397             {
398                 ctx.lock();
399 
400                 contextsToSearch.add( ctx.getIndexReader() );
401             }
402 
403             MultiReader multiReader =
404                 new MultiReader( contextsToSearch.toArray( new IndexReader[contextsToSearch.size()] ) );
405 
406             return multiReader;
407         }
408         catch ( Throwable e )
409         {
410             // perform cleaup, otherwise DefaultIteratorResultSet will do it
411             for ( IndexingContext ctx : contexts )
412             {
413                 ctx.unlock();
414             }
415 
416             if ( e instanceof IOException )
417             {
418                 throw (IOException) e;
419             }
420             else
421             {
422                 // wrap it
423                 IOException ex = new IOException( e.getMessage() );
424                 ex.initCause( e );
425                 throw ex;
426             }
427         }
428     }
429 
430     protected int getTopDocsCollectorHitNum( final AbstractSearchRequest request, final int ceiling )
431     {
432         if ( request instanceof AbstractSearchPageableRequest )
433         {
434             final AbstractSearchPageableRequest prequest = (AbstractSearchPageableRequest) request;
435 
436             if ( AbstractSearchRequest.UNDEFINED != prequest.getCount() )
437             {
438                 // easy, user knows and tells us how many results he want
439                 return prequest.getCount() + prequest.getStart();
440             }
441         }
442         else
443         {
444             if ( AbstractSearchRequest.UNDEFINED != request.getCount() )
445             {
446                 // easy, user knows and tells us how many results he want
447                 return request.getCount();
448             }
449         }
450 
451         return ceiling;
452     }
453 }