View Javadoc
1   package org.apache.maven.index;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import javax.inject.Named;
23  import javax.inject.Singleton;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.Collection;
28  import java.util.Comparator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Set;
32  import java.util.TreeMap;
33  import java.util.TreeSet;
34  
35  import org.apache.lucene.document.Document;
36  import org.apache.lucene.search.IndexSearcher;
37  import org.apache.lucene.search.Query;
38  import org.apache.lucene.search.ScoreDoc;
39  import org.apache.lucene.search.TopScoreDocCollector;
40  import org.apache.maven.index.context.IndexUtils;
41  import org.apache.maven.index.context.IndexingContext;
42  import org.apache.maven.index.context.NexusIndexMultiReader;
43  import org.apache.maven.index.context.NexusIndexMultiSearcher;
44  import org.slf4j.Logger;
45  import org.slf4j.LoggerFactory;
46  
47  /**
48   * A default search engine implementation
49   * 
50   * @author Eugene Kuleshov
51   * @author Tamas Cservenak
52   */
53  @Singleton
54  @Named
55  public class DefaultSearchEngine
56      implements SearchEngine
57  {
58  
59      private final Logger logger = LoggerFactory.getLogger( getClass() );
60  
61      protected Logger getLogger()
62      {
63          return logger;
64      }
65  
66      @Deprecated
67      public Set<ArtifactInfo> searchFlat( Comparator<ArtifactInfo> artifactInfoComparator,
68                                           IndexingContext indexingContext, Query query )
69          throws IOException
70      {
71          return searchFlatPaged( new FlatSearchRequest( query, artifactInfoComparator, indexingContext ),
72              Arrays.asList( indexingContext ), true ).getResults();
73      }
74  
75      @Deprecated
76      public Set<ArtifactInfo> searchFlat( Comparator<ArtifactInfo> artifactInfoComparator,
77                                           Collection<IndexingContext> indexingContexts, Query query )
78          throws IOException
79      {
80          return searchFlatPaged( new FlatSearchRequest( query, artifactInfoComparator ), indexingContexts ).getResults();
81      }
82  
83      public FlatSearchResponse searchFlatPaged( FlatSearchRequest request, Collection<IndexingContext> indexingContexts )
84          throws IOException
85      {
86          return searchFlatPaged( request, indexingContexts, false );
87      }
88  
89      public FlatSearchResponse forceSearchFlatPaged( FlatSearchRequest request,
90                                                      Collection<IndexingContext> indexingContexts )
91          throws IOException
92      {
93          return searchFlatPaged( request, indexingContexts, true );
94      }
95  
96      protected FlatSearchResponse searchFlatPaged( FlatSearchRequest request,
97                                                    Collection<IndexingContext> indexingContexts, boolean ignoreContext )
98          throws IOException
99      {
100         List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
101 
102         final TreeSet<ArtifactInfo> result = new TreeSet<ArtifactInfo>( request.getArtifactInfoComparator() );
103         return new FlatSearchResponse( request.getQuery(), searchFlat( request, result, contexts, request.getQuery() ),
104             result );
105     }
106 
107     // ==
108 
109     public GroupedSearchResponse searchGrouped( GroupedSearchRequest request,
110                                                 Collection<IndexingContext> indexingContexts )
111         throws IOException
112     {
113         return searchGrouped( request, indexingContexts, false );
114     }
115 
116     public GroupedSearchResponse forceSearchGrouped( GroupedSearchRequest request,
117                                                      Collection<IndexingContext> indexingContexts )
118         throws IOException
119     {
120         return searchGrouped( request, indexingContexts, true );
121     }
122 
123     protected GroupedSearchResponse searchGrouped( GroupedSearchRequest request,
124                                                    Collection<IndexingContext> indexingContexts, boolean ignoreContext )
125         throws IOException
126     {
127         List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
128 
129         final TreeMap<String, ArtifactInfoGroup> result =
130             new TreeMap<String, ArtifactInfoGroup>( request.getGroupKeyComparator() );
131 
132         return new GroupedSearchResponse( request.getQuery(), searchGrouped( request, result, request.getGrouping(),
133             contexts, request.getQuery() ), result );
134     }
135 
136     // ===
137 
138     protected int searchFlat( FlatSearchRequest req, Collection<ArtifactInfo> result,
139                               List<IndexingContext> participatingContexts, Query query )
140         throws IOException
141     {
142         int hitCount = 0;
143         for ( IndexingContext context : participatingContexts )
144         {
145             final IndexSearcher indexSearcher = context.acquireIndexSearcher();
146             try
147             {
148                 final TopScoreDocCollector collector = doSearchWithCeiling( req, indexSearcher, query );
149 
150                 if ( collector.getTotalHits() == 0 )
151                 {
152                     // context has no hits, just continue to next one
153                     continue;
154                 }
155 
156                 ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
157 
158                 // uhm btw hitCount contains dups
159 
160                 hitCount += collector.getTotalHits();
161 
162                 int start = 0; // from == FlatSearchRequest.UNDEFINED ? 0 : from;
163 
164                 // we have to pack the results as long: a) we have found aiCount ones b) we depleted hits
165                 for ( int i = start; i < scoreDocs.length; i++ )
166                 {
167                     Document doc = indexSearcher.doc( scoreDocs[i].doc );
168 
169                     ArtifactInfo artifactInfo = IndexUtils.constructArtifactInfo( doc, context );
170 
171                     if ( artifactInfo != null )
172                     {
173                         artifactInfo.setRepository( context.getRepositoryId() );
174                         artifactInfo.setContext( context.getId() );
175 
176                         if ( req.getArtifactInfoFilter() != null )
177                         {
178                             if ( !req.getArtifactInfoFilter().accepts( context, artifactInfo ) )
179                             {
180                                 continue;
181                             }
182                         }
183                         if ( req.getArtifactInfoPostprocessor() != null )
184                         {
185                             req.getArtifactInfoPostprocessor().postprocess( context, artifactInfo );
186                         }
187 
188                         result.add( artifactInfo );
189                     }
190                 }
191             }
192             finally
193             {
194                 context.releaseIndexSearcher( indexSearcher );
195             }
196         }
197 
198         return hitCount;
199     }
200 
201     protected int searchGrouped( GroupedSearchRequest req, Map<String, ArtifactInfoGroup> result, Grouping grouping,
202                                  List<IndexingContext> participatingContexts, Query query )
203         throws IOException
204     {
205         int hitCount = 0;
206 
207         for ( IndexingContext context : participatingContexts )
208         {
209             final IndexSearcher indexSearcher = context.acquireIndexSearcher();
210             try
211             {
212                 final TopScoreDocCollector collector = doSearchWithCeiling( req, indexSearcher, query );
213 
214                 if ( collector.getTotalHits() > 0 )
215                 {
216                     ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
217 
218                     hitCount += collector.getTotalHits();
219 
220                     for ( int i = 0; i < scoreDocs.length; i++ )
221                     {
222                         Document doc = indexSearcher.doc( scoreDocs[i].doc );
223 
224                         ArtifactInfo artifactInfo = IndexUtils.constructArtifactInfo( doc, context );
225 
226                         if ( artifactInfo != null )
227                         {
228                             artifactInfo.setRepository( context.getRepositoryId() );
229                             artifactInfo.setContext( context.getId() );
230 
231                             if ( req.getArtifactInfoFilter() != null )
232                             {
233                                 if ( !req.getArtifactInfoFilter().accepts( context, artifactInfo ) )
234                                 {
235                                     continue;
236                                 }
237                             }
238                             if ( req.getArtifactInfoPostprocessor() != null )
239                             {
240                                 req.getArtifactInfoPostprocessor().postprocess( context, artifactInfo );
241                             }
242 
243                             if ( !grouping.addArtifactInfo( result, artifactInfo ) )
244                             {
245                                 // fix the hitCount accordingly
246                                 hitCount--;
247                             }
248                         }
249                     }
250                 }
251             }
252             finally
253             {
254                 context.releaseIndexSearcher( indexSearcher );
255             }
256         }
257 
258         return hitCount;
259     }
260 
261     // == NG Search
262 
263     public IteratorSearchResponse searchIteratorPaged( IteratorSearchRequest request,
264                                                        Collection<IndexingContext> indexingContexts )
265         throws IOException
266     {
267         return searchIteratorPaged( request, indexingContexts, false );
268     }
269 
270     public IteratorSearchResponse forceSearchIteratorPaged( IteratorSearchRequest request,
271                                                             Collection<IndexingContext> indexingContexts )
272         throws IOException
273     {
274         return searchIteratorPaged( request, indexingContexts, true );
275     }
276 
277     private IteratorSearchResponse searchIteratorPaged( IteratorSearchRequest request,
278                                                         Collection<IndexingContext> indexingContexts,
279                                                         boolean ignoreContext )
280         throws IOException
281     {
282         List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
283 
284         NexusIndexMultiReader multiReader = getMergedIndexReader( indexingContexts, ignoreContext );
285 
286         NexusIndexMultiSearcher indexSearcher = new NexusIndexMultiSearcher( multiReader );
287 
288         try
289         {
290             TopScoreDocCollector hits = doSearchWithCeiling( request, indexSearcher, request.getQuery() );
291 
292             return new IteratorSearchResponse( request.getQuery(), hits.getTotalHits(),
293                                                new DefaultIteratorResultSet( request, indexSearcher, contexts,
294                                                                              hits.topDocs() ) );
295         }
296         catch ( IOException e )
297         {
298             try
299             {
300                 indexSearcher.release();
301             }
302             catch ( Exception secondary )
303             {
304                 // do not mask original exception
305             }
306             throw e;
307         }
308         catch ( RuntimeException e )
309         {
310             try
311             {
312                 indexSearcher.release();
313             }
314             catch ( Exception secondary )
315             {
316                 // do not mask original exception
317             }
318             throw e;
319         }
320     }
321 
322     // ==
323 
324     protected TopScoreDocCollector doSearchWithCeiling( final AbstractSearchRequest request,
325                                                         final IndexSearcher indexSearcher, final Query query )
326         throws IOException
327     {
328         int topHitCount = getTopDocsCollectorHitNum( request, AbstractSearchRequest.UNDEFINED );
329 
330         if ( AbstractSearchRequest.UNDEFINED != topHitCount )
331         {
332             // count is set, simply just execute it as-is
333             final TopScoreDocCollector hits = TopScoreDocCollector.create( topHitCount );
334 
335             indexSearcher.search( query, hits );
336 
337             return hits;
338         }
339         else
340         {
341             // set something reasonable as 1k
342             topHitCount = 1000;
343 
344             // perform search
345             TopScoreDocCollector hits = TopScoreDocCollector.create( topHitCount );
346             indexSearcher.search( query, hits );
347 
348             // check total hits against, does it fit?
349             if ( topHitCount < hits.getTotalHits() )
350             {
351                 topHitCount = hits.getTotalHits();
352 
353                 if ( getLogger().isDebugEnabled() )
354                 {
355                     // warn the user and leave trace just before OOM might happen
356                     // the hits.getTotalHits() might be HUUGE
357                     getLogger().debug(
358                         "Executing unbounded search, and fitting topHitCounts to " + topHitCount
359                         + ", an OOMEx might follow. To avoid OOM use narrower queries or limit your expectancy with "
360                         + "request.setCount() method where appropriate. See MINDEXER-14 for details." );
361                 }
362 
363                 // redo all, but this time with correct numbers
364                 hits = TopScoreDocCollector.create( topHitCount );
365                 indexSearcher.search( query, hits );
366             }
367 
368             return hits;
369         }
370     }
371 
372     /**
373      * Returns the list of participating contexts. Does not locks them, just builds a list of them.
374      */
375     protected List<IndexingContext> getParticipatingContexts( final Collection<IndexingContext> indexingContexts,
376                                                               final boolean ignoreContext )
377     {
378         // to not change the API all away, but we need stable ordering here
379         // filter for those 1st, that take part in here
380         final ArrayList<IndexingContext> contexts = new ArrayList<IndexingContext>( indexingContexts.size() );
381 
382         for ( IndexingContext ctx : indexingContexts )
383         {
384             if ( ignoreContext || ctx.isSearchable() )
385             {
386                 contexts.add( ctx );
387             }
388         }
389 
390         return contexts;
391     }
392 
393     /**
394      * Locks down participating contexts, and returns a "merged" reader of them. In case of error, unlocks as part of
395      * cleanup and re-throws exception. Without error, it is the duty of caller to unlock contexts!
396      * 
397      * @param indexingContexts
398      * @param ignoreContext
399      * @return
400      * @throws IOException
401      */
402     protected NexusIndexMultiReader getMergedIndexReader( final Collection<IndexingContext> indexingContexts,
403                                                           final boolean ignoreContext )
404         throws IOException
405     {
406         final List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
407         return new NexusIndexMultiReader( contexts );
408     }
409 
410     protected int getTopDocsCollectorHitNum( final AbstractSearchRequest request, final int ceiling )
411     {
412         if ( request instanceof AbstractSearchPageableRequest )
413         {
414             final AbstractSearchPageableRequest prequest = (AbstractSearchPageableRequest) request;
415 
416             if ( AbstractSearchRequest.UNDEFINED != prequest.getCount() )
417             {
418                 // easy, user knows and tells us how many results he want
419                 return prequest.getCount() + prequest.getStart();
420             }
421         }
422         else
423         {
424             if ( AbstractSearchRequest.UNDEFINED != request.getCount() )
425             {
426                 // easy, user knows and tells us how many results he want
427                 return request.getCount();
428             }
429         }
430 
431         return ceiling;
432     }
433 }