View Javadoc

1   package org.apache.maven.index;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.File;
23  import java.io.IOException;
24  import java.util.Arrays;
25  import java.util.HashSet;
26  
27  import junit.framework.Assert;
28  
29  import org.apache.lucene.search.Query;
30  import org.apache.lucene.store.Directory;
31  import org.apache.lucene.store.RAMDirectory;
32  import org.apache.maven.index.context.IndexingContext;
33  import org.apache.maven.index.expr.SourcedSearchExpression;
34  
35  public class DuplicateSearchTest
36      extends AbstractNexusIndexerTest
37  {
38      protected File repo = new File( getBasedir(), "src/test/repo" );
39  
40      protected IndexingContext context1;
41  
42      protected Directory contextDir1 = new RAMDirectory();
43  
44      protected IndexingContext context2;
45  
46      protected Directory contextDir2 = new RAMDirectory();
47  
48      @Override
49      protected void prepareNexusIndexer( NexusIndexer nexusIndexer )
50          throws Exception
51      {
52          // we have a context with ID "repo1-ctx" that contains index of repository with ID "repo1"
53          context = nexusIndexer.addIndexingContext( "repo1-ctx", "repo1", repo, indexDir, null, null, FULL_CREATORS );
54          // we have a context with ID "repo2-ctx" that contains index of repository with ID "repo2"
55          context1 = nexusIndexer.addIndexingContext( "repo2-ctx", "repo2", repo, contextDir1, null, null, FULL_CREATORS );
56          // we have a context with ID "repo3-ctx" that contains index of repository with ID "repo2"
57          context2 = nexusIndexer.addIndexingContext( "repo3-ctx", "repo2", repo, contextDir2, null, null, FULL_CREATORS );
58  
59          // note: those three contexts, while representing different entities are actually indexing the same repository
60          // directory, hence, will have exactly same content! Also, context1 and context2 do say, they both index
61          // repository with ID "repo2"!
62  
63          nexusIndexer.scan( context );
64          nexusIndexer.scan( context1 );
65          nexusIndexer.scan( context2 );
66  
67          assertNotNull( context.getTimestamp() );
68          assertNotNull( context1.getTimestamp() );
69          assertNotNull( context2.getTimestamp() );
70      }
71  
72      // a bit of explanation:
73      // we focus on a G "org.slf4j". The given section (subdir tree) looks like this (simplified):
74      // ├── org
75      //    ├── slf4j
76      //       ├── jcl104-over-slf4j
77      //       │   └── 1.4.2
78      //       │   ├── jcl104-over-slf4j-1.4.2-sources.jar
79      //       │   ├── jcl104-over-slf4j-1.4.2-sources.jar.sha1
80      //       │   ├── jcl104-over-slf4j-1.4.2.jar
81      //       │   ├── jcl104-over-slf4j-1.4.2.jar.sha1
82      //       │   ├── jcl104-over-slf4j-1.4.2.pom
83      //       │   └── jcl104-over-slf4j-1.4.2.pom.sha1
84      //       ├── slf4j-api
85      //       │   ├── 1.4.1
86      //       │   │   ├── slf4j-api-1.4.1-sources.jar
87      //       │   │   ├── slf4j-api-1.4.1-sources.jar.sha1
88      //       │   │   ├── slf4j-api-1.4.1.jar
89      //       │   │   ├── slf4j-api-1.4.1.jar.sha1
90      //       │   │   ├── slf4j-api-1.4.1.pom
91      //       │   │   └── slf4j-api-1.4.1.pom.sha1
92      //       │   └── 1.4.2
93      //       │   ├── slf4j-api-1.4.2-sources.jar
94      //       │   ├── slf4j-api-1.4.2-sources.jar.sha1
95      //       │   ├── slf4j-api-1.4.2.jar
96      //       │   ├── slf4j-api-1.4.2.jar.sha1
97      //       │   ├── slf4j-api-1.4.2.pom
98      //       │   └── slf4j-api-1.4.2.pom.sha1
99      //       └── slf4j-log4j12
100     //       └── 1.4.1
101     //       ├── slf4j-log4j12-1.4.1-bin.tar.gz
102     //       ├── slf4j-log4j12-1.4.1-bin.zip
103     //       ├── slf4j-log4j12-1.4.1-sources.jar
104     //       ├── slf4j-log4j12-1.4.1-sources.jar.sha1
105     //       ├── slf4j-log4j12-1.4.1.jar
106     //       ├── slf4j-log4j12-1.4.1.jar.sha1
107     //       ├── slf4j-log4j12-1.4.1.pom
108     //       └── slf4j-log4j12-1.4.1.pom.sha1
109     //
110     // Records on index are created as: each main and each "classified" artifact is one Document.
111     // Meaning, with structure above, for groupId "org.slf4j" we have 10 records:
112     // G:A:V
113     // org.slf4j:jcl104-over-slf4j:1.4.2:jar
114     // org.slf4j:jcl104-over-slf4j:1.4.2:jar:sources
115     // org.slf4j:slf4j-api:1.4.1:jar
116     // org.slf4j:slf4j-api:1.4.1:jar:sources
117     // org.slf4j:slf4j-api:1.4.2:jar
118     // org.slf4j:slf4j-api:1.4.2:jar:sources
119     // org.slf4j:slf4j-log4j12:1.4.1:jar
120     // org.slf4j:slf4j-log4j12:1.4.1:jar:sources
121     // org.slf4j:slf4j-log4j12:1.4.1:zip:bin
122     // org.slf4j:slf4j-log4j12:1.4.1:tar.gz:bin
123     //
124     // ArtifactInfo, along with GAV carries contextId and repositoryId too!
125 
126     public void testProveSvnRev1158917IsWrong()
127         throws IOException
128     {
129         // change is SVN Rev1158917 (http://svn.apache.org/viewvc?view=revision&revision=1158917) is wrong (and is
130         // undone)
131         // because after removing it, we still dont have GAV dupes in results, here is a proof:
132 
133         Query query = nexusIndexer.constructQuery( MAVEN.GROUP_ID, new SourcedSearchExpression( "org.slf4j" ) );
134         FlatSearchRequest fsReq = new FlatSearchRequest( query );
135         fsReq.getContexts().add( context );
136         fsReq.getContexts().add( context1 );
137         fsReq.getContexts().add( context2 );
138 
139         FlatSearchResponse fsResp = nexusIndexer.searchFlat( fsReq );
140 
141         Assert.assertEquals( "We have 10 GAVs coming from three contextes", 10, fsResp.getResults().size() );
142 
143         // Why? Look at the FlatSearchRequest default comparator it uses, it is ArtifactInfo.VERSION_COMPARATOR
144         // that neglects contextId and repositoryId and compares GAVs only, and the Collection fixed in SVN Rev1158917
145         // is actually a Set<ArtifactInfo with proper comparator set.
146     }
147 
148     public void testHowUniqueSearchShouldBeDone()
149         throws IOException
150     {
151         // my use case: I am searching for duplicates in given two contexts belonging to given groupId "org.slf4j"
152         // I expect to find intersection of two reposes, since both of those indexes/reposes contains that
153 
154         Query query = nexusIndexer.constructQuery( MAVEN.GROUP_ID, new SourcedSearchExpression( "org.slf4j" ) );
155 
156         FlatSearchRequest fsReq = new FlatSearchRequest( query );
157         fsReq.setArtifactInfoComparator( ArtifactInfo.CONTEXT_VERSION_COMPARATOR );
158         fsReq.getContexts().add( context );
159         fsReq.getContexts().add( context1 );
160         fsReq.getContexts().add( context2 );
161 
162         FlatSearchResponse fsResp = nexusIndexer.searchFlat( fsReq );
163 
164         Assert.assertEquals( "We have 10 GAVs coming from three contextes, it is 30", 30, fsResp.getResults().size() );
165 
166         // Why? We set explicitly the comparator to CONTEXT_VERSION_COMPARATOR, that compares GAV+contextId, hence,
167         // will return all hits from all participating contexts.
168     }
169 
170     public void testHowtoPerformAggregatedSearch()
171         throws IOException
172     {
173         // Note: currently this is implemented for IteratorSearches only! TBD for Flat and Grouped searches
174 
175         // my use case: searching across multiple contexts, querying how many combinations of GAs exists in groupId
176         // "org.slf4j".
177 
178         Query query = nexusIndexer.constructQuery( MAVEN.GROUP_ID, new SourcedSearchExpression( "org.slf4j" ) );
179 
180         IteratorSearchRequest isReq = new IteratorSearchRequest( query );
181 
182         // so, how many different GA combinations exists, this is almost equal to SQLs group by "groupId, artifactId"
183         isReq.setArtifactInfoFilter( new UniqueArtifactFilterPostprocessor( new HashSet<Field>( Arrays.asList(
184             MAVEN.GROUP_ID, MAVEN.ARTIFACT_ID ) ) ) );
185         isReq.getContexts().add( context );
186         isReq.getContexts().add( context1 );
187         isReq.getContexts().add( context2 );
188 
189         // Note: iteratorSearch is completely different beast that flat or grouped searches. While it excels in
190         // low memory consumption and extra features (like presented here), it needs special care: you have to handle it
191         // as resource, since lazy loading requires context locking, and if you forget to do so, you will end up with a
192         // flaky
193         // application that will most probably fail (by deadlocking itself or thrashing indexes).
194 
195         IteratorSearchResponse isResp = null;
196         int actualResultCount = 0;
197 
198         try
199         {
200             isResp = nexusIndexer.searchIterator( isReq );
201 
202             // consume the iterator to count actual result set size
203             for ( ArtifactInfo ai : isResp )
204             {
205                 actualResultCount++;
206             }
207         }
208         finally
209         {
210             if ( isResp != null )
211             {
212                 isResp.close();
213             }
214         }
215 
216         Assert.assertEquals( "Iterator delivered to us 3 results, since we have 3 GA combinations", 3,
217             actualResultCount );
218         Assert.assertEquals(
219             "IteratorSearch is strange beast, due to it's nature, it cannot say how many elements it (will) return in advance, due to filtering, postprocessing, etc",
220             -1, isResp.getReturnedHitsCount() );
221         Assert.assertEquals(
222             "The processing/search tackled 10 GAVs coming from three contextes, it is 30. This is the record count that were hit by processing of this search, but IS NOT the count results (it depends on filtering, comparators, etc)!",
223             30, isResp.getTotalHitsCount() );
224     }
225 }