1 package org.apache.maven.index; 2 3 /* 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 */ 21 22 import java.io.File; 23 import java.io.IOException; 24 import java.util.Arrays; 25 import java.util.HashSet; 26 27 import junit.framework.Assert; 28 29 import org.apache.lucene.search.Query; 30 import org.apache.lucene.store.Directory; 31 import org.apache.lucene.store.RAMDirectory; 32 import org.apache.maven.index.context.IndexingContext; 33 import org.apache.maven.index.expr.SourcedSearchExpression; 34 35 public class DuplicateSearchTest 36 extends AbstractNexusIndexerTest 37 { 38 protected File repo = new File( getBasedir(), "src/test/repo" ); 39 40 protected IndexingContext context1; 41 42 protected Directory contextDir1 = new RAMDirectory(); 43 44 protected IndexingContext context2; 45 46 protected Directory contextDir2 = new RAMDirectory(); 47 48 @Override 49 protected void prepareNexusIndexer( NexusIndexer nexusIndexer ) 50 throws Exception 51 { 52 // we have a context with ID "repo1-ctx" that contains index of repository with ID "repo1" 53 context = nexusIndexer.addIndexingContext( "repo1-ctx", "repo1", repo, indexDir, null, null, FULL_CREATORS ); 54 // we have a context with ID "repo2-ctx" that contains index of repository with ID "repo2" 55 context1 = nexusIndexer.addIndexingContext( "repo2-ctx", "repo2", repo, contextDir1, null, null, FULL_CREATORS ); 56 // we have a context with ID "repo3-ctx" that contains index of repository with ID "repo2" 57 context2 = nexusIndexer.addIndexingContext( "repo3-ctx", "repo2", repo, contextDir2, null, null, FULL_CREATORS ); 58 59 // note: those three contexts, while representing different entities are actually indexing the same repository 60 // directory, hence, will have exactly same content! Also, context1 and context2 do say, they both index 61 // repository with ID "repo2"! 62 63 nexusIndexer.scan( context ); 64 nexusIndexer.scan( context1 ); 65 nexusIndexer.scan( context2 ); 66 67 assertNotNull( context.getTimestamp() ); 68 assertNotNull( context1.getTimestamp() ); 69 assertNotNull( context2.getTimestamp() ); 70 } 71 72 // a bit of explanation: 73 // we focus on a G "org.slf4j". The given section (subdir tree) looks like this (simplified): 74 // ├── org 75 // ├── slf4j 76 // ├── jcl104-over-slf4j 77 // │ └── 1.4.2 78 // │ ├── jcl104-over-slf4j-1.4.2-sources.jar 79 // │ ├── jcl104-over-slf4j-1.4.2-sources.jar.sha1 80 // │ ├── jcl104-over-slf4j-1.4.2.jar 81 // │ ├── jcl104-over-slf4j-1.4.2.jar.sha1 82 // │ ├── jcl104-over-slf4j-1.4.2.pom 83 // │ └── jcl104-over-slf4j-1.4.2.pom.sha1 84 // ├── slf4j-api 85 // │ ├── 1.4.1 86 // │ │ ├── slf4j-api-1.4.1-sources.jar 87 // │ │ ├── slf4j-api-1.4.1-sources.jar.sha1 88 // │ │ ├── slf4j-api-1.4.1.jar 89 // │ │ ├── slf4j-api-1.4.1.jar.sha1 90 // │ │ ├── slf4j-api-1.4.1.pom 91 // │ │ └── slf4j-api-1.4.1.pom.sha1 92 // │ └── 1.4.2 93 // │ ├── slf4j-api-1.4.2-sources.jar 94 // │ ├── slf4j-api-1.4.2-sources.jar.sha1 95 // │ ├── slf4j-api-1.4.2.jar 96 // │ ├── slf4j-api-1.4.2.jar.sha1 97 // │ ├── slf4j-api-1.4.2.pom 98 // │ └── slf4j-api-1.4.2.pom.sha1 99 // └── slf4j-log4j12 100 // └── 1.4.1 101 // ├── slf4j-log4j12-1.4.1-bin.tar.gz 102 // ├── slf4j-log4j12-1.4.1-bin.zip 103 // ├── slf4j-log4j12-1.4.1-sources.jar 104 // ├── slf4j-log4j12-1.4.1-sources.jar.sha1 105 // ├── slf4j-log4j12-1.4.1.jar 106 // ├── slf4j-log4j12-1.4.1.jar.sha1 107 // ├── slf4j-log4j12-1.4.1.pom 108 // └── slf4j-log4j12-1.4.1.pom.sha1 109 // 110 // Records on index are created as: each main and each "classified" artifact is one Document. 111 // Meaning, with structure above, for groupId "org.slf4j" we have 10 records: 112 // G:A:V 113 // org.slf4j:jcl104-over-slf4j:1.4.2:jar 114 // org.slf4j:jcl104-over-slf4j:1.4.2:jar:sources 115 // org.slf4j:slf4j-api:1.4.1:jar 116 // org.slf4j:slf4j-api:1.4.1:jar:sources 117 // org.slf4j:slf4j-api:1.4.2:jar 118 // org.slf4j:slf4j-api:1.4.2:jar:sources 119 // org.slf4j:slf4j-log4j12:1.4.1:jar 120 // org.slf4j:slf4j-log4j12:1.4.1:jar:sources 121 // org.slf4j:slf4j-log4j12:1.4.1:zip:bin 122 // org.slf4j:slf4j-log4j12:1.4.1:tar.gz:bin 123 // 124 // ArtifactInfo, along with GAV carries contextId and repositoryId too! 125 126 public void testProveSvnRev1158917IsWrong() 127 throws IOException 128 { 129 // change is SVN Rev1158917 (http://svn.apache.org/viewvc?view=revision&revision=1158917) is wrong (and is 130 // undone) 131 // because after removing it, we still dont have GAV dupes in results, here is a proof: 132 133 Query query = nexusIndexer.constructQuery( MAVEN.GROUP_ID, new SourcedSearchExpression( "org.slf4j" ) ); 134 FlatSearchRequest fsReq = new FlatSearchRequest( query ); 135 fsReq.getContexts().add( context ); 136 fsReq.getContexts().add( context1 ); 137 fsReq.getContexts().add( context2 ); 138 139 FlatSearchResponse fsResp = nexusIndexer.searchFlat( fsReq ); 140 141 Assert.assertEquals( "We have 10 GAVs coming from three contextes", 10, fsResp.getResults().size() ); 142 143 // Why? Look at the FlatSearchRequest default comparator it uses, it is ArtifactInfo.VERSION_COMPARATOR 144 // that neglects contextId and repositoryId and compares GAVs only, and the Collection fixed in SVN Rev1158917 145 // is actually a Set<ArtifactInfo with proper comparator set. 146 } 147 148 public void testHowUniqueSearchShouldBeDone() 149 throws IOException 150 { 151 // my use case: I am searching for duplicates in given two contexts belonging to given groupId "org.slf4j" 152 // I expect to find intersection of two reposes, since both of those indexes/reposes contains that 153 154 Query query = nexusIndexer.constructQuery( MAVEN.GROUP_ID, new SourcedSearchExpression( "org.slf4j" ) ); 155 156 FlatSearchRequest fsReq = new FlatSearchRequest( query ); 157 fsReq.setArtifactInfoComparator( ArtifactInfo.CONTEXT_VERSION_COMPARATOR ); 158 fsReq.getContexts().add( context ); 159 fsReq.getContexts().add( context1 ); 160 fsReq.getContexts().add( context2 ); 161 162 FlatSearchResponse fsResp = nexusIndexer.searchFlat( fsReq ); 163 164 Assert.assertEquals( "We have 10 GAVs coming from three contextes, it is 30", 30, fsResp.getResults().size() ); 165 166 // Why? We set explicitly the comparator to CONTEXT_VERSION_COMPARATOR, that compares GAV+contextId, hence, 167 // will return all hits from all participating contexts. 168 } 169 170 public void testHowtoPerformAggregatedSearch() 171 throws IOException 172 { 173 // Note: currently this is implemented for IteratorSearches only! TBD for Flat and Grouped searches 174 175 // my use case: searching across multiple contexts, querying how many combinations of GAs exists in groupId 176 // "org.slf4j". 177 178 Query query = nexusIndexer.constructQuery( MAVEN.GROUP_ID, new SourcedSearchExpression( "org.slf4j" ) ); 179 180 IteratorSearchRequest isReq = new IteratorSearchRequest( query ); 181 182 // so, how many different GA combinations exists, this is almost equal to SQLs group by "groupId, artifactId" 183 isReq.setArtifactInfoFilter( new UniqueArtifactFilterPostprocessor( new HashSet<Field>( Arrays.asList( 184 MAVEN.GROUP_ID, MAVEN.ARTIFACT_ID ) ) ) ); 185 isReq.getContexts().add( context ); 186 isReq.getContexts().add( context1 ); 187 isReq.getContexts().add( context2 ); 188 189 // Note: iteratorSearch is completely different beast that flat or grouped searches. While it excels in 190 // low memory consumption and extra features (like presented here), it needs special care: you have to handle it 191 // as resource, since lazy loading requires context locking, and if you forget to do so, you will end up with a 192 // flaky 193 // application that will most probably fail (by deadlocking itself or thrashing indexes). 194 195 IteratorSearchResponse isResp = null; 196 int actualResultCount = 0; 197 198 try 199 { 200 isResp = nexusIndexer.searchIterator( isReq ); 201 202 // consume the iterator to count actual result set size 203 for ( ArtifactInfo ai : isResp ) 204 { 205 actualResultCount++; 206 } 207 } 208 finally 209 { 210 if ( isResp != null ) 211 { 212 isResp.close(); 213 } 214 } 215 216 Assert.assertEquals( "Iterator delivered to us 3 results, since we have 3 GA combinations", 3, 217 actualResultCount ); 218 Assert.assertEquals( 219 "IteratorSearch is strange beast, due to it's nature, it cannot say how many elements it (will) return in advance, due to filtering, postprocessing, etc", 220 -1, isResp.getReturnedHitsCount() ); 221 Assert.assertEquals( 222 "The processing/search tackled 10 GAVs coming from three contextes, it is 30. This is the record count that were hit by processing of this search, but IS NOT the count results (it depends on filtering, comparators, etc)!", 223 30, isResp.getTotalHitsCount() ); 224 } 225 }