001 package org.apache.archiva.reports.consumers; 002 003 /* 004 * Licensed to the Apache Software Foundation (ASF) under one 005 * or more contributor license agreements. See the NOTICE file 006 * distributed with this work for additional information 007 * regarding copyright ownership. The ASF licenses this file 008 * to you under the Apache License, Version 2.0 (the 009 * "License"); you may not use this file except in compliance 010 * with the License. You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, 015 * software distributed under the License is distributed on an 016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 * KIND, either express or implied. See the License for the 018 * specific language governing permissions and limitations 019 * under the License. 020 */ 021 022 import org.apache.archiva.admin.model.beans.ManagedRepository; 023 import org.apache.archiva.checksum.ChecksumAlgorithm; 024 import org.apache.archiva.checksum.ChecksummedFile; 025 import org.apache.archiva.configuration.ArchivaConfiguration; 026 import org.apache.archiva.configuration.ConfigurationNames; 027 import org.apache.archiva.configuration.FileTypes; 028 import org.apache.archiva.consumers.AbstractMonitoredConsumer; 029 import org.apache.archiva.consumers.ConsumerException; 030 import org.apache.archiva.consumers.KnownRepositoryContentConsumer; 031 import org.apache.archiva.metadata.model.ArtifactMetadata; 032 import org.apache.archiva.metadata.repository.MetadataRepository; 033 import org.apache.archiva.metadata.repository.MetadataRepositoryException; 034 import org.apache.archiva.metadata.repository.RepositorySession; 035 import org.apache.archiva.metadata.repository.RepositorySessionFactory; 036 import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator; 037 import org.apache.archiva.redback.components.registry.Registry; 038 import org.apache.archiva.redback.components.registry.RegistryListener; 039 import org.apache.archiva.reports.RepositoryProblemFacet; 040 import org.apache.commons.collections.CollectionUtils; 041 import org.slf4j.Logger; 042 import org.slf4j.LoggerFactory; 043 import org.springframework.context.annotation.Scope; 044 import org.springframework.stereotype.Service; 045 046 import javax.annotation.PostConstruct; 047 import javax.inject.Inject; 048 import javax.inject.Named; 049 import java.io.File; 050 import java.io.IOException; 051 import java.util.ArrayList; 052 import java.util.Collections; 053 import java.util.Date; 054 import java.util.List; 055 056 /** 057 * Search the artifact repository of known SHA1 Checksums for potential duplicate artifacts. 058 * <p/> 059 * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list 060 */ 061 @Service ( "knownRepositoryContentConsumer#duplicate-artifacts" ) 062 @Scope ( "prototype" ) 063 public class DuplicateArtifactsConsumer 064 extends AbstractMonitoredConsumer 065 implements KnownRepositoryContentConsumer, RegistryListener 066 { 067 private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class ); 068 069 private String id = "duplicate-artifacts"; 070 071 private String description = "Check for Duplicate Artifacts via SHA1 Checksums"; 072 073 @Inject 074 private ArchivaConfiguration configuration; 075 076 @Inject 077 private FileTypes filetypes; 078 079 /** 080 * FIXME: this could be multiple implementations and needs to be configured. 081 */ 082 @Inject 083 private RepositorySessionFactory repositorySessionFactory; 084 085 private List<String> includes = new ArrayList<String>(); 086 087 private File repositoryDir; 088 089 private String repoId; 090 091 /** 092 * FIXME: needs to be selected based on the repository in question 093 */ 094 @Inject 095 @Named ( value = "repositoryPathTranslator#maven2" ) 096 private RepositoryPathTranslator pathTranslator; 097 098 private RepositorySession repositorySession; 099 100 public String getId() 101 { 102 return id; 103 } 104 105 public String getDescription() 106 { 107 return description; 108 } 109 110 public boolean isPermanent() 111 { 112 return false; 113 } 114 115 public List<String> getIncludes() 116 { 117 return includes; 118 } 119 120 public List<String> getExcludes() 121 { 122 return Collections.emptyList(); 123 } 124 125 public void beginScan( ManagedRepository repo, Date whenGathered ) 126 throws ConsumerException 127 { 128 repoId = repo.getId(); 129 this.repositoryDir = new File( repo.getLocation() ); 130 repositorySession = repositorySessionFactory.createSession(); 131 } 132 133 public void beginScan( ManagedRepository repo, Date whenGathered, boolean executeOnEntireRepo ) 134 throws ConsumerException 135 { 136 beginScan( repo, whenGathered ); 137 } 138 139 public void processFile( String path ) 140 throws ConsumerException 141 { 142 File artifactFile = new File( this.repositoryDir, path ); 143 144 // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that 145 // perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1 146 // alternatively this could come straight from the storage resolver, which could populate the artifact metadata 147 // in the later parse call with the desired checksum and use that 148 String checksumSha1; 149 ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile ); 150 try 151 { 152 checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 ); 153 } 154 catch ( IOException e ) 155 { 156 throw new ConsumerException( e.getMessage(), e ); 157 } 158 159 MetadataRepository metadataRepository = repositorySession.getRepository(); 160 161 List<ArtifactMetadata> results; 162 try 163 { 164 results = metadataRepository.getArtifactsByChecksum( repoId, checksumSha1 ); 165 } 166 catch ( MetadataRepositoryException e ) 167 { 168 repositorySession.close(); 169 throw new ConsumerException( e.getMessage(), e ); 170 } 171 172 if ( CollectionUtils.isNotEmpty( results ) ) 173 { 174 ArtifactMetadata originalArtifact; 175 try 176 { 177 originalArtifact = pathTranslator.getArtifactForPath( repoId, path ); 178 } 179 catch ( Exception e ) 180 { 181 log.warn( "Not reporting problem for invalid artifact in checksum check: " + e.getMessage() ); 182 return; 183 } 184 185 for ( ArtifactMetadata dupArtifact : results ) 186 { 187 String id = path.substring( path.lastIndexOf( '/' ) + 1 ); 188 if ( dupArtifact.getId().equals( id ) && dupArtifact.getNamespace().equals( 189 originalArtifact.getNamespace() ) && dupArtifact.getProject().equals( 190 originalArtifact.getProject() ) && dupArtifact.getVersion().equals( 191 originalArtifact.getVersion() ) ) 192 { 193 // Skip reference to itself. 194 195 log.debug( "Not counting duplicate for artifact {} for path {}", dupArtifact, path ); 196 197 continue; 198 } 199 200 RepositoryProblemFacet problem = new RepositoryProblemFacet(); 201 problem.setRepositoryId( repoId ); 202 problem.setNamespace( originalArtifact.getNamespace() ); 203 problem.setProject( originalArtifact.getProject() ); 204 problem.setVersion( originalArtifact.getVersion() ); 205 problem.setId( id ); 206 // FIXME: need to get the right storage resolver for the repository the dupe artifact is in, it might be 207 // a different type 208 // FIXME: we need the project version here, not the artifact version 209 problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath( 210 dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(), 211 dupArtifact.getId() ) ); 212 problem.setProblem( "duplicate-artifact" ); 213 214 try 215 { 216 metadataRepository.addMetadataFacet( repoId, problem ); 217 } 218 catch ( MetadataRepositoryException e ) 219 { 220 throw new ConsumerException( e.getMessage(), e ); 221 } 222 } 223 } 224 } 225 226 public void processFile( String path, boolean executeOnEntireRepo ) 227 throws ConsumerException 228 { 229 processFile( path ); 230 } 231 232 public void completeScan() 233 { 234 repositorySession.close(); 235 } 236 237 public void completeScan( boolean executeOnEntireRepo ) 238 { 239 completeScan(); 240 } 241 242 public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue ) 243 { 244 if ( ConfigurationNames.isRepositoryScanning( propertyName ) ) 245 { 246 initIncludes(); 247 } 248 } 249 250 public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue ) 251 { 252 /* do nothing */ 253 } 254 255 private void initIncludes() 256 { 257 includes.clear(); 258 259 includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) ); 260 } 261 262 @PostConstruct 263 public void initialize() 264 { 265 initIncludes(); 266 configuration.addChangeListener( this ); 267 } 268 }