001    package org.apache.archiva.reports.consumers;
002    
003    /*
004     * Licensed to the Apache Software Foundation (ASF) under one
005     * or more contributor license agreements.  See the NOTICE file
006     * distributed with this work for additional information
007     * regarding copyright ownership.  The ASF licenses this file
008     * to you under the Apache License, Version 2.0 (the
009     * "License"); you may not use this file except in compliance
010     * with the License.  You may obtain a copy of the License at
011     *
012     *  http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing,
015     * software distributed under the License is distributed on an
016     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017     * KIND, either express or implied.  See the License for the
018     * specific language governing permissions and limitations
019     * under the License.
020     */
021    
022    import org.apache.archiva.admin.model.beans.ManagedRepository;
023    import org.apache.archiva.checksum.ChecksumAlgorithm;
024    import org.apache.archiva.checksum.ChecksummedFile;
025    import org.apache.archiva.configuration.ArchivaConfiguration;
026    import org.apache.archiva.configuration.ConfigurationNames;
027    import org.apache.archiva.configuration.FileTypes;
028    import org.apache.archiva.consumers.AbstractMonitoredConsumer;
029    import org.apache.archiva.consumers.ConsumerException;
030    import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
031    import org.apache.archiva.metadata.model.ArtifactMetadata;
032    import org.apache.archiva.metadata.repository.MetadataRepository;
033    import org.apache.archiva.metadata.repository.MetadataRepositoryException;
034    import org.apache.archiva.metadata.repository.RepositorySession;
035    import org.apache.archiva.metadata.repository.RepositorySessionFactory;
036    import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator;
037    import org.apache.archiva.redback.components.registry.Registry;
038    import org.apache.archiva.redback.components.registry.RegistryListener;
039    import org.apache.archiva.reports.RepositoryProblemFacet;
040    import org.apache.commons.collections.CollectionUtils;
041    import org.slf4j.Logger;
042    import org.slf4j.LoggerFactory;
043    import org.springframework.context.annotation.Scope;
044    import org.springframework.stereotype.Service;
045    
046    import javax.annotation.PostConstruct;
047    import javax.inject.Inject;
048    import javax.inject.Named;
049    import java.io.File;
050    import java.io.IOException;
051    import java.util.ArrayList;
052    import java.util.Collections;
053    import java.util.Date;
054    import java.util.List;
055    
056    /**
057     * Search the artifact repository of known SHA1 Checksums for potential duplicate artifacts.
058     * <p/>
059     * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list
060     */
061    @Service ( "knownRepositoryContentConsumer#duplicate-artifacts" )
062    @Scope ( "prototype" )
063    public class DuplicateArtifactsConsumer
064        extends AbstractMonitoredConsumer
065        implements KnownRepositoryContentConsumer, RegistryListener
066    {
067        private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class );
068    
069        private String id = "duplicate-artifacts";
070    
071        private String description = "Check for Duplicate Artifacts via SHA1 Checksums";
072    
073        @Inject
074        private ArchivaConfiguration configuration;
075    
076        @Inject
077        private FileTypes filetypes;
078    
079        /**
080         * FIXME: this could be multiple implementations and needs to be configured.
081         */
082        @Inject
083        private RepositorySessionFactory repositorySessionFactory;
084    
085        private List<String> includes = new ArrayList<String>();
086    
087        private File repositoryDir;
088    
089        private String repoId;
090    
091        /**
092         * FIXME: needs to be selected based on the repository in question
093         */
094        @Inject
095        @Named ( value = "repositoryPathTranslator#maven2" )
096        private RepositoryPathTranslator pathTranslator;
097    
098        private RepositorySession repositorySession;
099    
100        public String getId()
101        {
102            return id;
103        }
104    
105        public String getDescription()
106        {
107            return description;
108        }
109    
110        public boolean isPermanent()
111        {
112            return false;
113        }
114    
115        public List<String> getIncludes()
116        {
117            return includes;
118        }
119    
120        public List<String> getExcludes()
121        {
122            return Collections.emptyList();
123        }
124    
125        public void beginScan( ManagedRepository repo, Date whenGathered )
126            throws ConsumerException
127        {
128            repoId = repo.getId();
129            this.repositoryDir = new File( repo.getLocation() );
130            repositorySession = repositorySessionFactory.createSession();
131        }
132    
133        public void beginScan( ManagedRepository repo, Date whenGathered, boolean executeOnEntireRepo )
134            throws ConsumerException
135        {
136            beginScan( repo, whenGathered );
137        }
138    
139        public void processFile( String path )
140            throws ConsumerException
141        {
142            File artifactFile = new File( this.repositoryDir, path );
143    
144            // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that
145            //  perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1
146            //  alternatively this could come straight from the storage resolver, which could populate the artifact metadata
147            //  in the later parse call with the desired checksum and use that
148            String checksumSha1;
149            ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile );
150            try
151            {
152                checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 );
153            }
154            catch ( IOException e )
155            {
156                throw new ConsumerException( e.getMessage(), e );
157            }
158    
159            MetadataRepository metadataRepository = repositorySession.getRepository();
160    
161            List<ArtifactMetadata> results;
162            try
163            {
164                results = metadataRepository.getArtifactsByChecksum( repoId, checksumSha1 );
165            }
166            catch ( MetadataRepositoryException e )
167            {
168                repositorySession.close();
169                throw new ConsumerException( e.getMessage(), e );
170            }
171    
172            if ( CollectionUtils.isNotEmpty( results ) )
173            {
174                ArtifactMetadata originalArtifact;
175                try
176                {
177                    originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
178                }
179                catch ( Exception e )
180                {
181                    log.warn( "Not reporting problem for invalid artifact in checksum check: " + e.getMessage() );
182                    return;
183                }
184    
185                for ( ArtifactMetadata dupArtifact : results )
186                {
187                    String id = path.substring( path.lastIndexOf( '/' ) + 1 );
188                    if ( dupArtifact.getId().equals( id ) && dupArtifact.getNamespace().equals(
189                        originalArtifact.getNamespace() ) && dupArtifact.getProject().equals(
190                        originalArtifact.getProject() ) && dupArtifact.getVersion().equals(
191                        originalArtifact.getVersion() ) )
192                    {
193                        // Skip reference to itself.
194    
195                        log.debug( "Not counting duplicate for artifact {} for path {}", dupArtifact, path );
196    
197                        continue;
198                    }
199    
200                    RepositoryProblemFacet problem = new RepositoryProblemFacet();
201                    problem.setRepositoryId( repoId );
202                    problem.setNamespace( originalArtifact.getNamespace() );
203                    problem.setProject( originalArtifact.getProject() );
204                    problem.setVersion( originalArtifact.getVersion() );
205                    problem.setId( id );
206                    // FIXME: need to get the right storage resolver for the repository the dupe artifact is in, it might be
207                    //       a different type
208                    // FIXME: we need the project version here, not the artifact version
209                    problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
210                        dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
211                        dupArtifact.getId() ) );
212                    problem.setProblem( "duplicate-artifact" );
213    
214                    try
215                    {
216                        metadataRepository.addMetadataFacet( repoId, problem );
217                    }
218                    catch ( MetadataRepositoryException e )
219                    {
220                        throw new ConsumerException( e.getMessage(), e );
221                    }
222                }
223            }
224        }
225    
226        public void processFile( String path, boolean executeOnEntireRepo )
227            throws ConsumerException
228        {
229            processFile( path );
230        }
231    
232        public void completeScan()
233        {
234            repositorySession.close();
235        }
236    
237        public void completeScan( boolean executeOnEntireRepo )
238        {
239            completeScan();
240        }
241    
242        public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue )
243        {
244            if ( ConfigurationNames.isRepositoryScanning( propertyName ) )
245            {
246                initIncludes();
247            }
248        }
249    
250        public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue )
251        {
252            /* do nothing */
253        }
254    
255        private void initIncludes()
256        {
257            includes.clear();
258    
259            includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) );
260        }
261    
262        @PostConstruct
263        public void initialize()
264        {
265            initIncludes();
266            configuration.addChangeListener( this );
267        }
268    }