001    package org.apache.archiva.repository.scanner;
002    
003    /*
004     * Licensed to the Apache Software Foundation (ASF) under one
005     * or more contributor license agreements.  See the NOTICE file
006     * distributed with this work for additional information
007     * regarding copyright ownership.  The ASF licenses this file
008     * to you under the Apache License, Version 2.0 (the
009     * "License"); you may not use this file except in compliance
010     * with the License.  You may obtain a copy of the License at
011     *
012     *  http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing,
015     * software distributed under the License is distributed on an
016     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017     * KIND, either express or implied.  See the License for the
018     * specific language governing permissions and limitations
019     * under the License.
020     */
021    
022    import org.apache.archiva.admin.model.beans.ManagedRepository;
023    import org.apache.archiva.consumers.InvalidRepositoryContentConsumer;
024    import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
025    
026    import java.util.List;
027    import java.util.Set;
028    
029    /**
030     * Contract for scanning a {@link ManagedRepository} for content changes and
031     * reporting the outcome as {@link RepositoryScanStatistics}; the two
032     * {@code scan} overloads differ in where the consumers and ignore patterns come from.
033     */
034    public interface RepositoryScanner
035    {
036        /**
037         * The {@code changesSince} value to pass to either {@code scan} overload to have the scan
038         * operate in a fresh fashion, with no check on changes based on timestamp.
039         */
040        long FRESH_SCAN = 0;
041    
042        /**
043         * <p>
044         * Typical ignorable content patterns.
045         * </p>
046         * <p><strong>
047         * NOTE: Do not use for normal webapp or task driven repository scanning.
048         * </strong></p>
049         * <p>
050         * These patterns are only valid for archiva-cli and archiva-converter use.
051         * </p>
052         * <p>
053         * NOTE(review): the array is shared and mutable; callers should treat it as read-only.</p>
054         */
055        static final String[] IGNORABLE_CONTENT =
056            { "bin/**", "reports/**", ".index", ".reports/**", ".maven/**", "**/.svn/**", "**/*snapshot-version",
057                "*/website/**", "*/licences/**", "**/.htaccess", "**/*.html", "**/*.txt", "**/README*", "**/CHANGELOG*",
058                "**/KEYS*", ".indexer" };
059    
060        /**
061         * Scan the repository for content changes.
062         *
063         * <p>Internally, this will use the as-configured known and invalid consumer lists.</p>
064         *
065         * @param repository   the repository to scan.
066         * @param changesSince the timestamp to use as a threshold on what is considered new or changed.
067         *                     (To have all content be taken into consideration regardless of timestamp,
068         *                     use the {@link #FRESH_SCAN} constant)
069         * @return the statistics for this scan.
070         * @throws RepositoryScannerException if there was a fundamental problem with getting the discoverer started.
071         */
072        RepositoryScanStatistics scan( ManagedRepository repository, long changesSince )
073            throws RepositoryScannerException;
074    
075        /**
076         * Scan the repository for content changes.
077         *
078         * <p>Unlike {@link #scan(ManagedRepository, long)}, this uses the consumers and ignore patterns passed in.</p>
079         *
080         * @param repository              the repository to scan.
081         * @param knownContentConsumers   the list of consumers that follow the {@link KnownRepositoryContentConsumer}
082         *                                interface that should be used for this scan.
083         * @param invalidContentConsumers the list of consumers that follow the {@link InvalidRepositoryContentConsumer}
084         *                                interface that should be used for this scan.
085         * @param ignoredContentPatterns  list of patterns that should be ignored and not sent to any consumer.
086         * @param changesSince            the timestamp to use as a threshold on what is considered new or changed.
087         *                                (To have all content be taken into consideration regardless of timestamp,
088         *                                use the {@link #FRESH_SCAN} constant)
089         * @return the statistics for this scan.
090         * @throws RepositoryScannerException if there was a fundamental problem with getting the discoverer started.
091         */
092        RepositoryScanStatistics scan( ManagedRepository repository,
093                                       List<KnownRepositoryContentConsumer> knownContentConsumers,
094                                       List<InvalidRepositoryContentConsumer> invalidContentConsumers,
095                                       List<String> ignoredContentPatterns, long changesSince )
096            throws RepositoryScannerException;
097        /** @return the scanner instances of the scans in progress (inferred from the name; confirm with implementors). */
098        Set<RepositoryScannerInstance> getInProgressScans();
099    }