001    package org.apache.archiva.metadata.repository.stats;
002    
003    /*
004     * Licensed to the Apache Software Foundation (ASF) under one
005     * or more contributor license agreements.  See the NOTICE file
006     * distributed with this work for additional information
007     * regarding copyright ownership.  The ASF licenses this file
008     * to you under the Apache License, Version 2.0 (the
009     * "License"); you may not use this file except in compliance
010     * with the License.  You may obtain a copy of the License at
011     *
012     *   http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing,
015     * software distributed under the License is distributed on an
016     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017     * KIND, either express or implied.  See the License for the
018     * specific language governing permissions and limitations
019     * under the License.
020     */
021    
022    import org.apache.archiva.metadata.model.ArtifactMetadata;
023    import org.apache.archiva.metadata.model.maven2.MavenArtifactFacet;
024    import org.apache.archiva.metadata.repository.MetadataRepository;
025    import org.apache.archiva.metadata.repository.MetadataRepositoryException;
026    import org.apache.archiva.metadata.repository.MetadataResolutionException;
027    import org.apache.commons.lang.time.StopWatch;
028    import org.apache.jackrabbit.commons.JcrUtils;
029    import org.slf4j.Logger;
030    import org.slf4j.LoggerFactory;
031    import org.springframework.stereotype.Service;
032    
033    import java.text.ParseException;
034    import java.text.SimpleDateFormat;
035    import java.util.ArrayList;
036    import java.util.Collection;
037    import java.util.Collections;
038    import java.util.Date;
039    import java.util.HashMap;
040    import java.util.List;
041    import java.util.Map;
042    import java.util.TimeZone;
043    import javax.jcr.Node;
044    import javax.jcr.RepositoryException;
045    import javax.jcr.Session;
046    import javax.jcr.query.Query;
047    import javax.jcr.query.QueryManager;
048    import javax.jcr.query.QueryResult;
049    import javax.jcr.query.Row;
050    
051    /**
052     *
053     */
054    @Service("repositoryStatisticsManager#default")
055    public class DefaultRepositoryStatisticsManager
056        implements RepositoryStatisticsManager
057    {
058        private static final Logger log = LoggerFactory.getLogger( DefaultRepositoryStatisticsManager.class );
059    
060        private static final TimeZone UTC_TIME_ZONE = TimeZone.getTimeZone( "UTC" );
061    
062        public boolean hasStatistics( MetadataRepository metadataRepository, String repositoryId )
063            throws MetadataRepositoryException
064        {
065            return metadataRepository.hasMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID );
066        }
067    
068        public RepositoryStatistics getLastStatistics( MetadataRepository metadataRepository, String repositoryId )
069            throws MetadataRepositoryException
070        {
071            StopWatch stopWatch = new StopWatch();
072            stopWatch.start();
073            // TODO: consider a more efficient implementation that directly gets the last one from the content repository
074            List<String> scans = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
075            if ( scans == null )
076            {
077                return null;
078            }
079            Collections.sort( scans );
080            if ( !scans.isEmpty() )
081            {
082                String name = scans.get( scans.size() - 1 );
083                RepositoryStatistics repositoryStatistics =
084                    (RepositoryStatistics) metadataRepository.getMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID,
085                                                                                name );
086                stopWatch.stop();
087                log.debug( "time to find last RepositoryStatistics: {} ms", stopWatch.getTime() );
088                return repositoryStatistics;
089            }
090            else
091            {
092                return null;
093            }
094        }
095    
096        private void walkRepository( MetadataRepository metadataRepository, RepositoryStatistics stats, String repositoryId,
097                                     String ns )
098            throws MetadataResolutionException
099        {
100            for ( String namespace : metadataRepository.getNamespaces( repositoryId, ns ) )
101            {
102                walkRepository( metadataRepository, stats, repositoryId, ns + "." + namespace );
103            }
104    
105            Collection<String> projects = metadataRepository.getProjects( repositoryId, ns );
106            if ( !projects.isEmpty() )
107            {
108                stats.setTotalGroupCount( stats.getTotalGroupCount() + 1 );
109                stats.setTotalProjectCount( stats.getTotalProjectCount() + projects.size() );
110    
111                for ( String project : projects )
112                {
113                    for ( String version : metadataRepository.getProjectVersions( repositoryId, ns, project ) )
114                    {
115                        for ( ArtifactMetadata artifact : metadataRepository.getArtifacts( repositoryId, ns, project,
116                                                                                           version ) )
117                        {
118                            stats.setTotalArtifactCount( stats.getTotalArtifactCount() + 1 );
119                            stats.setTotalArtifactFileSize( stats.getTotalArtifactFileSize() + artifact.getSize() );
120    
121                            MavenArtifactFacet facet =
122                                (MavenArtifactFacet) artifact.getFacet( MavenArtifactFacet.FACET_ID );
123                            if ( facet != null )
124                            {
125                                String type = facet.getType();
126                                stats.setTotalCountForType( type, stats.getTotalCountForType( type ) + 1 );
127                            }
128                        }
129                    }
130                }
131            }
132        }
133    
134        public void addStatisticsAfterScan( MetadataRepository metadataRepository, String repositoryId, Date startTime,
135                                            Date endTime, long totalFiles, long newFiles )
136            throws MetadataRepositoryException
137        {
138            RepositoryStatistics repositoryStatistics = new RepositoryStatistics();
139            repositoryStatistics.setRepositoryId( repositoryId );
140            repositoryStatistics.setScanStartTime( startTime );
141            repositoryStatistics.setScanEndTime( endTime );
142            repositoryStatistics.setTotalFileCount( totalFiles );
143            repositoryStatistics.setNewFileCount( newFiles );
144    
145            // TODO
146            // In the future, instead of being tied to a scan we might want to record information in the fly based on
147            // events that are occurring. Even without these totals we could query much of the information on demand based
148            // on information from the metadata content repository. In the mean time, we lock information in at scan time.
149            // Note that if new types are later discoverable due to a code change or new plugin, historical stats will not
150            // be updated and the repository will need to be rescanned.
151    
152            long startGather = System.currentTimeMillis();
153    
154            // FIXME what about other implementations ?
155    
156            if ( metadataRepository.canObtainAccess( Session.class ) )
157            {
158                // TODO: this is currently very raw and susceptible to changes in content structure. Should we instead
159                //   depend directly on the plugin and interrogate the JCR repository's knowledge of the structure?
160                populateStatisticsFromJcr( (Session) metadataRepository.obtainAccess( Session.class ), repositoryId,
161                                           repositoryStatistics );
162            }
163            else
164            {
165                // TODO:
166                //   if the file repository is used more permanently, we may seek a more efficient mechanism - e.g. we could
167                //   build an index, or store the aggregate information and update it on the fly. We can perhaps even walk
168                //   but retrieve less information to speed it up. In the mean time, we walk the repository using the
169                //   standard APIs
170                populateStatisticsFromRepositoryWalk( metadataRepository, repositoryId, repositoryStatistics );
171            }
172    
173            log.info( "Gathering statistics executed in {} ms",  ( System.currentTimeMillis() - startGather ) );
174    
175            metadataRepository.addMetadataFacet( repositoryId, repositoryStatistics );
176        }
177    
178        private void populateStatisticsFromJcr( Session session, String repositoryId,
179                                                RepositoryStatistics repositoryStatistics )
180            throws MetadataRepositoryException
181        {
182            // TODO: these may be best as running totals, maintained by observations on the properties in JCR
183    
184            try
185            {
186                QueryManager queryManager = session.getWorkspace().getQueryManager();
187    
188                // TODO: JCR-SQL2 query will not complete on a large repo in Jackrabbit 2.2.0 - see JCR-2835
189                //    Using the JCR-SQL2 variants gives
190                //      "org.apache.lucene.search.BooleanQuery$TooManyClauses: maxClauseCount is set to 1024"
191    //            String whereClause = "WHERE ISDESCENDANTNODE([/repositories/" + repositoryId + "/content])";
192    //            Query query = queryManager.createQuery( "SELECT size FROM [archiva:artifact] " + whereClause,
193    //                                                    Query.JCR_SQL2 );
194                String whereClause = "WHERE jcr:path LIKE '/repositories/" + repositoryId + "/content/%'";
195                Query query = queryManager.createQuery( "SELECT size FROM archiva:artifact " + whereClause, Query.SQL );
196    
197                QueryResult queryResult = query.execute();
198    
199                Map<String, Integer> totalByType = new HashMap<String, Integer>();
200                long totalSize = 0, totalArtifacts = 0;
201                for ( Row row : JcrUtils.getRows( queryResult ) )
202                {
203                    Node n = row.getNode();
204                    totalSize += row.getValue( "size" ).getLong();
205    
206                    String type;
207                    if ( n.hasNode( MavenArtifactFacet.FACET_ID ) )
208                    {
209                        Node facetNode = n.getNode( MavenArtifactFacet.FACET_ID );
210                        type = facetNode.getProperty( "type" ).getString();
211                    }
212                    else
213                    {
214                        type = "Other";
215                    }
216                    Integer prev = totalByType.get( type );
217                    totalByType.put( type, prev != null ? prev + 1 : 1 );
218    
219                    totalArtifacts++;
220                }
221    
222                repositoryStatistics.setTotalArtifactCount( totalArtifacts );
223                repositoryStatistics.setTotalArtifactFileSize( totalSize );
224                for ( Map.Entry<String, Integer> entry : totalByType.entrySet() )
225                {
226                    repositoryStatistics.setTotalCountForType( entry.getKey(), entry.getValue() );
227                }
228    
229                // The query ordering is a trick to ensure that the size is correct, otherwise due to lazy init it will be -1
230    //            query = queryManager.createQuery( "SELECT * FROM [archiva:project] " + whereClause, Query.JCR_SQL2 );
231                query = queryManager.createQuery( "SELECT * FROM archiva:project " + whereClause + " ORDER BY jcr:score",
232                                                  Query.SQL );
233                repositoryStatistics.setTotalProjectCount( query.execute().getRows().getSize() );
234    
235    //            query = queryManager.createQuery(
236    //                "SELECT * FROM [archiva:namespace] " + whereClause + " AND namespace IS NOT NULL", Query.JCR_SQL2 );
237                query = queryManager.createQuery(
238                    "SELECT * FROM archiva:namespace " + whereClause + " AND namespace IS NOT NULL ORDER BY jcr:score",
239                    Query.SQL );
240                repositoryStatistics.setTotalGroupCount( query.execute().getRows().getSize() );
241            }
242            catch ( RepositoryException e )
243            {
244                throw new MetadataRepositoryException( e.getMessage(), e );
245            }
246        }
247    
248        private void populateStatisticsFromRepositoryWalk( MetadataRepository metadataRepository, String repositoryId,
249                                                           RepositoryStatistics repositoryStatistics )
250            throws MetadataRepositoryException
251        {
252            try
253            {
254                for ( String ns : metadataRepository.getRootNamespaces( repositoryId ) )
255                {
256                    walkRepository( metadataRepository, repositoryStatistics, repositoryId, ns );
257                }
258            }
259            catch ( MetadataResolutionException e )
260            {
261                throw new MetadataRepositoryException( e.getMessage(), e );
262            }
263        }
264    
265        public void deleteStatistics( MetadataRepository metadataRepository, String repositoryId )
266            throws MetadataRepositoryException
267        {
268            metadataRepository.removeMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
269        }
270    
271        public List<RepositoryStatistics> getStatisticsInRange( MetadataRepository metadataRepository, String repositoryId,
272                                                                Date startTime, Date endTime )
273            throws MetadataRepositoryException
274        {
275            List<RepositoryStatistics> results = new ArrayList<RepositoryStatistics>();
276            List<String> list = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
277            Collections.sort( list, Collections.reverseOrder() );
278            for ( String name : list )
279            {
280                try
281                {
282                    Date date = createNameFormat().parse( name );
283                    if ( ( startTime == null || !date.before( startTime ) ) && ( endTime == null || !date.after(
284                        endTime ) ) )
285                    {
286                        RepositoryStatistics stats =
287                            (RepositoryStatistics) metadataRepository.getMetadataFacet( repositoryId,
288                                                                                        RepositoryStatistics.FACET_ID,
289                                                                                        name );
290                        results.add( stats );
291                    }
292                }
293                catch ( ParseException e )
294                {
295                    log.error( "Invalid scan result found in the metadata repository: " + e.getMessage() );
296                    // continue and ignore this one
297                }
298            }
299            return results;
300        }
301    
302        private static SimpleDateFormat createNameFormat()
303        {
304            SimpleDateFormat fmt = new SimpleDateFormat( RepositoryStatistics.SCAN_TIMESTAMP_FORMAT );
305            fmt.setTimeZone( UTC_TIME_ZONE );
306            return fmt;
307        }
308    }