001 package org.apache.archiva.metadata.repository.stats; 002 003 /* 004 * Licensed to the Apache Software Foundation (ASF) under one 005 * or more contributor license agreements. See the NOTICE file 006 * distributed with this work for additional information 007 * regarding copyright ownership. The ASF licenses this file 008 * to you under the Apache License, Version 2.0 (the 009 * "License"); you may not use this file except in compliance 010 * with the License. You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, 015 * software distributed under the License is distributed on an 016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 * KIND, either express or implied. See the License for the 018 * specific language governing permissions and limitations 019 * under the License. 020 */ 021 022 import org.apache.archiva.metadata.model.ArtifactMetadata; 023 import org.apache.archiva.metadata.model.maven2.MavenArtifactFacet; 024 import org.apache.archiva.metadata.repository.MetadataRepository; 025 import org.apache.archiva.metadata.repository.MetadataRepositoryException; 026 import org.apache.archiva.metadata.repository.MetadataResolutionException; 027 import org.apache.commons.lang.time.StopWatch; 028 import org.apache.jackrabbit.commons.JcrUtils; 029 import org.slf4j.Logger; 030 import org.slf4j.LoggerFactory; 031 import org.springframework.stereotype.Service; 032 033 import java.text.ParseException; 034 import java.text.SimpleDateFormat; 035 import java.util.ArrayList; 036 import java.util.Collection; 037 import java.util.Collections; 038 import java.util.Date; 039 import java.util.HashMap; 040 import java.util.List; 041 import java.util.Map; 042 import java.util.TimeZone; 043 import javax.jcr.Node; 044 import javax.jcr.RepositoryException; 045 import javax.jcr.Session; 046 import javax.jcr.query.Query; 047 import javax.jcr.query.QueryManager; 048 import javax.jcr.query.QueryResult; 049 import javax.jcr.query.Row; 050 051 /** 052 * 053 */ 054 @Service("repositoryStatisticsManager#default") 055 public class DefaultRepositoryStatisticsManager 056 implements RepositoryStatisticsManager 057 { 058 private static final Logger log = LoggerFactory.getLogger( DefaultRepositoryStatisticsManager.class ); 059 060 private static final TimeZone UTC_TIME_ZONE = TimeZone.getTimeZone( "UTC" ); 061 062 public boolean hasStatistics( MetadataRepository metadataRepository, String repositoryId ) 063 throws MetadataRepositoryException 064 { 065 return metadataRepository.hasMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID ); 066 } 067 068 public RepositoryStatistics getLastStatistics( MetadataRepository metadataRepository, String repositoryId ) 069 throws MetadataRepositoryException 070 { 071 StopWatch stopWatch = new StopWatch(); 072 stopWatch.start(); 073 // TODO: consider a more efficient implementation that directly gets the last one from the content repository 074 List<String> scans = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID ); 075 if ( scans == null ) 076 { 077 return null; 078 } 079 Collections.sort( scans ); 080 if ( !scans.isEmpty() ) 081 { 082 String name = scans.get( scans.size() - 1 ); 083 RepositoryStatistics repositoryStatistics = 084 (RepositoryStatistics) metadataRepository.getMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID, 085 name ); 086 stopWatch.stop(); 087 log.debug( "time to find last RepositoryStatistics: {} ms", stopWatch.getTime() ); 088 return repositoryStatistics; 089 } 090 else 091 { 092 return null; 093 } 094 } 095 096 private void walkRepository( MetadataRepository metadataRepository, RepositoryStatistics stats, String repositoryId, 097 String ns ) 098 throws MetadataResolutionException 099 { 100 for ( String namespace : metadataRepository.getNamespaces( repositoryId, ns ) ) 101 { 102 walkRepository( metadataRepository, stats, repositoryId, ns + "." + namespace ); 103 } 104 105 Collection<String> projects = metadataRepository.getProjects( repositoryId, ns ); 106 if ( !projects.isEmpty() ) 107 { 108 stats.setTotalGroupCount( stats.getTotalGroupCount() + 1 ); 109 stats.setTotalProjectCount( stats.getTotalProjectCount() + projects.size() ); 110 111 for ( String project : projects ) 112 { 113 for ( String version : metadataRepository.getProjectVersions( repositoryId, ns, project ) ) 114 { 115 for ( ArtifactMetadata artifact : metadataRepository.getArtifacts( repositoryId, ns, project, 116 version ) ) 117 { 118 stats.setTotalArtifactCount( stats.getTotalArtifactCount() + 1 ); 119 stats.setTotalArtifactFileSize( stats.getTotalArtifactFileSize() + artifact.getSize() ); 120 121 MavenArtifactFacet facet = 122 (MavenArtifactFacet) artifact.getFacet( MavenArtifactFacet.FACET_ID ); 123 if ( facet != null ) 124 { 125 String type = facet.getType(); 126 stats.setTotalCountForType( type, stats.getTotalCountForType( type ) + 1 ); 127 } 128 } 129 } 130 } 131 } 132 } 133 134 public void addStatisticsAfterScan( MetadataRepository metadataRepository, String repositoryId, Date startTime, 135 Date endTime, long totalFiles, long newFiles ) 136 throws MetadataRepositoryException 137 { 138 RepositoryStatistics repositoryStatistics = new RepositoryStatistics(); 139 repositoryStatistics.setRepositoryId( repositoryId ); 140 repositoryStatistics.setScanStartTime( startTime ); 141 repositoryStatistics.setScanEndTime( endTime ); 142 repositoryStatistics.setTotalFileCount( totalFiles ); 143 repositoryStatistics.setNewFileCount( newFiles ); 144 145 // TODO 146 // In the future, instead of being tied to a scan we might want to record information in the fly based on 147 // events that are occurring. Even without these totals we could query much of the information on demand based 148 // on information from the metadata content repository. In the mean time, we lock information in at scan time. 149 // Note that if new types are later discoverable due to a code change or new plugin, historical stats will not 150 // be updated and the repository will need to be rescanned. 151 152 long startGather = System.currentTimeMillis(); 153 154 // FIXME what about other implementations ? 155 156 if ( metadataRepository.canObtainAccess( Session.class ) ) 157 { 158 // TODO: this is currently very raw and susceptible to changes in content structure. Should we instead 159 // depend directly on the plugin and interrogate the JCR repository's knowledge of the structure? 160 populateStatisticsFromJcr( (Session) metadataRepository.obtainAccess( Session.class ), repositoryId, 161 repositoryStatistics ); 162 } 163 else 164 { 165 // TODO: 166 // if the file repository is used more permanently, we may seek a more efficient mechanism - e.g. we could 167 // build an index, or store the aggregate information and update it on the fly. We can perhaps even walk 168 // but retrieve less information to speed it up. In the mean time, we walk the repository using the 169 // standard APIs 170 populateStatisticsFromRepositoryWalk( metadataRepository, repositoryId, repositoryStatistics ); 171 } 172 173 log.info( "Gathering statistics executed in {} ms", ( System.currentTimeMillis() - startGather ) ); 174 175 metadataRepository.addMetadataFacet( repositoryId, repositoryStatistics ); 176 } 177 178 private void populateStatisticsFromJcr( Session session, String repositoryId, 179 RepositoryStatistics repositoryStatistics ) 180 throws MetadataRepositoryException 181 { 182 // TODO: these may be best as running totals, maintained by observations on the properties in JCR 183 184 try 185 { 186 QueryManager queryManager = session.getWorkspace().getQueryManager(); 187 188 // TODO: JCR-SQL2 query will not complete on a large repo in Jackrabbit 2.2.0 - see JCR-2835 189 // Using the JCR-SQL2 variants gives 190 // "org.apache.lucene.search.BooleanQuery$TooManyClauses: maxClauseCount is set to 1024" 191 // String whereClause = "WHERE ISDESCENDANTNODE([/repositories/" + repositoryId + "/content])"; 192 // Query query = queryManager.createQuery( "SELECT size FROM [archiva:artifact] " + whereClause, 193 // Query.JCR_SQL2 ); 194 String whereClause = "WHERE jcr:path LIKE '/repositories/" + repositoryId + "/content/%'"; 195 Query query = queryManager.createQuery( "SELECT size FROM archiva:artifact " + whereClause, Query.SQL ); 196 197 QueryResult queryResult = query.execute(); 198 199 Map<String, Integer> totalByType = new HashMap<String, Integer>(); 200 long totalSize = 0, totalArtifacts = 0; 201 for ( Row row : JcrUtils.getRows( queryResult ) ) 202 { 203 Node n = row.getNode(); 204 totalSize += row.getValue( "size" ).getLong(); 205 206 String type; 207 if ( n.hasNode( MavenArtifactFacet.FACET_ID ) ) 208 { 209 Node facetNode = n.getNode( MavenArtifactFacet.FACET_ID ); 210 type = facetNode.getProperty( "type" ).getString(); 211 } 212 else 213 { 214 type = "Other"; 215 } 216 Integer prev = totalByType.get( type ); 217 totalByType.put( type, prev != null ? prev + 1 : 1 ); 218 219 totalArtifacts++; 220 } 221 222 repositoryStatistics.setTotalArtifactCount( totalArtifacts ); 223 repositoryStatistics.setTotalArtifactFileSize( totalSize ); 224 for ( Map.Entry<String, Integer> entry : totalByType.entrySet() ) 225 { 226 repositoryStatistics.setTotalCountForType( entry.getKey(), entry.getValue() ); 227 } 228 229 // The query ordering is a trick to ensure that the size is correct, otherwise due to lazy init it will be -1 230 // query = queryManager.createQuery( "SELECT * FROM [archiva:project] " + whereClause, Query.JCR_SQL2 ); 231 query = queryManager.createQuery( "SELECT * FROM archiva:project " + whereClause + " ORDER BY jcr:score", 232 Query.SQL ); 233 repositoryStatistics.setTotalProjectCount( query.execute().getRows().getSize() ); 234 235 // query = queryManager.createQuery( 236 // "SELECT * FROM [archiva:namespace] " + whereClause + " AND namespace IS NOT NULL", Query.JCR_SQL2 ); 237 query = queryManager.createQuery( 238 "SELECT * FROM archiva:namespace " + whereClause + " AND namespace IS NOT NULL ORDER BY jcr:score", 239 Query.SQL ); 240 repositoryStatistics.setTotalGroupCount( query.execute().getRows().getSize() ); 241 } 242 catch ( RepositoryException e ) 243 { 244 throw new MetadataRepositoryException( e.getMessage(), e ); 245 } 246 } 247 248 private void populateStatisticsFromRepositoryWalk( MetadataRepository metadataRepository, String repositoryId, 249 RepositoryStatistics repositoryStatistics ) 250 throws MetadataRepositoryException 251 { 252 try 253 { 254 for ( String ns : metadataRepository.getRootNamespaces( repositoryId ) ) 255 { 256 walkRepository( metadataRepository, repositoryStatistics, repositoryId, ns ); 257 } 258 } 259 catch ( MetadataResolutionException e ) 260 { 261 throw new MetadataRepositoryException( e.getMessage(), e ); 262 } 263 } 264 265 public void deleteStatistics( MetadataRepository metadataRepository, String repositoryId ) 266 throws MetadataRepositoryException 267 { 268 metadataRepository.removeMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID ); 269 } 270 271 public List<RepositoryStatistics> getStatisticsInRange( MetadataRepository metadataRepository, String repositoryId, 272 Date startTime, Date endTime ) 273 throws MetadataRepositoryException 274 { 275 List<RepositoryStatistics> results = new ArrayList<RepositoryStatistics>(); 276 List<String> list = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID ); 277 Collections.sort( list, Collections.reverseOrder() ); 278 for ( String name : list ) 279 { 280 try 281 { 282 Date date = createNameFormat().parse( name ); 283 if ( ( startTime == null || !date.before( startTime ) ) && ( endTime == null || !date.after( 284 endTime ) ) ) 285 { 286 RepositoryStatistics stats = 287 (RepositoryStatistics) metadataRepository.getMetadataFacet( repositoryId, 288 RepositoryStatistics.FACET_ID, 289 name ); 290 results.add( stats ); 291 } 292 } 293 catch ( ParseException e ) 294 { 295 log.error( "Invalid scan result found in the metadata repository: " + e.getMessage() ); 296 // continue and ignore this one 297 } 298 } 299 return results; 300 } 301 302 private static SimpleDateFormat createNameFormat() 303 { 304 SimpleDateFormat fmt = new SimpleDateFormat( RepositoryStatistics.SCAN_TIMESTAMP_FORMAT ); 305 fmt.setTimeZone( UTC_TIME_ZONE ); 306 return fmt; 307 } 308 }