package org.apache.maven.index.incremental;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.TimeZone;
import java.util.TreeMap;

import javax.inject.Named;
import javax.inject.Singleton;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;
import org.apache.maven.index.ArtifactInfo;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.packer.IndexPackingRequest;
import org.apache.maven.index.updater.IndexUpdateRequest;
import org.codehaus.plexus.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

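/**
 * Default {@link IncrementalHandler} implementation. On the packing side it selects the Lucene documents
 * modified since the last recorded index timestamp and maintains the incremental chunk properties
 * (chain id, chunk counter and per-chunk entries); on the update side it resolves which incremental
 * chunk files a client still needs to download.
 */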
@Singleton
@Named
public class DefaultIncrementalHandler
    implements IncrementalHandler
{

    private final Logger logger = LoggerFactory.getLogger( getClass() );

    protected Logger getLogger()
    {
        return logger;
    }

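    /**
     * Determines the documents that should go into the next incremental chunk. Returns {@code null}
     * when the supplied properties are missing or invalid (no incremental packing possible), otherwise
     * the ids of all live documents modified after the recorded index timestamp. When changes are found,
     * the chunk properties are rotated; stale chunk files in the target directory are removed either way.
     */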
    public List<Integer> getIncrementalUpdates( IndexPackingRequest request, Properties properties )
        throws IOException
    {
        getLogger().debug( "Handling Incremental Updates" );

        if ( !validateProperties( properties ) )
        {
            getLogger().debug( "Invalid properties found, resetting them and doing no incremental packing." );
            return null;
        }

        // Get the list of document ids that have been added since the last time
        // the index ran
        List<Integer> chunk =
            getIndexChunk( request, parse( properties.getProperty( IndexingContext.INDEX_TIMESTAMP ) ) );

        getLogger().debug( "Found " + chunk.size() + " differences to put in incremental index." );

        // if no documents, then we don't need to do anything, no changes
        if ( chunk.size() > 0 )
        {
            updateProperties( properties, request );
        }

        cleanUpIncrementalChunks( request, properties );

        return chunk;
    }

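    /**
     * Works out which remote incremental chunk files still need to be downloaded. Returns {@code null}
     * when an incremental update is not possible (see {@link #canRetrieveAllChunks}), otherwise the
     * chunk file names from the chunk after the local counter up to the remote counter.
     */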
    public List<String> loadRemoteIncrementalUpdates( IndexUpdateRequest request, Properties localProperties,
                                                      Properties remoteProperties )
        throws IOException
    {
        List<String> filenames = null;
        // If we have local properties, will parse and see what we need to download
        if ( canRetrieveAllChunks( localProperties, remoteProperties ) )
        {
            filenames = new ArrayList<>();

            int maxCounter = Integer.parseInt( remoteProperties.getProperty( IndexingContext.INDEX_CHUNK_COUNTER ) );
            int currentCounter = Integer.parseInt( localProperties.getProperty( IndexingContext.INDEX_CHUNK_COUNTER ) );

            // Start with the next one
            currentCounter++;

            while ( currentCounter <= maxCounter )
            {
                filenames.add( IndexingContext.INDEX_FILE_PREFIX + "." + currentCounter++ + ".gz" );
            }
        }

        return filenames;
    }

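    /**
     * Checks that the properties contain a parseable index timestamp; when they do, the chain id and
     * chunk counter are initialized as needed.
     */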
    private boolean validateProperties( Properties properties )
    {
        if ( properties == null || properties.isEmpty() )
        {
            return false;
        }

        if ( properties.getProperty( IndexingContext.INDEX_TIMESTAMP ) == null )
        {
            return false;
        }

        if ( parse( properties.getProperty( IndexingContext.INDEX_TIMESTAMP ) ) == null )
        {
            return false;
        }

        initializeProperties( properties );

        return true;
    }

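    /**
     * Seeds the incremental properties: a missing chain id gets the current time in milliseconds and
     * resets the chunk counter, and a missing chunk counter starts at zero.
     */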
    public void initializeProperties( Properties properties )
    {
        if ( properties.getProperty( IndexingContext.INDEX_CHAIN_ID ) == null )
        {
            properties.setProperty( IndexingContext.INDEX_CHAIN_ID, Long.toString( new Date().getTime() ) );
            properties.remove( IndexingContext.INDEX_CHUNK_COUNTER );
        }

        if ( properties.getProperty( IndexingContext.INDEX_CHUNK_COUNTER ) == null )
        {
            properties.setProperty( IndexingContext.INDEX_CHUNK_COUNTER, "0" );
        }
    }

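    /**
     * Scans all live documents in the index reader and collects the ids of those whose
     * {@code ArtifactInfo.LAST_MODIFIED} value is after the given timestamp.
     */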
    private List<Integer> getIndexChunk( IndexPackingRequest request, Date timestamp )
        throws IOException
    {
        final List<Integer> chunk = new ArrayList<>();
        final IndexReader r = request.getIndexReader();
        Bits liveDocs = MultiFields.getLiveDocs( r );
        for ( int i = 0; i < r.maxDoc(); i++ )
        {
            if ( liveDocs == null || liveDocs.get( i ) )
            {
                Document d = r.document( i );

                String lastModified = d.get( ArtifactInfo.LAST_MODIFIED );

                if ( lastModified != null )
                {
                    Date t = new Date( Long.parseLong( lastModified ) );

                    // Only add documents that were added after the last time we indexed
                    if ( t.after( timestamp ) )
                    {
                        chunk.add( i );
                    }
                }
            }
        }

        return chunk;
    }

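    /**
     * Rotates the per-chunk entries in the properties: existing {@code IndexingContext.INDEX_CHUNK_PREFIX}
     * keys are shifted up by one position (entries beyond the configured maximum number of chunks are
     * dropped), the freshly created chunk is recorded at position 0, and the chunk counter is incremented.
     */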
    private void updateProperties( Properties properties, IndexPackingRequest request )
        throws IOException
    {
        Set<Object> keys = new HashSet<>( properties.keySet() );
        Map<Integer, String> dataMap = new TreeMap<>();

        // First go through and retrieve all keys and their values
        for ( Object key : keys )
        {
            String sKey = (String) key;

            if ( sKey.startsWith( IndexingContext.INDEX_CHUNK_PREFIX ) )
            {
                Integer count = Integer.valueOf( sKey.substring( IndexingContext.INDEX_CHUNK_PREFIX.length() ) );
                String value = properties.getProperty( sKey );

                dataMap.put( count, value );
                properties.remove( key );
            }
        }

        String val = properties.getProperty( IndexingContext.INDEX_CHUNK_COUNTER );

        int i = 0;
        // Next put the items back in w/ proper keys
        for ( Entry<Integer, String> entry : dataMap.entrySet() )
        {
            // make sure to end if we reach limit, 0 based
            if ( i >= ( request.getMaxIndexChunks() - 1 ) )
            {
                break;
            }

            properties.put( IndexingContext.INDEX_CHUNK_PREFIX + ( entry.getKey() + 1 ), entry.getValue() );

            i++;
        }

        int nextValue = Integer.parseInt( val ) + 1;

        // Now put the new one in, and update the counter
        properties.put( IndexingContext.INDEX_CHUNK_PREFIX + "0", Integer.toString( nextValue ) );
        properties.put( IndexingContext.INDEX_CHUNK_COUNTER, Integer.toString( nextValue ) );
    }

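    /**
     * Deletes incremental chunk files ({@code prefix.counter.gz}) from the target directory whose
     * counter is no longer referenced by any chunk property.
     */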
    private void cleanUpIncrementalChunks( IndexPackingRequest request, Properties properties )
        throws IOException
    {
        File[] files = request.getTargetDir().listFiles( new FilenameFilter()
        {
            public boolean accept( File dir, String name )
            {
                String[] parts = name.split( "\\." );

                if ( parts.length == 3 && parts[0].equals( IndexingContext.INDEX_FILE_PREFIX ) && parts[2].equals(
                    "gz" ) )
                {
                    return true;
                }

                return false;
            }
        } );

        for ( int i = 0; i < files.length; i++ )
        {
            String[] parts = files[i].getName().split( "\\." );

            boolean found = false;
            for ( Entry<Object, Object> entry : properties.entrySet() )
            {
                if ( entry.getKey().toString().startsWith( IndexingContext.INDEX_CHUNK_PREFIX )
                    && entry.getValue().equals( parts[1] ) )
                {
                    found = true;
                    break;
                }
            }

            if ( !found )
            {
                files[i].delete();
            }
        }
    }

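    /**
     * Parses a timestamp using the index time format in GMT, returning {@code null} if it cannot be parsed.
     */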
    private Date parse( String s )
    {
        try
        {
            SimpleDateFormat df = new SimpleDateFormat( IndexingContext.INDEX_TIME_FORMAT );
            df.setTimeZone( TimeZone.getTimeZone( "GMT" ) );
            return df.parse( s );
        }
        catch ( ParseException e )
        {
            return null;
        }
    }

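    /**
     * Decides whether an incremental download is possible: the local and remote chain ids must match,
     * the local chunk counter must be numeric, and the remote properties must still advertise the chunk
     * at (or right after) the local counter. Otherwise a full index download is required.
     */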
    private boolean canRetrieveAllChunks( Properties localProps, Properties remoteProps )
    {
        // no local props, can't retrieve chunks
        if ( localProps == null )
        {
            return false;
        }

        String localChainId = localProps.getProperty( IndexingContext.INDEX_CHAIN_ID );
        String remoteChainId = remoteProps.getProperty( IndexingContext.INDEX_CHAIN_ID );

        // If no chain id, or not the same, do whole download
        if ( StringUtils.isEmpty( localChainId ) || !localChainId.equals( remoteChainId ) )
        {
            return false;
        }

        String counterProp = localProps.getProperty( IndexingContext.INDEX_CHUNK_COUNTER );

        // no counter, can't retrieve chunks
        // not a number, can't retrieve chunks
        if ( StringUtils.isEmpty( counterProp ) || !StringUtils.isNumeric( counterProp ) )
        {
            return false;
        }

        int currentLocalCounter = Integer.parseInt( counterProp );

        // check remote props for existence of next chunk after local
        // if we find it, then we are ok to retrieve the rest of the chunks
        for ( Object key : remoteProps.keySet() )
        {
            String sKey = (String) key;

            if ( sKey.startsWith( IndexingContext.INDEX_CHUNK_PREFIX ) )
            {
                String value = remoteProps.getProperty( sKey );

                // If we have the current counter, or the next counter, we are good to go
                if ( Integer.toString( currentLocalCounter ).equals( value ) || Integer.toString(
                    currentLocalCounter + 1 ).equals( value ) )
                {
                    return true;
                }
            }
        }

        return false;
    }
}