Coverage Report - org.apache.maven.index.updater.IndexDataReader
 
Classes in this File Line Coverage Branch Coverage Complexity
IndexDataReader
57 %
59/103
35 %
20/56
4,333
IndexDataReader$IndexDataReadResult
85 %
6/7
N/A
4,333
IndexDataReader$IndexDataReadVisitor
N/A
N/A
4,333
 
 1  
 package org.apache.maven.index.updater;
 2  
 
 3  
 /*
 4  
  * Licensed to the Apache Software Foundation (ASF) under one
 5  
  * or more contributor license agreements.  See the NOTICE file
 6  
  * distributed with this work for additional information
 7  
  * regarding copyright ownership.  The ASF licenses this file
 8  
  * to you under the Apache License, Version 2.0 (the
 9  
  * "License"); you may not use this file except in compliance
 10  
  * with the License.  You may obtain a copy of the License at
 11  
  *
 12  
  *   http://www.apache.org/licenses/LICENSE-2.0    
 13  
  *
 14  
  * Unless required by applicable law or agreed to in writing,
 15  
  * software distributed under the License is distributed on an
 16  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 17  
  * KIND, either express or implied.  See the License for the
 18  
  * specific language governing permissions and limitations
 19  
  * under the License.
 20  
  */
 21  
 
 22  
 import java.io.BufferedInputStream;
 23  
 import java.io.DataInput;
 24  
 import java.io.DataInputStream;
 25  
 import java.io.EOFException;
 26  
 import java.io.IOException;
 27  
 import java.io.InputStream;
 28  
 import java.io.UTFDataFormatException;
 29  
 import java.util.Date;
 30  
 import java.util.zip.GZIPInputStream;
 31  
 
 32  
 import org.apache.lucene.document.Document;
 33  
 import org.apache.lucene.document.Field;
 34  
 import org.apache.lucene.document.Field.Index;
 35  
 import org.apache.lucene.document.Field.Store;
 36  
 import org.apache.lucene.index.IndexWriter;
 37  
 import org.apache.maven.index.context.IndexUtils;
 38  
 import org.apache.maven.index.context.IndexingContext;
 39  
 
 40  
 /**
 41  
  * An index data reader used to parse the transfer index format.
 42  
  * 
 43  
  * @author Eugene Kuleshov
 44  
  */
 45  
 public class IndexDataReader
 46  
 {
 47  
     private final DataInputStream dis;
 48  
 
 49  
     public IndexDataReader( InputStream is )
 50  
         throws IOException
 51  36
     {
 52  36
         BufferedInputStream bis = new BufferedInputStream( is, 1024 * 8 );
 53  
 
 54  
         // MINDEXER-13
 55  
         // LightweightHttpWagon may have performed automatic decompression
 56  
         // Handle it transparently
 57  36
         bis.mark( 2 );
 58  
         InputStream data;
 59  36
         if ( bis.read() == 0x1f && bis.read() == 0x8b ) // GZIPInputStream.GZIP_MAGIC
 60  
         {
 61  36
             bis.reset();
 62  36
             data = new GZIPInputStream( bis, 2 * 1024 );
 63  
         }
 64  
         else
 65  
         {
 66  0
             bis.reset();
 67  0
             data = bis;
 68  
         }
 69  
 
 70  36
         this.dis = new DataInputStream( data );
 71  36
     }
 72  
 
 73  
     public IndexDataReadResult readIndex( IndexWriter w, IndexingContext context )
 74  
         throws IOException
 75  
     {
 76  35
         long timestamp = readHeader();
 77  
 
 78  35
         Date date = null;
 79  
 
 80  35
         if ( timestamp != -1 )
 81  
         {
 82  34
             date = new Date( timestamp );
 83  
 
 84  34
             IndexUtils.updateTimestamp( w.getDirectory(), date );
 85  
         }
 86  
 
 87  35
         int n = 0;
 88  
 
 89  
         Document doc;
 90  759
         while ( ( doc = readDocument() ) != null )
 91  
         {
 92  724
             w.addDocument( IndexUtils.updateDocument( doc, context, false ) );
 93  
 
 94  724
             n++;
 95  
         }
 96  
 
 97  35
         w.commit();
 98  35
         w.optimize();
 99  
 
 100  35
         IndexDataReadResult result = new IndexDataReadResult();
 101  35
         result.setDocumentCount( n );
 102  35
         result.setTimestamp( date );
 103  35
         return result;
 104  
     }
 105  
 
 106  
     public long readHeader()
 107  
         throws IOException
 108  
     {
 109  36
         final byte HDRBYTE = (byte) ( ( IndexDataWriter.VERSION << 24 ) >> 24 );
 110  
 
 111  36
         if ( HDRBYTE != dis.readByte() )
 112  
         {
 113  
             // data format version mismatch
 114  0
             throw new IOException( "Provided input contains unexpected data (0x01 expected as 1st byte)!" );
 115  
         }
 116  
 
 117  36
         return dis.readLong();
 118  
     }
 119  
 
 120  
     public Document readDocument()
 121  
         throws IOException
 122  
     {
 123  
         int fieldCount;
 124  
         try
 125  
         {
 126  775
             fieldCount = dis.readInt();
 127  
         }
 128  36
         catch ( EOFException ex )
 129  
         {
 130  36
             return null; // no more documents
 131  739
         }
 132  
 
 133  739
         Document doc = new Document();
 134  
 
 135  4380
         for ( int i = 0; i < fieldCount; i++ )
 136  
         {
 137  3641
             doc.add( readField() );
 138  
         }
 139  
 
 140  739
         return doc;
 141  
     }
 142  
 
 143  
     private Field readField()
 144  
         throws IOException
 145  
     {
 146  3641
         int flags = dis.read();
 147  
 
 148  3641
         Index index = Index.NO;
 149  3641
         if ( ( flags & IndexDataWriter.F_INDEXED ) > 0 )
 150  
         {
 151  2243
             boolean isTokenized = ( flags & IndexDataWriter.F_TOKENIZED ) > 0;
 152  2243
             index = isTokenized ? Index.ANALYZED : Index.NOT_ANALYZED;
 153  
         }
 154  
 
 155  3641
         Store store = Store.NO;
 156  3641
         if ( ( flags & IndexDataWriter.F_STORED ) > 0 )
 157  
         {
 158  3641
             store = Store.YES;
 159  
         }
 160  
 
 161  3641
         String name = dis.readUTF();
 162  3641
         String value = readUTF( dis );
 163  
 
 164  3641
         return new Field( name, value, store, index );
 165  
     }
 166  
 
 167  
     private static String readUTF( DataInput in )
 168  
         throws IOException
 169  
     {
 170  3641
         int utflen = in.readInt();
 171  
 
 172  
         byte[] bytearr;
 173  
         char[] chararr;
 174  
 
 175  
         try
 176  
         {
 177  3641
             bytearr = new byte[utflen];
 178  3641
             chararr = new char[utflen];
 179  
         }
 180  0
         catch ( OutOfMemoryError e )
 181  
         {
 182  0
             final IOException ex =
 183  
                 new IOException(
 184  
                     "Index data content is inappropriate (is junk?), leads to OutOfMemoryError! See MINDEXER-28 for more information!" );
 185  0
             ex.initCause( e );
 186  0
             throw ex;
 187  3641
         }
 188  
 
 189  
         int c, char2, char3;
 190  3641
         int count = 0;
 191  3641
         int chararr_count = 0;
 192  
 
 193  3641
         in.readFully( bytearr, 0, utflen );
 194  
 
 195  412952
         while ( count < utflen )
 196  
         {
 197  409311
             c = bytearr[count] & 0xff;
 198  409311
             if ( c > 127 )
 199  
             {
 200  0
                 break;
 201  
             }
 202  409311
             count++;
 203  409311
             chararr[chararr_count++] = (char) c;
 204  
         }
 205  
 
 206  3641
         while ( count < utflen )
 207  
         {
 208  0
             c = bytearr[count] & 0xff;
 209  0
             switch ( c >> 4 )
 210  
             {
 211  
                 case 0:
 212  
                 case 1:
 213  
                 case 2:
 214  
                 case 3:
 215  
                 case 4:
 216  
                 case 5:
 217  
                 case 6:
 218  
                 case 7:
 219  
                     /* 0xxxxxxx */
 220  0
                     count++;
 221  0
                     chararr[chararr_count++] = (char) c;
 222  0
                     break;
 223  
 
 224  
                 case 12:
 225  
                 case 13:
 226  
                     /* 110x xxxx 10xx xxxx */
 227  0
                     count += 2;
 228  0
                     if ( count > utflen )
 229  
                     {
 230  0
                         throw new UTFDataFormatException( "malformed input: partial character at end" );
 231  
                     }
 232  0
                     char2 = bytearr[count - 1];
 233  0
                     if ( ( char2 & 0xC0 ) != 0x80 )
 234  
                     {
 235  0
                         throw new UTFDataFormatException( "malformed input around byte " + count );
 236  
                     }
 237  0
                     chararr[chararr_count++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
 238  0
                     break;
 239  
 
 240  
                 case 14:
 241  
                     /* 1110 xxxx 10xx xxxx 10xx xxxx */
 242  0
                     count += 3;
 243  0
                     if ( count > utflen )
 244  
                     {
 245  0
                         throw new UTFDataFormatException( "malformed input: partial character at end" );
 246  
                     }
 247  0
                     char2 = bytearr[count - 2];
 248  0
                     char3 = bytearr[count - 1];
 249  0
                     if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
 250  
                     {
 251  0
                         throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
 252  
                     }
 253  0
                     chararr[chararr_count++] =
 254  
                         (char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( ( char3 & 0x3F ) << 0 ) );
 255  0
                     break;
 256  
 
 257  
                 default:
 258  
                     /* 10xx xxxx, 1111 xxxx */
 259  0
                     throw new UTFDataFormatException( "malformed input around byte " + count );
 260  
             }
 261  
         }
 262  
 
 263  
         // The number of chars produced may be less than utflen
 264  3641
         return new String( chararr, 0, chararr_count );
 265  
     }
 266  
 
 267  
     /**
 268  
      * An index data read result holder
 269  
      */
 270  35
     public static class IndexDataReadResult
 271  
     {
 272  
         private Date timestamp;
 273  
 
 274  
         private int documentCount;
 275  
 
 276  
         public void setDocumentCount( int documentCount )
 277  
         {
 278  35
             this.documentCount = documentCount;
 279  35
         }
 280  
 
 281  
         public int getDocumentCount()
 282  
         {
 283  0
             return documentCount;
 284  
         }
 285  
 
 286  
         public void setTimestamp( Date timestamp )
 287  
         {
 288  35
             this.timestamp = timestamp;
 289  35
         }
 290  
 
 291  
         public Date getTimestamp()
 292  
         {
 293  35
             return timestamp;
 294  
         }
 295  
 
 296  
     }
 297  
 
 298  
     /**
 299  
      * Reads index content by using a visitor. <br>
 300  
      * The visitor is called for each read documents after it has been populated with Lucene fields.
 301  
      * 
 302  
      * @param visitor an index data visitor
 303  
      * @param context indexing context
 304  
      * @return statistics about read data
 305  
      * @throws IOException in case of an IO exception during index file access
 306  
      */
 307  
     public IndexDataReadResult readIndex( final IndexDataReadVisitor visitor, final IndexingContext context )
 308  
         throws IOException
 309  
     {
 310  0
         dis.readByte(); // data format version
 311  
 
 312  0
         long timestamp = dis.readLong();
 313  
 
 314  0
         Date date = null;
 315  
 
 316  0
         if ( timestamp != -1 )
 317  
         {
 318  0
             date = new Date( timestamp );
 319  
         }
 320  
 
 321  0
         int n = 0;
 322  
 
 323  
         Document doc;
 324  0
         while ( ( doc = readDocument() ) != null )
 325  
         {
 326  0
             visitor.visitDocument( IndexUtils.updateDocument( doc, context, false ) );
 327  
 
 328  0
             n++;
 329  
         }
 330  
 
 331  0
         IndexDataReadResult result = new IndexDataReadResult();
 332  0
         result.setDocumentCount( n );
 333  0
         result.setTimestamp( date );
 334  0
         return result;
 335  
     }
 336  
 
 337  
     /**
 338  
      * Visitor of indexed Lucene documents.
 339  
      */
 340  
     public static interface IndexDataReadVisitor
 341  
     {
 342  
 
 343  
         /**
 344  
          * Called on each read document. The document is already populated with fields.
 345  
          * 
 346  
          * @param document read document
 347  
          */
 348  
         void visitDocument( Document document );
 349  
 
 350  
     }
 351  
 
 352  
 }