Coverage Report

 package org.apache.maven.index.updater;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0    
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 
 import java.io.BufferedInputStream;
 import java.io.DataInput;
 import java.io.DataInputStream;
 import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UTFDataFormatException;
 import java.util.Date;
 import java.util.zip.GZIPInputStream;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.maven.index.context.IndexUtils;
 import org.apache.maven.index.context.IndexingContext;
 
 /**
  * An index data reader used to parse transfer index format.
  * 
  * @author Eugene Kuleshov
  */
 public class IndexDataReader
 {
     private final DataInputStream dis;
 
     public IndexDataReader( InputStream is )
         throws IOException
     {
         BufferedInputStream bis = new BufferedInputStream( is, 1024 * 8 );
 
         // MINDEXER-13
         // LightweightHttpWagon may have performed automatic decompression
         // Handle it transparently
         bis.mark( 2 );
         InputStream data;
         if ( bis.read() == 0x1f && bis.read() == 0x8b ) // GZIPInputStream.GZIP_MAGIC
         {
             bis.reset();
             data = new GZIPInputStream( bis, 2 * 1024 );
         }
         else
         {
             bis.reset();
             data = bis;
         }
 
         this.dis = new DataInputStream( data );
     }
 
     public IndexDataReadResult readIndex( IndexWriter w, IndexingContext context )
         throws IOException
     {
         long timestamp = readHeader();
 
         Date date = null;
 
         if ( timestamp != -1 )
         {
             date = new Date( timestamp );
 
             IndexUtils.updateTimestamp( w.getDirectory(), date );
         }
 
         int n = 0;
 
         Document doc;
         while ( ( doc = readDocument() ) != null )
         {
             w.addDocument( IndexUtils.updateDocument( doc, context, false ) );
 
             n++;
         }
 
         w.commit();
         w.optimize();
 
         IndexDataReadResult result = new IndexDataReadResult();
         result.setDocumentCount( n );
         result.setTimestamp( date );
         return result;
     }
 
     public long readHeader()
         throws IOException
     {
         final byte HDRBYTE = (byte) ( ( IndexDataWriter.VERSION << 24 ) >> 24 );
 
         if ( HDRBYTE != dis.readByte() )
         {
             // data format version mismatch
             throw new IOException( "Provided input contains unexpected data (0x01 expected as 1st byte)!" );
         }
 
         return dis.readLong();
     }
 
     public Document readDocument()
         throws IOException
     {
         int fieldCount;
         try
         {
             fieldCount = dis.readInt();
         }
         catch ( EOFException ex )
         {
             return null; // no more documents
         }
 
         Document doc = new Document();
 
         for ( int i = 0; i < fieldCount; i++ )
         {
             doc.add( readField() );
         }
 
         return doc;
     }
 
     private Field readField()
         throws IOException
     {
         int flags = dis.read();
 
         Index index = Index.NO;
         if ( ( flags & IndexDataWriter.F_INDEXED ) > 0 )
         {
             boolean isTokenized = ( flags & IndexDataWriter.F_TOKENIZED ) > 0;
             index = isTokenized ? Index.ANALYZED : Index.NOT_ANALYZED;
         }
 
         Store store = Store.NO;
         if ( ( flags & IndexDataWriter.F_STORED ) > 0 )
         {
             store = Store.YES;
         }
 
         String name = dis.readUTF();
         String value = readUTF( dis );
 
         return new Field( name, value, store, index );
     }
 
     private static String readUTF( DataInput in )
         throws IOException
     {
         int utflen = in.readInt();
 
         byte[] bytearr;
         char[] chararr;
 
         try
         {
             bytearr = new byte[utflen];
             chararr = new char[utflen];
         }
         catch ( OutOfMemoryError e )
         {
             final IOException ex =
                 new IOException(
                     "Index data content is inappropriate (is junk?), leads to OutOfMemoryError! See MINDEXER-28 for more information!" );
             ex.initCause( e );
             throw ex;
         }
 
         int c, char2, char3;
         int count = 0;
         int chararr_count = 0;
 
         in.readFully( bytearr, 0, utflen );
 
         while ( count < utflen )
         {
             c = bytearr[count] & 0xff;
             if ( c > 127 )
             {
                 break;
             }
             count++;
             chararr[chararr_count++] = (char) c;
         }
 
         while ( count < utflen )
         {
             c = bytearr[count] & 0xff;
             switch ( c >> 4 )
             {
                 case 0:
                 case 1:
                 case 2:
                 case 3:
                 case 4:
                 case 5:
                 case 6:
                 case 7:
                     /* 0xxxxxxx */
                     count++;
                     chararr[chararr_count++] = (char) c;
                     break;
 
                 case 12:
                 case 13:
                     /* 110x xxxx 10xx xxxx */
                     count += 2;
                     if ( count > utflen )
                     {
                         throw new UTFDataFormatException( "malformed input: partial character at end" );
                     }
                     char2 = bytearr[count - 1];
                     if ( ( char2 & 0xC0 ) != 0x80 )
                     {
                         throw new UTFDataFormatException( "malformed input around byte " + count );
                     }
                     chararr[chararr_count++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
                     break;
 
                 case 14:
                     /* 1110 xxxx 10xx xxxx 10xx xxxx */
                     count += 3;
                     if ( count > utflen )
                     {
                         throw new UTFDataFormatException( "malformed input: partial character at end" );
                     }
                     char2 = bytearr[count - 2];
                     char3 = bytearr[count - 1];
                     if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
                     {
                         throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
                     }
                     chararr[chararr_count++] =
                         (char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( ( char3 & 0x3F ) << 0 ) );
                     break;
 
                 default:
                     /* 10xx xxxx, 1111 xxxx */
                     throw new UTFDataFormatException( "malformed input around byte " + count );
             }
         }
 
         // The number of chars produced may be less than utflen
         return new String( chararr, 0, chararr_count );
     }
 
     /**
      * An index data read result holder
      */
     public static class IndexDataReadResult
     {
         private Date timestamp;
 
         private int documentCount;
 
         public void setDocumentCount( int documentCount )
         {
             this.documentCount = documentCount;
         }
 
         public int getDocumentCount()
         {
             return documentCount;
         }
 
         public void setTimestamp( Date timestamp )
         {
             this.timestamp = timestamp;
         }
 
         public Date getTimestamp()
         {
             return timestamp;
         }
 
     }
 
     /**
      * Reads index content by using a visitor. <br>
      * The visitor is called for each read documents after it has been populated with Lucene fields.
      * 
      * @param visitor an index data visitor
      * @param context indexing context
      * @return statistics about read data
      * @throws IOException in case of an IO exception during index file access
      */
     public IndexDataReadResult readIndex( final IndexDataReadVisitor visitor, final IndexingContext context )
         throws IOException
     {
         dis.readByte(); // data format version
 
         long timestamp = dis.readLong();
 
         Date date = null;
 
         if ( timestamp != -1 )
         {
             date = new Date( timestamp );
         }
 
         int n = 0;
 
         Document doc;
         while ( ( doc = readDocument() ) != null )
         {
             visitor.visitDocument( IndexUtils.updateDocument( doc, context, false ) );
 
             n++;
         }
 
         IndexDataReadResult result = new IndexDataReadResult();
         result.setDocumentCount( n );
         result.setTimestamp( date );
         return result;
     }
 
     /**
      * Visitor of indexed Lucene documents.
      */
     public static interface IndexDataReadVisitor
     {
 
         /**
          * Called on each read document. The document is already populated with fields.
          * 
          * @param document read document
          */
         void visitDocument( Document document );
 
     }
 
 }

1		package org.apache.maven.index.updater;
2
3		/*
4		* Licensed to the Apache Software Foundation (ASF) under one
5		* or more contributor license agreements. See the NOTICE file
6		* distributed with this work for additional information
7		* regarding copyright ownership. The ASF licenses this file
8		* to you under the Apache License, Version 2.0 (the
9		* "License"); you may not use this file except in compliance
10		* with the License. You may obtain a copy of the License at
11		*
12		* http://www.apache.org/licenses/LICENSE-2.0
13		*
14		* Unless required by applicable law or agreed to in writing,
15		* software distributed under the License is distributed on an
16		* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17		* KIND, either express or implied. See the License for the
18		* specific language governing permissions and limitations
19		* under the License.
20		*/
21
22		import java.io.BufferedInputStream;
23		import java.io.DataInput;
24		import java.io.DataInputStream;
25		import java.io.EOFException;
26		import java.io.IOException;
27		import java.io.InputStream;
28		import java.io.UTFDataFormatException;
29		import java.util.Date;
30		import java.util.zip.GZIPInputStream;
31
32		import org.apache.lucene.document.Document;
33		import org.apache.lucene.document.Field;
34		import org.apache.lucene.document.Field.Index;
35		import org.apache.lucene.document.Field.Store;
36		import org.apache.lucene.index.IndexWriter;
37		import org.apache.maven.index.context.IndexUtils;
38		import org.apache.maven.index.context.IndexingContext;
39
40		/**
41		* An index data reader used to parse transfer index format.
42		*
43		* @author Eugene Kuleshov
44		*/
45		public class IndexDataReader
46		{
47		private final DataInputStream dis;
48
49		public IndexDataReader( InputStream is )
50		throws IOException
51	36	{
52	36	BufferedInputStream bis = new BufferedInputStream( is, 1024 * 8 );
53
54		// MINDEXER-13
55		// LightweightHttpWagon may have performed automatic decompression
56		// Handle it transparently
57	36	bis.mark( 2 );
58		InputStream data;
59	36	if ( bis.read() == 0x1f && bis.read() == 0x8b ) // GZIPInputStream.GZIP_MAGIC
60		{
61	36	bis.reset();
62	36	data = new GZIPInputStream( bis, 2 * 1024 );
63		}
64		else
65		{
66	0	bis.reset();
67	0	data = bis;
68		}
69
70	36	this.dis = new DataInputStream( data );
71	36	}
72
73		public IndexDataReadResult readIndex( IndexWriter w, IndexingContext context )
74		throws IOException
75		{
76	35	long timestamp = readHeader();
77
78	35	Date date = null;
79
80	35	if ( timestamp != -1 )
81		{
82	34	date = new Date( timestamp );
83
84	34	IndexUtils.updateTimestamp( w.getDirectory(), date );
85		}
86
87	35	int n = 0;
88
89		Document doc;
90	759	while ( ( doc = readDocument() ) != null )
91		{
92	724	w.addDocument( IndexUtils.updateDocument( doc, context, false ) );
93
94	724	n++;
95		}
96
97	35	w.commit();
98	35	w.optimize();
99
100	35	IndexDataReadResult result = new IndexDataReadResult();
101	35	result.setDocumentCount( n );
102	35	result.setTimestamp( date );
103	35	return result;
104		}
105
106		public long readHeader()
107		throws IOException
108		{
109	36	final byte HDRBYTE = (byte) ( ( IndexDataWriter.VERSION << 24 ) >> 24 );
110
111	36	if ( HDRBYTE != dis.readByte() )
112		{
113		// data format version mismatch
114	0	throw new IOException( "Provided input contains unexpected data (0x01 expected as 1st byte)!" );
115		}
116
117	36	return dis.readLong();
118		}
119
120		public Document readDocument()
121		throws IOException
122		{
123		int fieldCount;
124		try
125		{
126	775	fieldCount = dis.readInt();
127		}
128	36	catch ( EOFException ex )
129		{
130	36	return null; // no more documents
131	739	}
132
133	739	Document doc = new Document();
134
135	4380	for ( int i = 0; i < fieldCount; i++ )
136		{
137	3641	doc.add( readField() );
138		}
139
140	739	return doc;
141		}
142
143		private Field readField()
144		throws IOException
145		{
146	3641	int flags = dis.read();
147
148	3641	Index index = Index.NO;
149	3641	if ( ( flags & IndexDataWriter.F_INDEXED ) > 0 )
150		{
151	2243	boolean isTokenized = ( flags & IndexDataWriter.F_TOKENIZED ) > 0;
152	2243	index = isTokenized ? Index.ANALYZED : Index.NOT_ANALYZED;
153		}
154
155	3641	Store store = Store.NO;
156	3641	if ( ( flags & IndexDataWriter.F_STORED ) > 0 )
157		{
158	3641	store = Store.YES;
159		}
160
161	3641	String name = dis.readUTF();
162	3641	String value = readUTF( dis );
163
164	3641	return new Field( name, value, store, index );
165		}
166
167		private static String readUTF( DataInput in )
168		throws IOException
169		{
170	3641	int utflen = in.readInt();
171
172		byte[] bytearr;
173		char[] chararr;
174
175		try
176		{
177	3641	bytearr = new byte[utflen];
178	3641	chararr = new char[utflen];
179		}
180	0	catch ( OutOfMemoryError e )
181		{
182	0	final IOException ex =
183		new IOException(
184		"Index data content is inappropriate (is junk?), leads to OutOfMemoryError! See MINDEXER-28 for more information!" );
185	0	ex.initCause( e );
186	0	throw ex;
187	3641	}
188
189		int c, char2, char3;
190	3641	int count = 0;
191	3641	int chararr_count = 0;
192
193	3641	in.readFully( bytearr, 0, utflen );
194
195	412952	while ( count < utflen )
196		{
197	409311	c = bytearr[count] & 0xff;
198	409311	if ( c > 127 )
199		{
200	0	break;
201		}
202	409311	count++;
203	409311	chararr[chararr_count++] = (char) c;
204		}
205
206	3641	while ( count < utflen )
207		{
208	0	c = bytearr[count] & 0xff;
209	0	switch ( c >> 4 )
210		{
211		case 0:
212		case 1:
213		case 2:
214		case 3:
215		case 4:
216		case 5:
217		case 6:
218		case 7:
219		/* 0xxxxxxx */
220	0	count++;
221	0	chararr[chararr_count++] = (char) c;
222	0	break;
223
224		case 12:
225		case 13:
226		/* 110x xxxx 10xx xxxx */
227	0	count += 2;
228	0	if ( count > utflen )
229		{
230	0	throw new UTFDataFormatException( "malformed input: partial character at end" );
231		}
232	0	char2 = bytearr[count - 1];
233	0	if ( ( char2 & 0xC0 ) != 0x80 )
234		{
235	0	throw new UTFDataFormatException( "malformed input around byte " + count );
236		}
237	0	chararr[chararr_count++] = (char) ( ( ( c & 0x1F ) << 6 ) \| ( char2 & 0x3F ) );
238	0	break;
239
240		case 14:
241		/* 1110 xxxx 10xx xxxx 10xx xxxx */
242	0	count += 3;
243	0	if ( count > utflen )
244		{
245	0	throw new UTFDataFormatException( "malformed input: partial character at end" );
246		}
247	0	char2 = bytearr[count - 2];
248	0	char3 = bytearr[count - 1];
249	0	if ( ( ( char2 & 0xC0 ) != 0x80 ) \|\| ( ( char3 & 0xC0 ) != 0x80 ) )
250		{
251	0	throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
252		}
253	0	chararr[chararr_count++] =
254		(char) ( ( ( c & 0x0F ) << 12 ) \| ( ( char2 & 0x3F ) << 6 ) \| ( ( char3 & 0x3F ) << 0 ) );
255	0	break;
256
257		default:
258		/* 10xx xxxx, 1111 xxxx */
259	0	throw new UTFDataFormatException( "malformed input around byte " + count );
260		}
261		}
262
263		// The number of chars produced may be less than utflen
264	3641	return new String( chararr, 0, chararr_count );
265		}
266
267		/**
268		* An index data read result holder
269		*/
270	35	public static class IndexDataReadResult
271		{
272		private Date timestamp;
273
274		private int documentCount;
275
276		public void setDocumentCount( int documentCount )
277		{
278	35	this.documentCount = documentCount;
279	35	}
280
281		public int getDocumentCount()
282		{
283	0	return documentCount;
284		}
285
286		public void setTimestamp( Date timestamp )
287		{
288	35	this.timestamp = timestamp;
289	35	}
290
291		public Date getTimestamp()
292		{
293	35	return timestamp;
294		}
295
296		}
297
298		/**
299		* Reads index content by using a visitor. <br>
300		* The visitor is called for each read documents after it has been populated with Lucene fields.
301		*
302		* @param visitor an index data visitor
303		* @param context indexing context
304		* @return statistics about read data
305		* @throws IOException in case of an IO exception during index file access
306		*/
307		public IndexDataReadResult readIndex( final IndexDataReadVisitor visitor, final IndexingContext context )
308		throws IOException
309		{
310	0	dis.readByte(); // data format version
311
312	0	long timestamp = dis.readLong();
313
314	0	Date date = null;
315
316	0	if ( timestamp != -1 )
317		{
318	0	date = new Date( timestamp );
319		}
320
321	0	int n = 0;
322
323		Document doc;
324	0	while ( ( doc = readDocument() ) != null )
325		{
326	0	visitor.visitDocument( IndexUtils.updateDocument( doc, context, false ) );
327
328	0	n++;
329		}
330
331	0	IndexDataReadResult result = new IndexDataReadResult();
332	0	result.setDocumentCount( n );
333	0	result.setTimestamp( date );
334	0	return result;
335		}
336
337		/**
338		* Visitor of indexed Lucene documents.
339		*/
340		public static interface IndexDataReadVisitor
341		{
342
343		/**
344		* Called on each read document. The document is already populated with fields.
345		*
346		* @param document read document
347		*/
348		void visitDocument( Document document );
349
350		}
351
352		}