View Javadoc
1   package org.apache.maven.index.reader;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.Closeable;
23  import java.io.DataInput;
24  import java.io.DataInputStream;
25  import java.io.EOFException;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.UTFDataFormatException;
29  import java.util.Date;
30  import java.util.HashMap;
31  import java.util.Iterator;
32  import java.util.Map;
33  import java.util.NoSuchElementException;
34  import java.util.zip.GZIPInputStream;
35  
/**
 * Maven 2 Index published binary chunk reader; it reads raw Maven Indexer records from the transport binary format.
 * <p>
 * A chunk is a GZIP compressed stream that starts with a one byte version and an eight byte timestamp, followed by
 * the records. Each record is an {@code int} field count followed by that many fields; each field is a flag byte, a
 * {@link DataInput#readUTF() modified UTF-8} name and a value encoded as an {@code int} byte length followed by
 * that many modified UTF-8 bytes.
 * <p>
 * All iterators returned by {@link #iterator()} consume the same underlying stream, so the records of a chunk can be
 * iterated only once.
 *
 * @since 5.1.2
 */
public class ChunkReader
    implements Closeable, Iterable<Map<String, String>>
{
    private static final String CORRUPT_MESSAGE = "Index data content is corrupt";

    private final String chunkName;

    private final DataInputStream dataInputStream;

    private final int version;

    /**
     * Index timestamp in epoch milliseconds; kept as a primitive so that callers cannot mutate the reader state.
     */
    private final long timestamp;

    /**
     * Creates a reader over the given GZIP compressed chunk and eagerly reads the chunk header (version and
     * timestamp).
     *
     * @param chunkName the chunk name, surrounding whitespace is trimmed; must not be {@code null}
     * @param inputStream the GZIP compressed chunk content; it is closed by {@link #close()}
     * @throws IOException if the stream is not in GZIP format or the header cannot be read; the passed in stream
     *             is not closed by this constructor in that case
     */
    public ChunkReader( final String chunkName, final InputStream inputStream )
        throws IOException
    {
        this.chunkName = chunkName.trim();
        this.dataInputStream = new DataInputStream( new GZIPInputStream( inputStream, 2 * 1024 ) );
        this.version = dataInputStream.readUnsignedByte();
        this.timestamp = dataInputStream.readLong();
    }

    /**
     * Returns the chunk name.
     */
    public String getName()
    {
        return chunkName;
    }

    /**
     * Returns index version. All releases so far always returned {@code 1}.
     */
    public int getVersion()
    {
        return version;
    }

    /**
     * Returns the index timestamp of last update of the index. A new {@link Date} instance is returned on every
     * call, so modifying it does not affect this reader.
     */
    public Date getTimestamp()
    {
        return new Date( timestamp );
    }

    /**
     * Returns an iterator over the raw records of this chunk, each record being a map of field names to field
     * values. The first record is read eagerly when the iterator is created, and every iterator reads from the same
     * underlying stream.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the first record cannot be read
     */
    public Iterator<Map<String, String>> iterator()
    {
        return new IndexIterator( dataInputStream );
    }

    /**
     * Closes this reader and its underlying input.
     */
    public void close()
        throws IOException
    {
        dataInputStream.close();
    }

    /**
     * Low memory footprint index iterator that incrementally parses the underlying stream. It always holds one
     * record read ahead, which is how {@link #hasNext()} can answer without touching the stream.
     */
    private static class IndexIterator
        implements Iterator<Map<String, String>>
    {
        private final DataInputStream dataInputStream;

        private Map<String, String> nextRecord;

        private IndexIterator( final DataInputStream dataInputStream )
        {
            this.dataInputStream = dataInputStream;
            this.nextRecord = nextRecord();
        }

        public boolean hasNext()
        {
            return nextRecord != null;
        }

        public Map<String, String> next()
        {
            if ( nextRecord == null )
            {
                throw new NoSuchElementException( "chunk depleted" );
            }
            Map<String, String> result = nextRecord;
            nextRecord = nextRecord();
            return result;
        }

        public void remove()
        {
            throw new UnsupportedOperationException( "remove" );
        }

        /**
         * Reads the next record, converting the checked {@link IOException} into a {@link RuntimeException} as the
         * {@link Iterator} contract leaves no room for checked exceptions.
         */
        private Map<String, String> nextRecord()
        {
            try
            {
                return readRecord( dataInputStream );
            }
            catch ( IOException e )
            {
                throw new RuntimeException( "read error", e );
            }
        }
    }

    /**
     * Reads and returns next record from the underlying stream, or {@code null} if no more records.
     *
     * @throws IOException if reading fails or the record announces a negative field count
     */
    private static Map<String, String> readRecord( final DataInput dataInput )
        throws IOException
    {
        int fieldCount;
        try
        {
            fieldCount = dataInput.readInt();
        }
        catch ( EOFException ex )
        {
            return null; // no more documents
        }
        if ( fieldCount < 0 )
        {
            // a negative count can only come from damaged data; do not pass it off as an empty record
            throw new IOException( CORRUPT_MESSAGE );
        }

        Map<String, String> recordMap = new HashMap<String, String>();
        for ( int i = 0; i < fieldCount; i++ )
        {
            readField( recordMap, dataInput );
        }
        return recordMap;
    }

    /**
     * Reads one field (flag byte, name, value) and stores it into the given record.
     */
    private static void readField( final Map<String, String> record, final DataInput dataInput )
        throws IOException
    {
        dataInput.readByte(); // flags: neglect them
        String name = dataInput.readUTF();
        String value = readUTF( dataInput );
        record.put( name, value );
    }

    /**
     * Reads a string encoded like {@link DataInput#readUTF()} does, except that the byte length prefix is an
     * {@code int} instead of an unsigned short.
     *
     * @throws UTFDataFormatException if the bytes are not valid modified UTF-8
     * @throws IOException if the announced length is negative or too large to allocate, or if reading fails
     */
    private static String readUTF( final DataInput dataInput )
        throws IOException
    {
        int utflen = dataInput.readInt();
        if ( utflen < 0 )
        {
            // would otherwise surface as an unchecked NegativeArraySizeException from the allocation below
            throw new IOException( CORRUPT_MESSAGE );
        }

        byte[] bytearr;
        char[] chararr;

        try
        {
            bytearr = new byte[utflen];
            chararr = new char[utflen];
        }
        catch ( OutOfMemoryError e )
        {
            // an absurdly large length is reported as corrupt data
            IOException ioex = new IOException( CORRUPT_MESSAGE );
            ioex.initCause( e );
            throw ioex;
        }

        int c, char2, char3;
        int count = 0;
        int chararrCount = 0;

        dataInput.readFully( bytearr, 0, utflen );

        // fast path: copy the leading run of single byte characters
        while ( count < utflen )
        {
            c = bytearr[count] & 0xff;
            if ( c > 127 )
            {
                break;
            }
            count++;
            chararr[chararrCount++] = (char) c;
        }

        // general path: decode the remainder, dispatching on the high nibble of the lead byte
        while ( count < utflen )
        {
            c = bytearr[count] & 0xff;
            switch ( c >> 4 )
            {
                case 0:
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7:
                    /* 0xxxxxxx */
                    count++;
                    chararr[chararrCount++] = (char) c;
                    break;

                case 12:
                case 13:
                    /* 110x xxxx 10xx xxxx */
                    count += 2;
                    if ( count > utflen )
                    {
                        throw new UTFDataFormatException( "malformed input: partial character at end" );
                    }
                    char2 = bytearr[count - 1];
                    if ( ( char2 & 0xC0 ) != 0x80 )
                    {
                        throw new UTFDataFormatException( "malformed input around byte " + count );
                    }
                    chararr[chararrCount++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
                    break;

                case 14:
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    count += 3;
                    if ( count > utflen )
                    {
                        throw new UTFDataFormatException( "malformed input: partial character at end" );
                    }
                    char2 = bytearr[count - 2];
                    char3 = bytearr[count - 1];
                    if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
                    {
                        throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
                    }
                    chararr[chararrCount++] =
                        (char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( char3 & 0x3F ) );
                    break;

                default:
                    /* 10xx xxxx, 1111 xxxx */
                    throw new UTFDataFormatException( "malformed input around byte " + count );
            }
        }

        // The number of chars produced may be less than utflen
        return new String( chararr, 0, chararrCount );
    }
}