1 | |
package org.apache.maven.index.updater; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import java.io.BufferedInputStream; |
23 | |
import java.io.DataInput; |
24 | |
import java.io.DataInputStream; |
25 | |
import java.io.EOFException; |
26 | |
import java.io.IOException; |
27 | |
import java.io.InputStream; |
28 | |
import java.io.UTFDataFormatException; |
29 | |
import java.util.Date; |
30 | |
import java.util.zip.GZIPInputStream; |
31 | |
|
32 | |
import org.apache.lucene.document.Document; |
33 | |
import org.apache.lucene.document.Field; |
34 | |
import org.apache.lucene.document.Field.Index; |
35 | |
import org.apache.lucene.document.Field.Store; |
36 | |
import org.apache.lucene.index.IndexWriter; |
37 | |
import org.apache.maven.index.context.IndexUtils; |
38 | |
import org.apache.maven.index.context.IndexingContext; |
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
public class IndexDataReader |
46 | |
{ |
47 | |
private final DataInputStream dis; |
48 | |
|
49 | |
public IndexDataReader( InputStream is ) |
50 | |
throws IOException |
51 | 36 | { |
52 | 36 | BufferedInputStream bis = new BufferedInputStream( is, 1024 * 8 ); |
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | 36 | bis.mark( 2 ); |
58 | |
InputStream data; |
59 | 36 | if ( bis.read() == 0x1f && bis.read() == 0x8b ) |
60 | |
{ |
61 | 36 | bis.reset(); |
62 | 36 | data = new GZIPInputStream( bis, 2 * 1024 ); |
63 | |
} |
64 | |
else |
65 | |
{ |
66 | 0 | bis.reset(); |
67 | 0 | data = bis; |
68 | |
} |
69 | |
|
70 | 36 | this.dis = new DataInputStream( data ); |
71 | 36 | } |
72 | |
|
73 | |
public IndexDataReadResult readIndex( IndexWriter w, IndexingContext context ) |
74 | |
throws IOException |
75 | |
{ |
76 | 35 | long timestamp = readHeader(); |
77 | |
|
78 | 35 | Date date = null; |
79 | |
|
80 | 35 | if ( timestamp != -1 ) |
81 | |
{ |
82 | 34 | date = new Date( timestamp ); |
83 | |
|
84 | 34 | IndexUtils.updateTimestamp( w.getDirectory(), date ); |
85 | |
} |
86 | |
|
87 | 35 | int n = 0; |
88 | |
|
89 | |
Document doc; |
90 | 759 | while ( ( doc = readDocument() ) != null ) |
91 | |
{ |
92 | 724 | w.addDocument( IndexUtils.updateDocument( doc, context, false ) ); |
93 | |
|
94 | 724 | n++; |
95 | |
} |
96 | |
|
97 | 35 | w.commit(); |
98 | 35 | w.optimize(); |
99 | |
|
100 | 35 | IndexDataReadResult result = new IndexDataReadResult(); |
101 | 35 | result.setDocumentCount( n ); |
102 | 35 | result.setTimestamp( date ); |
103 | 35 | return result; |
104 | |
} |
105 | |
|
106 | |
public long readHeader() |
107 | |
throws IOException |
108 | |
{ |
109 | 36 | final byte HDRBYTE = (byte) ( ( IndexDataWriter.VERSION << 24 ) >> 24 ); |
110 | |
|
111 | 36 | if ( HDRBYTE != dis.readByte() ) |
112 | |
{ |
113 | |
|
114 | 0 | throw new IOException( "Provided input contains unexpected data (0x01 expected as 1st byte)!" ); |
115 | |
} |
116 | |
|
117 | 36 | return dis.readLong(); |
118 | |
} |
119 | |
|
120 | |
public Document readDocument() |
121 | |
throws IOException |
122 | |
{ |
123 | |
int fieldCount; |
124 | |
try |
125 | |
{ |
126 | 775 | fieldCount = dis.readInt(); |
127 | |
} |
128 | 36 | catch ( EOFException ex ) |
129 | |
{ |
130 | 36 | return null; |
131 | 739 | } |
132 | |
|
133 | 739 | Document doc = new Document(); |
134 | |
|
135 | 4380 | for ( int i = 0; i < fieldCount; i++ ) |
136 | |
{ |
137 | 3641 | doc.add( readField() ); |
138 | |
} |
139 | |
|
140 | 739 | return doc; |
141 | |
} |
142 | |
|
143 | |
private Field readField() |
144 | |
throws IOException |
145 | |
{ |
146 | 3641 | int flags = dis.read(); |
147 | |
|
148 | 3641 | Index index = Index.NO; |
149 | 3641 | if ( ( flags & IndexDataWriter.F_INDEXED ) > 0 ) |
150 | |
{ |
151 | 2243 | boolean isTokenized = ( flags & IndexDataWriter.F_TOKENIZED ) > 0; |
152 | 2243 | index = isTokenized ? Index.ANALYZED : Index.NOT_ANALYZED; |
153 | |
} |
154 | |
|
155 | 3641 | Store store = Store.NO; |
156 | 3641 | if ( ( flags & IndexDataWriter.F_STORED ) > 0 ) |
157 | |
{ |
158 | 3641 | store = Store.YES; |
159 | |
} |
160 | |
|
161 | 3641 | String name = dis.readUTF(); |
162 | 3641 | String value = readUTF( dis ); |
163 | |
|
164 | 3641 | return new Field( name, value, store, index ); |
165 | |
} |
166 | |
|
167 | |
private static String readUTF( DataInput in ) |
168 | |
throws IOException |
169 | |
{ |
170 | 3641 | int utflen = in.readInt(); |
171 | |
|
172 | |
byte[] bytearr; |
173 | |
char[] chararr; |
174 | |
|
175 | |
try |
176 | |
{ |
177 | 3641 | bytearr = new byte[utflen]; |
178 | 3641 | chararr = new char[utflen]; |
179 | |
} |
180 | 0 | catch ( OutOfMemoryError e ) |
181 | |
{ |
182 | 0 | final IOException ex = |
183 | |
new IOException( |
184 | |
"Index data content is inappropriate (is junk?), leads to OutOfMemoryError! See MINDEXER-28 for more information!" ); |
185 | 0 | ex.initCause( e ); |
186 | 0 | throw ex; |
187 | 3641 | } |
188 | |
|
189 | |
int c, char2, char3; |
190 | 3641 | int count = 0; |
191 | 3641 | int chararr_count = 0; |
192 | |
|
193 | 3641 | in.readFully( bytearr, 0, utflen ); |
194 | |
|
195 | 412952 | while ( count < utflen ) |
196 | |
{ |
197 | 409311 | c = bytearr[count] & 0xff; |
198 | 409311 | if ( c > 127 ) |
199 | |
{ |
200 | 0 | break; |
201 | |
} |
202 | 409311 | count++; |
203 | 409311 | chararr[chararr_count++] = (char) c; |
204 | |
} |
205 | |
|
206 | 3641 | while ( count < utflen ) |
207 | |
{ |
208 | 0 | c = bytearr[count] & 0xff; |
209 | 0 | switch ( c >> 4 ) |
210 | |
{ |
211 | |
case 0: |
212 | |
case 1: |
213 | |
case 2: |
214 | |
case 3: |
215 | |
case 4: |
216 | |
case 5: |
217 | |
case 6: |
218 | |
case 7: |
219 | |
|
220 | 0 | count++; |
221 | 0 | chararr[chararr_count++] = (char) c; |
222 | 0 | break; |
223 | |
|
224 | |
case 12: |
225 | |
case 13: |
226 | |
|
227 | 0 | count += 2; |
228 | 0 | if ( count > utflen ) |
229 | |
{ |
230 | 0 | throw new UTFDataFormatException( "malformed input: partial character at end" ); |
231 | |
} |
232 | 0 | char2 = bytearr[count - 1]; |
233 | 0 | if ( ( char2 & 0xC0 ) != 0x80 ) |
234 | |
{ |
235 | 0 | throw new UTFDataFormatException( "malformed input around byte " + count ); |
236 | |
} |
237 | 0 | chararr[chararr_count++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) ); |
238 | 0 | break; |
239 | |
|
240 | |
case 14: |
241 | |
|
242 | 0 | count += 3; |
243 | 0 | if ( count > utflen ) |
244 | |
{ |
245 | 0 | throw new UTFDataFormatException( "malformed input: partial character at end" ); |
246 | |
} |
247 | 0 | char2 = bytearr[count - 2]; |
248 | 0 | char3 = bytearr[count - 1]; |
249 | 0 | if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) ) |
250 | |
{ |
251 | 0 | throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) ); |
252 | |
} |
253 | 0 | chararr[chararr_count++] = |
254 | |
(char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( ( char3 & 0x3F ) << 0 ) ); |
255 | 0 | break; |
256 | |
|
257 | |
default: |
258 | |
|
259 | 0 | throw new UTFDataFormatException( "malformed input around byte " + count ); |
260 | |
} |
261 | |
} |
262 | |
|
263 | |
|
264 | 3641 | return new String( chararr, 0, chararr_count ); |
265 | |
} |
266 | |
|
267 | |
|
268 | |
|
269 | |
|
270 | 35 | public static class IndexDataReadResult |
271 | |
{ |
272 | |
private Date timestamp; |
273 | |
|
274 | |
private int documentCount; |
275 | |
|
276 | |
public void setDocumentCount( int documentCount ) |
277 | |
{ |
278 | 35 | this.documentCount = documentCount; |
279 | 35 | } |
280 | |
|
281 | |
public int getDocumentCount() |
282 | |
{ |
283 | 0 | return documentCount; |
284 | |
} |
285 | |
|
286 | |
public void setTimestamp( Date timestamp ) |
287 | |
{ |
288 | 35 | this.timestamp = timestamp; |
289 | 35 | } |
290 | |
|
291 | |
public Date getTimestamp() |
292 | |
{ |
293 | 35 | return timestamp; |
294 | |
} |
295 | |
|
296 | |
} |
297 | |
|
298 | |
|
299 | |
|
300 | |
|
301 | |
|
302 | |
|
303 | |
|
304 | |
|
305 | |
|
306 | |
|
307 | |
public IndexDataReadResult readIndex( final IndexDataReadVisitor visitor, final IndexingContext context ) |
308 | |
throws IOException |
309 | |
{ |
310 | 0 | dis.readByte(); |
311 | |
|
312 | 0 | long timestamp = dis.readLong(); |
313 | |
|
314 | 0 | Date date = null; |
315 | |
|
316 | 0 | if ( timestamp != -1 ) |
317 | |
{ |
318 | 0 | date = new Date( timestamp ); |
319 | |
} |
320 | |
|
321 | 0 | int n = 0; |
322 | |
|
323 | |
Document doc; |
324 | 0 | while ( ( doc = readDocument() ) != null ) |
325 | |
{ |
326 | 0 | visitor.visitDocument( IndexUtils.updateDocument( doc, context, false ) ); |
327 | |
|
328 | 0 | n++; |
329 | |
} |
330 | |
|
331 | 0 | IndexDataReadResult result = new IndexDataReadResult(); |
332 | 0 | result.setDocumentCount( n ); |
333 | 0 | result.setTimestamp( date ); |
334 | 0 | return result; |
335 | |
} |
336 | |
|
337 | |
|
338 | |
|
339 | |
|
340 | |
public static interface IndexDataReadVisitor |
341 | |
{ |
342 | |
|
343 | |
|
344 | |
|
345 | |
|
346 | |
|
347 | |
|
348 | |
void visitDocument( Document document ); |
349 | |
|
350 | |
} |
351 | |
|
352 | |
} |