1 | |
package org.apache.maven.index.updater; |
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
import java.io.BufferedOutputStream; |
23 | |
import java.io.DataOutput; |
24 | |
import java.io.DataOutputStream; |
25 | |
import java.io.IOException; |
26 | |
import java.io.OutputStream; |
27 | |
import java.util.ArrayList; |
28 | |
import java.util.Date; |
29 | |
import java.util.HashSet; |
30 | |
import java.util.List; |
31 | |
import java.util.Set; |
32 | |
import java.util.zip.GZIPOutputStream; |
33 | |
|
34 | |
import org.apache.lucene.document.Document; |
35 | |
import org.apache.lucene.document.Field; |
36 | |
import org.apache.lucene.document.Fieldable; |
37 | |
import org.apache.lucene.index.IndexReader; |
38 | |
import org.apache.maven.index.ArtifactInfo; |
39 | |
import org.apache.maven.index.context.DefaultIndexingContext; |
40 | |
import org.apache.maven.index.context.IndexingContext; |
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
public class IndexDataWriter |
48 | |
{ |
49 | |
static final int VERSION = 1; |
50 | |
|
51 | |
static final int F_INDEXED = 1; |
52 | |
|
53 | |
static final int F_TOKENIZED = 2; |
54 | |
|
55 | |
static final int F_STORED = 4; |
56 | |
|
57 | |
static final int F_COMPRESSED = 8; |
58 | |
|
59 | |
private final DataOutputStream dos; |
60 | |
|
61 | |
private final GZIPOutputStream gos; |
62 | |
|
63 | |
private final BufferedOutputStream bos; |
64 | |
|
65 | |
private final Set<String> allGroups; |
66 | |
|
67 | |
private final Set<String> rootGroups; |
68 | |
|
69 | |
private boolean descriptorWritten; |
70 | |
|
71 | |
public IndexDataWriter( OutputStream os ) |
72 | |
throws IOException |
73 | 230 | { |
74 | 230 | bos = new BufferedOutputStream( os, 1024 * 8 ); |
75 | 230 | gos = new GZIPOutputStream( bos, 1024 * 2 ); |
76 | 230 | dos = new DataOutputStream( gos ); |
77 | |
|
78 | 230 | this.allGroups = new HashSet<String>(); |
79 | 230 | this.rootGroups = new HashSet<String>(); |
80 | 230 | this.descriptorWritten = false; |
81 | 230 | } |
82 | |
|
83 | |
public int write( IndexingContext context, List<Integer> docIndexes ) |
84 | |
throws IOException |
85 | |
{ |
86 | 230 | writeHeader( context ); |
87 | |
|
88 | 230 | int n = writeDocuments( context.getIndexReader(), docIndexes ); |
89 | |
|
90 | 230 | writeGroupFields(); |
91 | |
|
92 | 230 | close(); |
93 | |
|
94 | 230 | return n; |
95 | |
} |
96 | |
|
97 | |
public void close() |
98 | |
throws IOException |
99 | |
{ |
100 | 230 | dos.flush(); |
101 | |
|
102 | 230 | gos.flush(); |
103 | 230 | gos.finish(); |
104 | |
|
105 | 230 | bos.flush(); |
106 | 230 | } |
107 | |
|
108 | |
public void writeHeader( IndexingContext context ) |
109 | |
throws IOException |
110 | |
{ |
111 | 230 | dos.writeByte( VERSION ); |
112 | |
|
113 | 230 | Date timestamp = context.getTimestamp(); |
114 | 230 | dos.writeLong( timestamp == null ? -1 : timestamp.getTime() ); |
115 | 230 | } |
116 | |
|
117 | |
public void writeGroupFields() |
118 | |
throws IOException |
119 | |
{ |
120 | |
{ |
121 | 230 | List<Fieldable> allGroupsFields = new ArrayList<Fieldable>( 2 ); |
122 | 230 | allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, Field.Store.YES, |
123 | |
Field.Index.NOT_ANALYZED ) ); |
124 | 230 | allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ), |
125 | |
Field.Store.YES, Field.Index.NO ) ); |
126 | 230 | writeDocumentFields( allGroupsFields ); |
127 | |
} |
128 | |
|
129 | |
{ |
130 | 230 | List<Fieldable> rootGroupsFields = new ArrayList<Fieldable>( 2 ); |
131 | 230 | rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, Field.Store.YES, |
132 | |
Field.Index.NOT_ANALYZED ) ); |
133 | 230 | rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ), |
134 | |
Field.Store.YES, Field.Index.NO ) ); |
135 | 230 | writeDocumentFields( rootGroupsFields ); |
136 | |
} |
137 | 230 | } |
138 | |
|
139 | |
public int writeDocuments( IndexReader r, List<Integer> docIndexes ) |
140 | |
throws IOException |
141 | |
{ |
142 | 230 | int n = 0; |
143 | |
|
144 | 230 | if ( docIndexes == null ) |
145 | |
{ |
146 | 18447 | for ( int i = 0; i < r.maxDoc(); i++ ) |
147 | |
{ |
148 | 18239 | if ( !r.isDeleted( i ) ) |
149 | |
{ |
150 | 18189 | if ( writeDocument( r.document( i ) ) ) |
151 | |
{ |
152 | 17328 | n++; |
153 | |
} |
154 | |
} |
155 | |
} |
156 | |
} |
157 | |
else |
158 | |
{ |
159 | 22 | for ( int i : docIndexes ) |
160 | |
{ |
161 | 22 | if ( !r.isDeleted( i ) ) |
162 | |
{ |
163 | 22 | if ( writeDocument( r.document( i ) ) ) |
164 | |
{ |
165 | 22 | n++; |
166 | |
} |
167 | |
} |
168 | |
} |
169 | |
} |
170 | |
|
171 | 230 | return n; |
172 | |
} |
173 | |
|
174 | |
public boolean writeDocument( final Document document ) |
175 | |
throws IOException |
176 | |
{ |
177 | 18211 | List<Fieldable> fields = document.getFields(); |
178 | |
|
179 | 18211 | List<Fieldable> storedFields = new ArrayList<Fieldable>( fields.size() ); |
180 | |
|
181 | 18211 | for ( Fieldable field : fields ) |
182 | |
{ |
183 | 90591 | if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) ) |
184 | |
{ |
185 | 356 | if ( descriptorWritten ) |
186 | |
{ |
187 | 149 | return false; |
188 | |
} |
189 | |
else |
190 | |
{ |
191 | 207 | descriptorWritten = true; |
192 | |
} |
193 | |
} |
194 | |
|
195 | 90442 | if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) ) |
196 | |
{ |
197 | 356 | final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST ); |
198 | |
|
199 | 356 | if ( groupList != null && groupList.trim().length() > 0 ) |
200 | |
{ |
201 | 354 | allGroups.addAll( ArtifactInfo.str2lst( groupList ) ); |
202 | |
} |
203 | |
|
204 | 356 | return false; |
205 | |
} |
206 | |
|
207 | 90086 | if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) ) |
208 | |
{ |
209 | 356 | final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST ); |
210 | |
|
211 | 356 | if ( groupList != null && groupList.trim().length() > 0 ) |
212 | |
{ |
213 | 354 | rootGroups.addAll( ArtifactInfo.str2lst( groupList ) ); |
214 | |
} |
215 | |
|
216 | 356 | return false; |
217 | |
} |
218 | |
|
219 | 89730 | if ( field.isStored() ) |
220 | |
{ |
221 | 89730 | storedFields.add( field ); |
222 | |
} |
223 | |
} |
224 | |
|
225 | 17350 | writeDocumentFields( storedFields ); |
226 | |
|
227 | 17350 | return true; |
228 | |
} |
229 | |
|
230 | |
public void writeDocumentFields( List<Fieldable> fields ) |
231 | |
throws IOException |
232 | |
{ |
233 | 17810 | dos.writeInt( fields.size() ); |
234 | |
|
235 | 17810 | for ( Fieldable field : fields ) |
236 | |
{ |
237 | 90650 | writeField( field ); |
238 | |
} |
239 | 17810 | } |
240 | |
|
241 | |
public void writeField( Fieldable field ) |
242 | |
throws IOException |
243 | |
{ |
244 | 90650 | int flags = ( field.isIndexed() ? F_INDEXED : 0 ) |
245 | |
+ ( field.isTokenized() ? F_TOKENIZED : 0 ) |
246 | |
+ ( field.isStored() ? F_STORED : 0 ); |
247 | |
|
248 | |
|
249 | 90650 | String name = field.name(); |
250 | 90650 | String value = field.stringValue(); |
251 | |
|
252 | 90650 | dos.write( flags ); |
253 | 90650 | dos.writeUTF( name ); |
254 | 90650 | writeUTF( value, dos ); |
255 | 90650 | } |
256 | |
|
257 | |
private static void writeUTF( String str, DataOutput out ) |
258 | |
throws IOException |
259 | |
{ |
260 | 90650 | int strlen = str.length(); |
261 | 90650 | int utflen = 0; |
262 | |
int c; |
263 | |
|
264 | |
|
265 | 3470716 | for ( int i = 0; i < strlen; i++ ) |
266 | |
{ |
267 | 3380066 | c = str.charAt( i ); |
268 | 3380066 | if ( ( c >= 0x0001 ) && ( c <= 0x007F ) ) |
269 | |
{ |
270 | 3380066 | utflen++; |
271 | |
} |
272 | 0 | else if ( c > 0x07FF ) |
273 | |
{ |
274 | 0 | utflen += 3; |
275 | |
} |
276 | |
else |
277 | |
{ |
278 | 0 | utflen += 2; |
279 | |
} |
280 | |
} |
281 | |
|
282 | |
|
283 | 90650 | out.writeInt( utflen ); |
284 | |
|
285 | 90650 | byte[] bytearr = new byte[utflen]; |
286 | |
|
287 | 90650 | int count = 0; |
288 | |
|
289 | 90650 | int i = 0; |
290 | 6850782 | for ( ; i < strlen; i++ ) |
291 | |
{ |
292 | 3380066 | c = str.charAt( i ); |
293 | 3380066 | if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) ) |
294 | |
{ |
295 | 0 | break; |
296 | |
} |
297 | 3380066 | bytearr[count++] = (byte) c; |
298 | |
} |
299 | |
|
300 | 90650 | for ( ; i < strlen; i++ ) |
301 | |
{ |
302 | 0 | c = str.charAt( i ); |
303 | 0 | if ( ( c >= 0x0001 ) && ( c <= 0x007F ) ) |
304 | |
{ |
305 | 0 | bytearr[count++] = (byte) c; |
306 | |
|
307 | |
} |
308 | 0 | else if ( c > 0x07FF ) |
309 | |
{ |
310 | 0 | bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) ); |
311 | 0 | bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) ); |
312 | 0 | bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) ); |
313 | |
} |
314 | |
else |
315 | |
{ |
316 | 0 | bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) ); |
317 | 0 | bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) ); |
318 | |
} |
319 | |
} |
320 | |
|
321 | 90650 | out.write( bytearr, 0, utflen ); |
322 | 90650 | } |
323 | |
|
324 | |
} |