1 package org.apache.maven.index.updater;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedOutputStream;
23 import java.io.DataOutput;
24 import java.io.DataOutputStream;
25 import java.io.IOException;
26 import java.io.OutputStream;
27 import java.util.ArrayList;
28 import java.util.Date;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Set;
32 import java.util.zip.GZIPOutputStream;
33
34 import org.apache.lucene.document.Document;
35 import org.apache.lucene.document.Field;
36 import org.apache.lucene.document.Fieldable;
37 import org.apache.lucene.index.IndexReader;
38 import org.apache.maven.index.ArtifactInfo;
39 import org.apache.maven.index.context.DefaultIndexingContext;
40 import org.apache.maven.index.context.IndexingContext;
41
42
43
44
45
46
47 public class IndexDataWriter
48 {
49 static final int VERSION = 1;
50
51 static final int F_INDEXED = 1;
52
53 static final int F_TOKENIZED = 2;
54
55 static final int F_STORED = 4;
56
57 static final int F_COMPRESSED = 8;
58
59 private final DataOutputStream dos;
60
61 private final GZIPOutputStream gos;
62
63 private final BufferedOutputStream bos;
64
65 private final Set<String> allGroups;
66
67 private final Set<String> rootGroups;
68
69 private boolean descriptorWritten;
70
71 public IndexDataWriter( OutputStream os )
72 throws IOException
73 {
74 bos = new BufferedOutputStream( os, 1024 * 8 );
75 gos = new GZIPOutputStream( bos, 1024 * 2 );
76 dos = new DataOutputStream( gos );
77
78 this.allGroups = new HashSet<String>();
79 this.rootGroups = new HashSet<String>();
80 this.descriptorWritten = false;
81 }
82
83 public int write( IndexingContext context, List<Integer> docIndexes )
84 throws IOException
85 {
86 writeHeader( context );
87
88 int n = writeDocuments( context.getIndexReader(), docIndexes );
89
90 writeGroupFields();
91
92 close();
93
94 return n;
95 }
96
97 public void close()
98 throws IOException
99 {
100 dos.flush();
101
102 gos.flush();
103 gos.finish();
104
105 bos.flush();
106 }
107
108 public void writeHeader( IndexingContext context )
109 throws IOException
110 {
111 dos.writeByte( VERSION );
112
113 Date timestamp = context.getTimestamp();
114 dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
115 }
116
117 public void writeGroupFields()
118 throws IOException
119 {
120 {
121 List<Fieldable> allGroupsFields = new ArrayList<Fieldable>( 2 );
122 allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, Field.Store.YES,
123 Field.Index.NOT_ANALYZED ) );
124 allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ),
125 Field.Store.YES, Field.Index.NO ) );
126 writeDocumentFields( allGroupsFields );
127 }
128
129 {
130 List<Fieldable> rootGroupsFields = new ArrayList<Fieldable>( 2 );
131 rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, Field.Store.YES,
132 Field.Index.NOT_ANALYZED ) );
133 rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ),
134 Field.Store.YES, Field.Index.NO ) );
135 writeDocumentFields( rootGroupsFields );
136 }
137 }
138
139 public int writeDocuments( IndexReader r, List<Integer> docIndexes )
140 throws IOException
141 {
142 int n = 0;
143
144 if ( docIndexes == null )
145 {
146 for ( int i = 0; i < r.maxDoc(); i++ )
147 {
148 if ( !r.isDeleted( i ) )
149 {
150 if ( writeDocument( r.document( i ) ) )
151 {
152 n++;
153 }
154 }
155 }
156 }
157 else
158 {
159 for ( int i : docIndexes )
160 {
161 if ( !r.isDeleted( i ) )
162 {
163 if ( writeDocument( r.document( i ) ) )
164 {
165 n++;
166 }
167 }
168 }
169 }
170
171 return n;
172 }
173
174 public boolean writeDocument( final Document document )
175 throws IOException
176 {
177 List<Fieldable> fields = document.getFields();
178
179 List<Fieldable> storedFields = new ArrayList<Fieldable>( fields.size() );
180
181 for ( Fieldable field : fields )
182 {
183 if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) )
184 {
185 if ( descriptorWritten )
186 {
187 return false;
188 }
189 else
190 {
191 descriptorWritten = true;
192 }
193 }
194
195 if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) )
196 {
197 final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST );
198
199 if ( groupList != null && groupList.trim().length() > 0 )
200 {
201 allGroups.addAll( ArtifactInfo.str2lst( groupList ) );
202 }
203
204 return false;
205 }
206
207 if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) )
208 {
209 final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST );
210
211 if ( groupList != null && groupList.trim().length() > 0 )
212 {
213 rootGroups.addAll( ArtifactInfo.str2lst( groupList ) );
214 }
215
216 return false;
217 }
218
219 if ( field.isStored() )
220 {
221 storedFields.add( field );
222 }
223 }
224
225 writeDocumentFields( storedFields );
226
227 return true;
228 }
229
230 public void writeDocumentFields( List<Fieldable> fields )
231 throws IOException
232 {
233 dos.writeInt( fields.size() );
234
235 for ( Fieldable field : fields )
236 {
237 writeField( field );
238 }
239 }
240
241 public void writeField( Fieldable field )
242 throws IOException
243 {
244 int flags = ( field.isIndexed() ? F_INDEXED : 0 )
245 + ( field.isTokenized() ? F_TOKENIZED : 0 )
246 + ( field.isStored() ? F_STORED : 0 );
247
248
249 String name = field.name();
250 String value = field.stringValue();
251
252 dos.write( flags );
253 dos.writeUTF( name );
254 writeUTF( value, dos );
255 }
256
257 private static void writeUTF( String str, DataOutput out )
258 throws IOException
259 {
260 int strlen = str.length();
261 int utflen = 0;
262 int c;
263
264
265 for ( int i = 0; i < strlen; i++ )
266 {
267 c = str.charAt( i );
268 if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
269 {
270 utflen++;
271 }
272 else if ( c > 0x07FF )
273 {
274 utflen += 3;
275 }
276 else
277 {
278 utflen += 2;
279 }
280 }
281
282
283 out.writeInt( utflen );
284
285 byte[] bytearr = new byte[utflen];
286
287 int count = 0;
288
289 int i = 0;
290 for ( ; i < strlen; i++ )
291 {
292 c = str.charAt( i );
293 if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) )
294 {
295 break;
296 }
297 bytearr[count++] = (byte) c;
298 }
299
300 for ( ; i < strlen; i++ )
301 {
302 c = str.charAt( i );
303 if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
304 {
305 bytearr[count++] = (byte) c;
306
307 }
308 else if ( c > 0x07FF )
309 {
310 bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
311 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
312 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
313 }
314 else
315 {
316 bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
317 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
318 }
319 }
320
321 out.write( bytearr, 0, utflen );
322 }
323
324 }