1 package org.apache.maven.index.updater;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedOutputStream;
23 import java.io.DataOutput;
24 import java.io.DataOutputStream;
25 import java.io.IOException;
26 import java.io.OutputStream;
27 import java.util.ArrayList;
28 import java.util.Date;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Set;
32 import java.util.zip.GZIPOutputStream;
33 import org.apache.lucene.document.Document;
34 import org.apache.lucene.document.Field.Store;
35 import org.apache.lucene.document.StringField;
36 import org.apache.lucene.index.IndexOptions;
37 import org.apache.lucene.index.IndexReader;
38 import org.apache.lucene.index.IndexableField;
39 import org.apache.lucene.index.MultiFields;
40 import org.apache.lucene.util.Bits;
41 import org.apache.maven.index.ArtifactInfo;
42 import org.apache.maven.index.context.DefaultIndexingContext;
43 import org.apache.maven.index.context.IndexingContext;
44
45
46
47
48
49
50 public class IndexDataWriter
51 {
52 static final int VERSION = 1;
53
54 static final int F_INDEXED = 1;
55
56 static final int F_TOKENIZED = 2;
57
58 static final int F_STORED = 4;
59
60 static final int F_COMPRESSED = 8;
61
62 private final DataOutputStream dos;
63
64 private final GZIPOutputStream gos;
65
66 private final BufferedOutputStream bos;
67
68 private final Set<String> allGroups;
69
70 private final Set<String> rootGroups;
71
72 private boolean descriptorWritten;
73
74 public IndexDataWriter( OutputStream os )
75 throws IOException
76 {
77 bos = new BufferedOutputStream( os, 1024 * 8 );
78 gos = new GZIPOutputStream( bos, 1024 * 2 );
79 dos = new DataOutputStream( gos );
80
81 this.allGroups = new HashSet<String>();
82 this.rootGroups = new HashSet<String>();
83 this.descriptorWritten = false;
84 }
85
86 public int write( IndexingContext context, IndexReader indexReader, List<Integer> docIndexes )
87 throws IOException
88 {
89 writeHeader( context );
90
91 int n = writeDocuments( indexReader, docIndexes );
92
93 writeGroupFields();
94
95 close();
96
97 return n;
98 }
99
100 public void close()
101 throws IOException
102 {
103 dos.flush();
104
105 gos.flush();
106 gos.finish();
107
108 bos.flush();
109 }
110
111 public void writeHeader( IndexingContext context )
112 throws IOException
113 {
114 dos.writeByte( VERSION );
115
116 Date timestamp = context.getTimestamp();
117 dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
118 }
119
120 public void writeGroupFields()
121 throws IOException
122 {
123 {
124 List<IndexableField> allGroupsFields = new ArrayList<>( 2 );
125 allGroupsFields.add( new StringField( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, Store.YES ) );
126 allGroupsFields.add( new StringField( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ),
127 Store.YES ) );
128 writeDocumentFields( allGroupsFields );
129 }
130
131 {
132 List<IndexableField> rootGroupsFields = new ArrayList<>( 2 );
133 rootGroupsFields.add( new StringField( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE,
134 Store.YES ) );
135 rootGroupsFields.add( new StringField( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ),
136 Store.YES ) );
137 writeDocumentFields( rootGroupsFields );
138 }
139 }
140
141 public int writeDocuments( IndexReader r, List<Integer> docIndexes )
142 throws IOException
143 {
144 int n = 0;
145 Bits liveDocs = MultiFields.getLiveDocs( r );
146
147 if ( docIndexes == null )
148 {
149 for ( int i = 0; i < r.maxDoc(); i++ )
150 {
151 if ( liveDocs == null || liveDocs.get( i ) )
152 {
153 if ( writeDocument( r.document( i ) ) )
154 {
155 n++;
156 }
157 }
158 }
159 }
160 else
161 {
162 for ( int i : docIndexes )
163 {
164 if ( liveDocs == null || liveDocs.get( i ) )
165 {
166 if ( writeDocument( r.document( i ) ) )
167 {
168 n++;
169 }
170 }
171 }
172 }
173
174 return n;
175 }
176
177 public boolean writeDocument( final Document document )
178 throws IOException
179 {
180 List<IndexableField> fields = document.getFields();
181
182 List<IndexableField> storedFields = new ArrayList<>( fields.size() );
183
184 for ( IndexableField field : fields )
185 {
186 if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) )
187 {
188 if ( descriptorWritten )
189 {
190 return false;
191 }
192 else
193 {
194 descriptorWritten = true;
195 }
196 }
197
198 if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) )
199 {
200 final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST );
201
202 if ( groupList != null && groupList.trim().length() > 0 )
203 {
204 allGroups.addAll( ArtifactInfo.str2lst( groupList ) );
205 }
206
207 return false;
208 }
209
210 if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) )
211 {
212 final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST );
213
214 if ( groupList != null && groupList.trim().length() > 0 )
215 {
216 rootGroups.addAll( ArtifactInfo.str2lst( groupList ) );
217 }
218
219 return false;
220 }
221
222 if ( field.fieldType().stored() )
223 {
224 storedFields.add( field );
225 }
226 }
227
228 writeDocumentFields( storedFields );
229
230 return true;
231 }
232
233 public void writeDocumentFields( List<IndexableField> fields )
234 throws IOException
235 {
236 dos.writeInt( fields.size() );
237
238 for ( IndexableField field : fields )
239 {
240 writeField( field );
241 }
242 }
243
244 public void writeField( IndexableField field )
245 throws IOException
246 {
247 int flags = ( field.fieldType().indexOptions() != IndexOptions.NONE ? F_INDEXED : 0 )
248 + ( field.fieldType().tokenized() ? F_TOKENIZED : 0 )
249 + ( field.fieldType().stored() ? F_STORED : 0 );
250
251
252 String name = field.name();
253 String value = field.stringValue();
254
255 dos.write( flags );
256 dos.writeUTF( name );
257 writeUTF( value, dos );
258 }
259
260 private static void writeUTF( String str, DataOutput out )
261 throws IOException
262 {
263 int strlen = str.length();
264 int utflen = 0;
265 int c;
266
267
268 for ( int i = 0; i < strlen; i++ )
269 {
270 c = str.charAt( i );
271 if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
272 {
273 utflen++;
274 }
275 else if ( c > 0x07FF )
276 {
277 utflen += 3;
278 }
279 else
280 {
281 utflen += 2;
282 }
283 }
284
285
286 out.writeInt( utflen );
287
288 byte[] bytearr = new byte[utflen];
289
290 int count = 0;
291
292 int i = 0;
293 for ( ; i < strlen; i++ )
294 {
295 c = str.charAt( i );
296 if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) )
297 {
298 break;
299 }
300 bytearr[count++] = (byte) c;
301 }
302
303 for ( ; i < strlen; i++ )
304 {
305 c = str.charAt( i );
306 if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
307 {
308 bytearr[count++] = (byte) c;
309
310 }
311 else if ( c > 0x07FF )
312 {
313 bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
314 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
315 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
316 }
317 else
318 {
319 bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
320 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
321 }
322 }
323
324 out.write( bytearr, 0, utflen );
325 }
326
327 }