View Javadoc
1   package org.apache.maven.index.updater;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedOutputStream;
23  import java.io.DataOutput;
24  import java.io.DataOutputStream;
25  import java.io.IOException;
26  import java.io.OutputStream;
27  import java.util.ArrayList;
28  import java.util.Date;
29  import java.util.HashSet;
30  import java.util.List;
31  import java.util.Set;
32  import java.util.zip.GZIPOutputStream;
33  import org.apache.lucene.document.Document;
34  import org.apache.lucene.document.Field.Store;
35  import org.apache.lucene.document.StringField;
36  import org.apache.lucene.index.IndexOptions;
37  import org.apache.lucene.index.IndexReader;
38  import org.apache.lucene.index.IndexableField;
39  import org.apache.lucene.index.MultiFields;
40  import org.apache.lucene.util.Bits;
41  import org.apache.maven.index.ArtifactInfo;
42  import org.apache.maven.index.context.DefaultIndexingContext;
43  import org.apache.maven.index.context.IndexingContext;
44  
45  /**
46   * An index data writer used to write transfer index format.
47   * 
48   * @author Eugene Kuleshov
49   */
50  public class IndexDataWriter
51  {
52      static final int VERSION = 1;
53  
54      static final int F_INDEXED = 1;
55  
56      static final int F_TOKENIZED = 2;
57  
58      static final int F_STORED = 4;
59  
60      static final int F_COMPRESSED = 8;
61  
62      private final DataOutputStream dos;
63  
64      private final GZIPOutputStream gos;
65  
66      private final BufferedOutputStream bos;
67  
68      private final Set<String> allGroups;
69  
70      private final Set<String> rootGroups;
71  
72      private boolean descriptorWritten;
73  
74      public IndexDataWriter( OutputStream os )
75          throws IOException
76      {
77          bos = new BufferedOutputStream( os, 1024 * 8 );
78          gos = new GZIPOutputStream( bos, 1024 * 2 );
79          dos = new DataOutputStream( gos );
80  
81          this.allGroups = new HashSet<String>();
82          this.rootGroups = new HashSet<String>();
83          this.descriptorWritten = false;
84      }
85  
86      public int write( IndexingContext context, IndexReader indexReader, List<Integer> docIndexes )
87          throws IOException
88      {
89          writeHeader( context );
90  
91          int n = writeDocuments( indexReader, docIndexes );
92  
93          writeGroupFields();
94  
95          close();
96  
97          return n;
98      }
99  
100     public void close()
101         throws IOException
102     {
103         dos.flush();
104 
105         gos.flush();
106         gos.finish();
107 
108         bos.flush();
109     }
110 
111     public void writeHeader( IndexingContext context )
112         throws IOException
113     {
114         dos.writeByte( VERSION );
115 
116         Date timestamp = context.getTimestamp();
117         dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
118     }
119 
120     public void writeGroupFields()
121         throws IOException
122     {
123         {
124             List<IndexableField> allGroupsFields = new ArrayList<>( 2 );
125             allGroupsFields.add( new StringField( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, Store.YES ) );
126             allGroupsFields.add( new StringField( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ),
127                                                   Store.YES ) );
128             writeDocumentFields( allGroupsFields );
129         }
130 
131         {
132             List<IndexableField> rootGroupsFields = new ArrayList<>( 2 );
133             rootGroupsFields.add( new StringField( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE,
134                                                    Store.YES ) );
135             rootGroupsFields.add( new StringField( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ),
136                                                    Store.YES ) );
137             writeDocumentFields( rootGroupsFields );
138         }
139     }
140 
141     public int writeDocuments( IndexReader r, List<Integer> docIndexes )
142         throws IOException
143     {
144         int n = 0;
145         Bits liveDocs = MultiFields.getLiveDocs( r );
146 
147         if ( docIndexes == null )
148         {
149             for ( int i = 0; i < r.maxDoc(); i++ )
150             {
151                 if ( liveDocs == null || liveDocs.get( i ) )
152                 {
153                     if ( writeDocument( r.document( i ) ) )
154                     {
155                         n++;
156                     }
157                 }
158             }
159         }
160         else
161         {
162             for ( int i : docIndexes )
163             {
164                 if ( liveDocs == null || liveDocs.get( i ) )
165                 {
166                     if ( writeDocument( r.document( i ) ) )
167                     {
168                         n++;
169                     }
170                 }
171             }
172         }
173 
174         return n;
175     }
176 
177     public boolean writeDocument( final Document document )
178         throws IOException
179     {
180         List<IndexableField> fields = document.getFields();
181 
182         List<IndexableField> storedFields = new ArrayList<>( fields.size() );
183 
184         for ( IndexableField field : fields )
185         {
186             if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) )
187             {
188                 if ( descriptorWritten )
189                 {
190                     return false;
191                 }
192                 else
193                 {
194                     descriptorWritten = true;
195                 }
196             }
197 
198             if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) )
199             {
200                 final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST );
201 
202                 if ( groupList != null && groupList.trim().length() > 0 )
203                 {
204                     allGroups.addAll( ArtifactInfo.str2lst( groupList ) );
205                 }
206 
207                 return false;
208             }
209 
210             if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) )
211             {
212                 final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST );
213 
214                 if ( groupList != null && groupList.trim().length() > 0 )
215                 {
216                     rootGroups.addAll( ArtifactInfo.str2lst( groupList ) );
217                 }
218 
219                 return false;
220             }
221 
222             if ( field.fieldType().stored() )
223             {
224                 storedFields.add( field );
225             }
226         }
227 
228         writeDocumentFields( storedFields );
229 
230         return true;
231     }
232 
233     public void writeDocumentFields( List<IndexableField> fields )
234         throws IOException
235     {
236         dos.writeInt( fields.size() );
237 
238         for ( IndexableField field : fields )
239         {
240             writeField( field );
241         }
242     }
243 
244     public void writeField( IndexableField field )
245         throws IOException
246     {
247         int flags = ( field.fieldType().indexOptions() != IndexOptions.NONE  ? F_INDEXED : 0 ) //
248             + ( field.fieldType().tokenized() ? F_TOKENIZED : 0 ) //
249             + ( field.fieldType().stored() ? F_STORED : 0 ); //
250         // + ( false ? F_COMPRESSED : 0 ); // Compressed not supported anymore
251 
252         String name = field.name();
253         String value = field.stringValue();
254 
255         dos.write( flags );
256         dos.writeUTF( name );
257         writeUTF( value, dos );
258     }
259 
260     private static void writeUTF( String str, DataOutput out )
261         throws IOException
262     {
263         int strlen = str.length();
264         int utflen = 0;
265         int c;
266 
267         // use charAt instead of copying String to char array
268         for ( int i = 0; i < strlen; i++ )
269         {
270             c = str.charAt( i );
271             if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
272             {
273                 utflen++;
274             }
275             else if ( c > 0x07FF )
276             {
277                 utflen += 3;
278             }
279             else
280             {
281                 utflen += 2;
282             }
283         }
284 
285         // TODO optimize storing int value
286         out.writeInt( utflen );
287 
288         byte[] bytearr = new byte[utflen];
289 
290         int count = 0;
291 
292         int i = 0;
293         for ( ; i < strlen; i++ )
294         {
295             c = str.charAt( i );
296             if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) )
297             {
298                 break;
299             }
300             bytearr[count++] = (byte) c;
301         }
302 
303         for ( ; i < strlen; i++ )
304         {
305             c = str.charAt( i );
306             if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
307             {
308                 bytearr[count++] = (byte) c;
309 
310             }
311             else if ( c > 0x07FF )
312             {
313                 bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
314                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
315                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
316             }
317             else
318             {
319                 bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
320                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
321             }
322         }
323 
324         out.write( bytearr, 0, utflen );
325     }
326 
327 }