View Javadoc

1   package org.apache.maven.index.updater;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedOutputStream;
23  import java.io.DataOutput;
24  import java.io.DataOutputStream;
25  import java.io.IOException;
26  import java.io.OutputStream;
27  import java.util.ArrayList;
28  import java.util.Date;
29  import java.util.HashSet;
30  import java.util.List;
31  import java.util.Set;
32  import java.util.zip.GZIPOutputStream;
33  
34  import org.apache.lucene.document.Document;
35  import org.apache.lucene.document.Field;
36  import org.apache.lucene.document.Fieldable;
37  import org.apache.lucene.index.IndexReader;
38  import org.apache.maven.index.ArtifactInfo;
39  import org.apache.maven.index.context.DefaultIndexingContext;
40  import org.apache.maven.index.context.IndexingContext;
41  
42  /**
43   * An index data writer used to write transfer index format.
44   * 
45   * @author Eugene Kuleshov
46   */
47  public class IndexDataWriter
48  {
49      static final int VERSION = 1;
50  
51      static final int F_INDEXED = 1;
52  
53      static final int F_TOKENIZED = 2;
54  
55      static final int F_STORED = 4;
56  
57      static final int F_COMPRESSED = 8;
58  
59      private final DataOutputStream dos;
60  
61      private final GZIPOutputStream gos;
62  
63      private final BufferedOutputStream bos;
64  
65      private final Set<String> allGroups;
66  
67      private final Set<String> rootGroups;
68  
69      private boolean descriptorWritten;
70  
71      public IndexDataWriter( OutputStream os )
72          throws IOException
73      {
74          bos = new BufferedOutputStream( os, 1024 * 8 );
75          gos = new GZIPOutputStream( bos, 1024 * 2 );
76          dos = new DataOutputStream( gos );
77  
78          this.allGroups = new HashSet<String>();
79          this.rootGroups = new HashSet<String>();
80          this.descriptorWritten = false;
81      }
82  
83      public int write( IndexingContext context, List<Integer> docIndexes )
84          throws IOException
85      {
86          writeHeader( context );
87  
88          int n = writeDocuments( context.getIndexReader(), docIndexes );
89  
90          writeGroupFields();
91  
92          close();
93  
94          return n;
95      }
96  
97      public void close()
98          throws IOException
99      {
100         dos.flush();
101 
102         gos.flush();
103         gos.finish();
104 
105         bos.flush();
106     }
107 
108     public void writeHeader( IndexingContext context )
109         throws IOException
110     {
111         dos.writeByte( VERSION );
112 
113         Date timestamp = context.getTimestamp();
114         dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
115     }
116 
117     public void writeGroupFields()
118         throws IOException
119     {
120         {
121             List<Fieldable> allGroupsFields = new ArrayList<Fieldable>( 2 );
122             allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, Field.Store.YES,
123                 Field.Index.NOT_ANALYZED ) );
124             allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ),
125                 Field.Store.YES, Field.Index.NO ) );
126             writeDocumentFields( allGroupsFields );
127         }
128 
129         {
130             List<Fieldable> rootGroupsFields = new ArrayList<Fieldable>( 2 );
131             rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, Field.Store.YES,
132                 Field.Index.NOT_ANALYZED ) );
133             rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ),
134                 Field.Store.YES, Field.Index.NO ) );
135             writeDocumentFields( rootGroupsFields );
136         }
137     }
138 
139     public int writeDocuments( IndexReader r, List<Integer> docIndexes )
140         throws IOException
141     {
142         int n = 0;
143 
144         if ( docIndexes == null )
145         {
146             for ( int i = 0; i < r.maxDoc(); i++ )
147             {
148                 if ( !r.isDeleted( i ) )
149                 {
150                     if ( writeDocument( r.document( i ) ) )
151                     {
152                         n++;
153                     }
154                 }
155             }
156         }
157         else
158         {
159             for ( int i : docIndexes )
160             {
161                 if ( !r.isDeleted( i ) )
162                 {
163                     if ( writeDocument( r.document( i ) ) )
164                     {
165                         n++;
166                     }
167                 }
168             }
169         }
170 
171         return n;
172     }
173 
174     public boolean writeDocument( final Document document )
175         throws IOException
176     {
177         List<Fieldable> fields = document.getFields();
178 
179         List<Fieldable> storedFields = new ArrayList<Fieldable>( fields.size() );
180 
181         for ( Fieldable field : fields )
182         {
183             if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) )
184             {
185                 if ( descriptorWritten )
186                 {
187                     return false;
188                 }
189                 else
190                 {
191                     descriptorWritten = true;
192                 }
193             }
194 
195             if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) )
196             {
197                 final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST );
198 
199                 if ( groupList != null && groupList.trim().length() > 0 )
200                 {
201                     allGroups.addAll( ArtifactInfo.str2lst( groupList ) );
202                 }
203 
204                 return false;
205             }
206 
207             if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) )
208             {
209                 final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST );
210 
211                 if ( groupList != null && groupList.trim().length() > 0 )
212                 {
213                     rootGroups.addAll( ArtifactInfo.str2lst( groupList ) );
214                 }
215 
216                 return false;
217             }
218 
219             if ( field.isStored() )
220             {
221                 storedFields.add( field );
222             }
223         }
224 
225         writeDocumentFields( storedFields );
226 
227         return true;
228     }
229 
230     public void writeDocumentFields( List<Fieldable> fields )
231         throws IOException
232     {
233         dos.writeInt( fields.size() );
234 
235         for ( Fieldable field : fields )
236         {
237             writeField( field );
238         }
239     }
240 
241     public void writeField( Fieldable field )
242         throws IOException
243     {
244         int flags = ( field.isIndexed() ? F_INDEXED : 0 ) //
245             + ( field.isTokenized() ? F_TOKENIZED : 0 ) //
246             + ( field.isStored() ? F_STORED : 0 ); //
247         // + ( false ? F_COMPRESSED : 0 ); // Compressed not supported anymore
248 
249         String name = field.name();
250         String value = field.stringValue();
251 
252         dos.write( flags );
253         dos.writeUTF( name );
254         writeUTF( value, dos );
255     }
256 
257     private static void writeUTF( String str, DataOutput out )
258         throws IOException
259     {
260         int strlen = str.length();
261         int utflen = 0;
262         int c;
263 
264         // use charAt instead of copying String to char array
265         for ( int i = 0; i < strlen; i++ )
266         {
267             c = str.charAt( i );
268             if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
269             {
270                 utflen++;
271             }
272             else if ( c > 0x07FF )
273             {
274                 utflen += 3;
275             }
276             else
277             {
278                 utflen += 2;
279             }
280         }
281 
282         // TODO optimize storing int value
283         out.writeInt( utflen );
284 
285         byte[] bytearr = new byte[utflen];
286 
287         int count = 0;
288 
289         int i = 0;
290         for ( ; i < strlen; i++ )
291         {
292             c = str.charAt( i );
293             if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) )
294             {
295                 break;
296             }
297             bytearr[count++] = (byte) c;
298         }
299 
300         for ( ; i < strlen; i++ )
301         {
302             c = str.charAt( i );
303             if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
304             {
305                 bytearr[count++] = (byte) c;
306 
307             }
308             else if ( c > 0x07FF )
309             {
310                 bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
311                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
312                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
313             }
314             else
315             {
316                 bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
317                 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
318             }
319         }
320 
321         out.write( bytearr, 0, utflen );
322     }
323 
324 }