View Javadoc
1   package org.apache.maven.index.reader;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.Closeable;
23  import java.io.DataOutput;
24  import java.io.DataOutputStream;
25  import java.io.IOException;
26  import java.io.OutputStream;
27  import java.util.Date;
28  import java.util.Iterator;
29  import java.util.Map;
30  import java.util.zip.GZIPOutputStream;
31  
/**
 * Maven 2 Index published binary chunk writer, it writes raw Maven Indexer records to the transport binary format.
 * <p>
 * The output is GZIP compressed. The constructor writes a header consisting of a one byte version followed by an
 * eight byte timestamp ({@code -1} when no timestamp was given). Each record written afterwards consists of a four
 * byte field count followed by the fields; each field is a one byte flag set, the field name (as written by
 * {@link DataOutput#writeUTF(String)}) and the field value (four byte length followed by modified UTF-8 bytes).
 *
 * @since 5.1.2
 */
public class ChunkWriter
    implements Closeable
{
    /** Field flag: the field is indexed. */
    private static final int F_INDEXED = 1;

    /** Field flag: the field is tokenized. */
    private static final int F_TOKENIZED = 2;

    /** Field flag: the field is stored. Set on every field written by this class. */
    private static final int F_STORED = 4;

    private final String chunkName;

    private final DataOutputStream dataOutputStream;

    private final int version;

    private final Date timestamp;

    /**
     * Creates the writer and immediately writes the chunk header (version and timestamp) to the compressed stream.
     *
     * @param chunkName    the chunk name, must not be {@code null}; surrounding whitespace is trimmed
     * @param outputStream the stream to write the GZIP compressed chunk to; it is closed by {@link #close()}
     * @param version      the index version; only the low eight bits are written to the header
     * @param timestamp    the index timestamp, or {@code null} in which case {@code -1} is written
     * @throws IOException if the GZIP stream cannot be created or the header cannot be written
     */
    public ChunkWriter( final String chunkName, final OutputStream outputStream, final int version,
                        final Date timestamp )
        throws IOException
    {
        this.chunkName = chunkName.trim();
        this.dataOutputStream = new DataOutputStream( new GZIPOutputStream( outputStream, 2 * 1024 ) );
        this.version = version;
        this.timestamp = timestamp;

        dataOutputStream.writeByte( version );
        dataOutputStream.writeLong( timestamp == null ? -1 : timestamp.getTime() );
    }

    /**
     * Returns the chunk name.
     */
    public String getName()
    {
        return chunkName;
    }

    /**
     * Returns index version. All releases so far always returned {@code 1}.
     */
    public int getVersion()
    {
        return version;
    }

    /**
     * Returns the index timestamp of last update of the index, may be {@code null}.
     */
    public Date getTimestamp()
    {
        return timestamp;
    }

    /**
     * Writes out all records of the iterator and returns the written record count.
     *
     * @param iterator the records to write, each one a map of field name to field value
     * @return the number of records written
     * @throws IOException          if writing to the underlying stream fails
     * @throws NullPointerException if a record, a field name or a field value is {@code null}; nothing of the
     *                              offending record is written in that case
     */
    public int writeChunk( final Iterator<Map<String, String>> iterator )
        throws IOException
    {
        int written = 0;
        while ( iterator.hasNext() )
        {
            writeRecord( iterator.next(), dataOutputStream );
            written++;
        }
        return written;
    }

    /**
     * Closes this writer and its underlying output.
     */
    @Override
    public void close()
        throws IOException
    {
        dataOutputStream.close();
    }

    /**
     * Writes one record: the field count followed by every field of the record in map iteration order.
     */
    private static void writeRecord( final Map<String, String> record, final DataOutput dataOutput )
        throws IOException
    {
        // Validate the whole record before emitting a single byte, so that a bad entry cannot leave a
        // half-written record (field count without all of its fields) behind in the stream.
        for ( Map.Entry<String, String> entry : record.entrySet() )
        {
            if ( entry.getKey() == null || entry.getValue() == null )
            {
                throw new NullPointerException(
                    "Record field name and value must not be null: " + entry.getKey() + "=" + entry.getValue() );
            }
        }

        dataOutput.writeInt( record.size() );
        for ( Map.Entry<String, String> entry : record.entrySet() )
        {
            writeField( entry.getKey(), entry.getValue(), dataOutput );
        }
    }

    /**
     * Writes one field: flags byte, field name and field value. The flags are derived from the field name only:
     * fields {@code "i"} and {@code "m"} are neither indexed nor tokenized, fields {@code "1"} and {@code "px"}
     * are indexed but not tokenized, every other field is indexed and tokenized. All fields are stored.
     */
    private static void writeField( final String fieldName, final String fieldValue, final DataOutput dataOutput )
        throws IOException
    {
        final boolean storedOnly = "i".equals( fieldName ) || "m".equals( fieldName );
        final boolean untokenized = storedOnly || "1".equals( fieldName ) || "px".equals( fieldName );

        int flags = F_STORED;
        if ( !storedOnly )
        {
            flags |= F_INDEXED;
        }
        if ( !untokenized )
        {
            flags |= F_TOKENIZED;
        }

        dataOutput.writeByte( flags );
        dataOutput.writeUTF( fieldName );
        writeUTF( fieldValue, dataOutput );
    }

    /**
     * Writes the string as a four byte length followed by its modified UTF-8 bytes. This is the same encoding as
     * {@link DataOutput#writeUTF(String)} uses (each {@code char} is encoded on its own, {@code \u0000} takes two
     * bytes), except that the length prefix is an {@code int} instead of an unsigned {@code short}.
     */
    private static void writeUTF( final String str, final DataOutput dataOutput )
        throws IOException
    {
        final int strlen = str.length();

        // First pass: compute the encoded length, it has to be written before the bytes.
        int utflen = 0;
        for ( int i = 0; i < strlen; i++ )
        {
            final int c = str.charAt( i );
            if ( c >= 0x0001 && c <= 0x007F )
            {
                utflen++;
            }
            else if ( c > 0x07FF )
            {
                utflen += 3;
            }
            else
            {
                utflen += 2;
            }
        }
        dataOutput.writeInt( utflen );

        // Second pass: encode every char into one, two or three bytes.
        final byte[] bytearr = new byte[utflen];
        int count = 0;
        for ( int i = 0; i < strlen; i++ )
        {
            final int c = str.charAt( i );
            if ( c >= 0x0001 && c <= 0x007F )
            {
                bytearr[count++] = (byte) c;
            }
            else if ( c > 0x07FF )
            {
                bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
                bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
                bytearr[count++] = (byte) ( 0x80 | ( c & 0x3F ) );
            }
            else
            {
                // Covers 0x0080-0x07FF and also 0x0000, which modified UTF-8 encodes as two bytes.
                bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
                bytearr[count++] = (byte) ( 0x80 | ( c & 0x3F ) );
            }
        }
        dataOutput.write( bytearr, 0, utflen );
    }
}