View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    * 
9    * http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  
18  package org.apache.accumulo.core.file.rfile.bcfile;
19  
20  import java.io.DataInput;
21  import java.io.DataOutput;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.List;
25  
26  import org.apache.hadoop.io.Text;
27  
28  /**
29   * Supporting Utility classes used by TFile, and shared by users of TFile.
30   */
31  public final class Utils {
32    
33    /**
34     * Prevent the instantiation of Utils.
35     */
36    private Utils() {
37      // nothing
38    }
39    
40    /**
41     * Encoding an integer into a variable-length encoding format. Synonymous to <code>Utils#writeVLong(out, n)</code>.
42     * 
43     * @param out
44     *          output stream
45     * @param n
46     *          The integer to be encoded
47     * @throws IOException
48     * @see Utils#writeVLong(DataOutput, long)
49     */
50    public static void writeVInt(DataOutput out, int n) throws IOException {
51      writeVLong(out, n);
52    }
53    
54    /**
55     * Encoding a Long integer into a variable-length encoding format.
56     * <ul>
57     * <li>if n in [-32, 127): encode in one byte with the actual value. Otherwise,
58     * <li>if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; byte[1]=n&0xff. Otherwise,
59     * <li>if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
60     * <li>if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise:
61     * <li>if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
62     * <li>if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff;
63     * byte[5]=n&0xff
64     * <li>if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff;
65     * byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
66     * <li>if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff;
67     * byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff;
68     * <li>if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff;
69     * byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]=(n>>8)&0xff; byte[8]=n&0xff;
70     * </ul>
71     * 
72     * @param out
73     *          output stream
74     * @param n
75     *          the integer number
76     * @throws IOException
77     */
78    @SuppressWarnings("fallthrough")
79    public static void writeVLong(DataOutput out, long n) throws IOException {
80      if ((n < 128) && (n >= -32)) {
81        out.writeByte((int) n);
82        return;
83      }
84      
85      long un = (n < 0) ? ~n : n;
86      // how many bytes do we need to represent the number with sign bit?
87      int len = (Long.SIZE - Long.numberOfLeadingZeros(un)) / 8 + 1;
88      int firstByte = (int) (n >> ((len - 1) * 8));
89      switch (len) {
90        case 1:
91          // fall it through to firstByte==-1, len=2.
92          firstByte >>= 8;
93        case 2:
94          if ((firstByte < 20) && (firstByte >= -20)) {
95            out.writeByte(firstByte - 52);
96            out.writeByte((int) n);
97            return;
98          }
99          // fall it through to firstByte==0/-1, len=3.
100         firstByte >>= 8;
101       case 3:
102         if ((firstByte < 16) && (firstByte >= -16)) {
103           out.writeByte(firstByte - 88);
104           out.writeShort((int) n);
105           return;
106         }
107         // fall it through to firstByte==0/-1, len=4.
108         firstByte >>= 8;
109       case 4:
110         if ((firstByte < 8) && (firstByte >= -8)) {
111           out.writeByte(firstByte - 112);
112           out.writeShort(((int) n) >>> 8);
113           out.writeByte((int) n);
114           return;
115         }
116         out.writeByte(len - 129);
117         out.writeInt((int) n);
118         return;
119       case 5:
120         out.writeByte(len - 129);
121         out.writeInt((int) (n >>> 8));
122         out.writeByte((int) n);
123         return;
124       case 6:
125         out.writeByte(len - 129);
126         out.writeInt((int) (n >>> 16));
127         out.writeShort((int) n);
128         return;
129       case 7:
130         out.writeByte(len - 129);
131         out.writeInt((int) (n >>> 24));
132         out.writeShort((int) (n >>> 8));
133         out.writeByte((int) n);
134         return;
135       case 8:
136         out.writeByte(len - 129);
137         out.writeLong(n);
138         return;
139       default:
140         throw new RuntimeException("Internel error");
141     }
142   }
143   
144   /**
145    * Decoding the variable-length integer. Synonymous to <code>(int)Utils#readVLong(in)</code>.
146    * 
147    * @param in
148    *          input stream
149    * @return the decoded integer
150    * @throws IOException
151    * 
152    * @see Utils#readVLong(DataInput)
153    */
154   public static int readVInt(DataInput in) throws IOException {
155     long ret = readVLong(in);
156     if ((ret > Integer.MAX_VALUE) || (ret < Integer.MIN_VALUE)) {
157       throw new RuntimeException("Number too large to be represented as Integer");
158     }
159     return (int) ret;
160   }
161   
162   /**
163    * Decoding the variable-length integer. Suppose the value of the first byte is FB, and the following bytes are NB[*].
164    * <ul>
165    * <li>if (FB >= -32), return (long)FB;
166    * <li>if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
167    * <li>if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 + NB[1]&0xff;
168    * <li>if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 + (NB[1]&0xff)<<8 + NB[2]&0xff;
169    * <li>if (FB in [-128, -121]), return interpret NB[FB+129] as a signed big-endian integer.
170    * 
171    * @param in
172    *          input stream
173    * @return the decoded long integer.
174    * @throws IOException
175    */
176   
177   public static long readVLong(DataInput in) throws IOException {
178     int firstByte = in.readByte();
179     if (firstByte >= -32) {
180       return firstByte;
181     }
182     
183     switch ((firstByte + 128) / 8) {
184       case 11:
185       case 10:
186       case 9:
187       case 8:
188       case 7:
189         return ((firstByte + 52) << 8) | in.readUnsignedByte();
190       case 6:
191       case 5:
192       case 4:
193       case 3:
194         return ((firstByte + 88) << 16) | in.readUnsignedShort();
195       case 2:
196       case 1:
197         return ((firstByte + 112) << 24) | (in.readUnsignedShort() << 8) | in.readUnsignedByte();
198       case 0:
199         int len = firstByte + 129;
200         switch (len) {
201           case 4:
202             return in.readInt();
203           case 5:
204             return ((long) in.readInt()) << 8 | in.readUnsignedByte();
205           case 6:
206             return ((long) in.readInt()) << 16 | in.readUnsignedShort();
207           case 7:
208             return ((long) in.readInt()) << 24 | (in.readUnsignedShort() << 8) | in.readUnsignedByte();
209           case 8:
210             return in.readLong();
211           default:
212             throw new IOException("Corrupted VLong encoding");
213         }
214       default:
215         throw new RuntimeException("Internal error");
216     }
217   }
218   
219   /**
220    * Write a String as a VInt n, followed by n Bytes as in Text format.
221    * 
222    * @param out
223    * @param s
224    * @throws IOException
225    */
226   public static void writeString(DataOutput out, String s) throws IOException {
227     if (s != null) {
228       Text text = new Text(s);
229       byte[] buffer = text.getBytes();
230       int len = text.getLength();
231       writeVInt(out, len);
232       out.write(buffer, 0, len);
233     } else {
234       writeVInt(out, -1);
235     }
236   }
237   
238   /**
239    * Read a String as a VInt n, followed by n Bytes in Text format.
240    * 
241    * @param in
242    *          The input stream.
243    * @return The string
244    * @throws IOException
245    */
246   public static String readString(DataInput in) throws IOException {
247     int length = readVInt(in);
248     if (length == -1)
249       return null;
250     byte[] buffer = new byte[length];
251     in.readFully(buffer);
252     return Text.decode(buffer);
253   }
254   
255   /**
256    * A generic Version class. We suggest applications built on top of TFile use this class to maintain version information in their meta blocks.
257    * 
258    * A version number consists of a major version and a minor version. The suggested usage of major and minor version number is to increment major version
259    * number when the new storage format is not backward compatible, and increment the minor version otherwise.
260    */
261   public static final class Version implements Comparable<Version> {
262     private final short major;
263     private final short minor;
264     
265     /**
266      * Construct the Version object by reading from the input stream.
267      * 
268      * @param in
269      *          input stream
270      * @throws IOException
271      */
272     public Version(DataInput in) throws IOException {
273       major = in.readShort();
274       minor = in.readShort();
275     }
276     
277     /**
278      * Constructor.
279      * 
280      * @param major
281      *          major version.
282      * @param minor
283      *          minor version.
284      */
285     public Version(short major, short minor) {
286       this.major = major;
287       this.minor = minor;
288     }
289     
290     /**
291      * Write the object to a DataOutput. The serialized format of the Version is major version followed by minor version, both as big-endian short integers.
292      * 
293      * @param out
294      *          The DataOutput object.
295      * @throws IOException
296      */
297     public void write(DataOutput out) throws IOException {
298       out.writeShort(major);
299       out.writeShort(minor);
300     }
301     
302     /**
303      * Get the major version.
304      * 
305      * @return Major version.
306      */
307     public int getMajor() {
308       return major;
309     }
310     
311     /**
312      * Get the minor version.
313      * 
314      * @return The minor version.
315      */
316     public int getMinor() {
317       return minor;
318     }
319     
320     /**
321      * Get the size of the serialized Version object.
322      * 
323      * @return serialized size of the version object.
324      */
325     public static int size() {
326       return (Short.SIZE + Short.SIZE) / Byte.SIZE;
327     }
328     
329     /**
330      * Return a string representation of the version.
331      */
332     @Override
333     public String toString() {
334       return new StringBuilder("v").append(major).append(".").append(minor).toString();
335     }
336     
337     /**
338      * Test compatibility.
339      * 
340      * @param other
341      *          The Version object to test compatibility with.
342      * @return true if both versions have the same major version number; false otherwise.
343      */
344     public boolean compatibleWith(Version other) {
345       return major == other.major;
346     }
347     
348     /**
349      * Compare this version with another version.
350      */
351     @Override
352     public int compareTo(Version that) {
353       if (major != that.major) {
354         return major - that.major;
355       }
356       return minor - that.minor;
357     }
358     
359     @Override
360     public boolean equals(Object other) {
361       if (this == other)
362         return true;
363       if (!(other instanceof Version))
364         return false;
365       return compareTo((Version) other) == 0;
366     }
367     
368     @Override
369     public int hashCode() {
370       return (major << 16 + minor);
371     }
372   }
373   
374   /**
375    * Lower bound binary search. Find the index to the first element in the list that compares greater than or equal to key.
376    * 
377    * @param <T>
378    *          Type of the input key.
379    * @param list
380    *          The list
381    * @param key
382    *          The input key.
383    * @param cmp
384    *          Comparator for the key.
385    * @return The index to the desired element if it exists; or list.size() otherwise.
386    */
387   public static <T> int lowerBound(List<? extends T> list, T key, Comparator<? super T> cmp) {
388     int low = 0;
389     int high = list.size();
390     
391     while (low < high) {
392       int mid = (low + high) >>> 1;
393       T midVal = list.get(mid);
394       int ret = cmp.compare(midVal, key);
395       if (ret < 0)
396         low = mid + 1;
397       else
398         high = mid;
399     }
400     return low;
401   }
402   
403   /**
404    * Upper bound binary search. Find the index to the first element in the list that compares greater than the input key.
405    * 
406    * @param <T>
407    *          Type of the input key.
408    * @param list
409    *          The list
410    * @param key
411    *          The input key.
412    * @param cmp
413    *          Comparator for the key.
414    * @return The index to the desired element if it exists; or list.size() otherwise.
415    */
416   public static <T> int upperBound(List<? extends T> list, T key, Comparator<? super T> cmp) {
417     int low = 0;
418     int high = list.size();
419     
420     while (low < high) {
421       int mid = (low + high) >>> 1;
422       T midVal = list.get(mid);
423       int ret = cmp.compare(midVal, key);
424       if (ret <= 0)
425         low = mid + 1;
426       else
427         high = mid;
428     }
429     return low;
430   }
431   
432   /**
433    * Lower bound binary search. Find the index to the first element in the list that compares greater than or equal to key.
434    * 
435    * @param <T>
436    *          Type of the input key.
437    * @param list
438    *          The list
439    * @param key
440    *          The input key.
441    * @return The index to the desired element if it exists; or list.size() otherwise.
442    */
443   public static <T> int lowerBound(List<? extends Comparable<? super T>> list, T key) {
444     int low = 0;
445     int high = list.size();
446     
447     while (low < high) {
448       int mid = (low + high) >>> 1;
449       Comparable<? super T> midVal = list.get(mid);
450       int ret = midVal.compareTo(key);
451       if (ret < 0)
452         low = mid + 1;
453       else
454         high = mid;
455     }
456     return low;
457   }
458   
459   /**
460    * Upper bound binary search. Find the index to the first element in the list that compares greater than the input key.
461    * 
462    * @param <T>
463    *          Type of the input key.
464    * @param list
465    *          The list
466    * @param key
467    *          The input key.
468    * @return The index to the desired element if it exists; or list.size() otherwise.
469    */
470   public static <T> int upperBound(List<? extends Comparable<? super T>> list, T key) {
471     int low = 0;
472     int high = list.size();
473     
474     while (low < high) {
475       int mid = (low + high) >>> 1;
476       Comparable<? super T> midVal = list.get(mid);
477       int ret = midVal.compareTo(key);
478       if (ret <= 0)
479         low = mid + 1;
480       else
481         high = mid;
482     }
483     return low;
484   }
485 }