1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.accumulo.core.file.rfile.bcfile;
19
20 import java.io.DataInput;
21 import java.io.DataOutput;
22 import java.io.IOException;
23 import java.util.Comparator;
24 import java.util.List;
25
26 import org.apache.hadoop.io.Text;
27
28 /**
29 * Supporting Utility classes used by TFile, and shared by users of TFile.
30 */
31 public final class Utils {
32
33 /**
34 * Prevent the instantiation of Utils.
35 */
36 private Utils() {
37
38 }
39
40 /**
41 * Encoding an integer into a variable-length encoding format. Synonymous to <code>Utils#writeVLong(out, n)</code>.
42 *
43 * @param out
44 * output stream
45 * @param n
46 * The integer to be encoded
47 * @throws IOException
48 * @see Utils#writeVLong(DataOutput, long)
49 */
50 public static void writeVInt(DataOutput out, int n) throws IOException {
51 writeVLong(out, n);
52 }
53
54 /**
55 * Encoding a Long integer into a variable-length encoding format.
56 * <ul>
57 * <li>if n in [-32, 127): encode in one byte with the actual value. Otherwise,
58 * <li>if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; byte[1]=n&0xff. Otherwise,
59 * <li>if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
60 * <li>if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise:
61 * <li>if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
62 * <li>if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff;
63 * byte[5]=n&0xff
64 * <li>if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff;
65 * byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
66 * <li>if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff;
67 * byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff;
68 * <li>if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff;
69 * byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]=(n>>8)&0xff; byte[8]=n&0xff;
70 * </ul>
71 *
72 * @param out
73 * output stream
74 * @param n
75 * the integer number
76 * @throws IOException
77 */
78 @SuppressWarnings("fallthrough")
79 public static void writeVLong(DataOutput out, long n) throws IOException {
80 if ((n < 128) && (n >= -32)) {
81 out.writeByte((int) n);
82 return;
83 }
84
85 long un = (n < 0) ? ~n : n;
86
87 int len = (Long.SIZE - Long.numberOfLeadingZeros(un)) / 8 + 1;
88 int firstByte = (int) (n >> ((len - 1) * 8));
89 switch (len) {
90 case 1:
91
92 firstByte >>= 8;
93 case 2:
94 if ((firstByte < 20) && (firstByte >= -20)) {
95 out.writeByte(firstByte - 52);
96 out.writeByte((int) n);
97 return;
98 }
99
100 firstByte >>= 8;
101 case 3:
102 if ((firstByte < 16) && (firstByte >= -16)) {
103 out.writeByte(firstByte - 88);
104 out.writeShort((int) n);
105 return;
106 }
107
108 firstByte >>= 8;
109 case 4:
110 if ((firstByte < 8) && (firstByte >= -8)) {
111 out.writeByte(firstByte - 112);
112 out.writeShort(((int) n) >>> 8);
113 out.writeByte((int) n);
114 return;
115 }
116 out.writeByte(len - 129);
117 out.writeInt((int) n);
118 return;
119 case 5:
120 out.writeByte(len - 129);
121 out.writeInt((int) (n >>> 8));
122 out.writeByte((int) n);
123 return;
124 case 6:
125 out.writeByte(len - 129);
126 out.writeInt((int) (n >>> 16));
127 out.writeShort((int) n);
128 return;
129 case 7:
130 out.writeByte(len - 129);
131 out.writeInt((int) (n >>> 24));
132 out.writeShort((int) (n >>> 8));
133 out.writeByte((int) n);
134 return;
135 case 8:
136 out.writeByte(len - 129);
137 out.writeLong(n);
138 return;
139 default:
140 throw new RuntimeException("Internel error");
141 }
142 }
143
144 /**
145 * Decoding the variable-length integer. Synonymous to <code>(int)Utils#readVLong(in)</code>.
146 *
147 * @param in
148 * input stream
149 * @return the decoded integer
150 * @throws IOException
151 *
152 * @see Utils#readVLong(DataInput)
153 */
154 public static int readVInt(DataInput in) throws IOException {
155 long ret = readVLong(in);
156 if ((ret > Integer.MAX_VALUE) || (ret < Integer.MIN_VALUE)) {
157 throw new RuntimeException("Number too large to be represented as Integer");
158 }
159 return (int) ret;
160 }
161
162 /**
163 * Decoding the variable-length integer. Suppose the value of the first byte is FB, and the following bytes are NB[*].
164 * <ul>
165 * <li>if (FB >= -32), return (long)FB;
166 * <li>if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
167 * <li>if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 + NB[1]&0xff;
168 * <li>if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 + (NB[1]&0xff)<<8 + NB[2]&0xff;
169 * <li>if (FB in [-128, -121]), return interpret NB[FB+129] as a signed big-endian integer.
170 *
171 * @param in
172 * input stream
173 * @return the decoded long integer.
174 * @throws IOException
175 */
176
177 public static long readVLong(DataInput in) throws IOException {
178 int firstByte = in.readByte();
179 if (firstByte >= -32) {
180 return firstByte;
181 }
182
183 switch ((firstByte + 128) / 8) {
184 case 11:
185 case 10:
186 case 9:
187 case 8:
188 case 7:
189 return ((firstByte + 52) << 8) | in.readUnsignedByte();
190 case 6:
191 case 5:
192 case 4:
193 case 3:
194 return ((firstByte + 88) << 16) | in.readUnsignedShort();
195 case 2:
196 case 1:
197 return ((firstByte + 112) << 24) | (in.readUnsignedShort() << 8) | in.readUnsignedByte();
198 case 0:
199 int len = firstByte + 129;
200 switch (len) {
201 case 4:
202 return in.readInt();
203 case 5:
204 return ((long) in.readInt()) << 8 | in.readUnsignedByte();
205 case 6:
206 return ((long) in.readInt()) << 16 | in.readUnsignedShort();
207 case 7:
208 return ((long) in.readInt()) << 24 | (in.readUnsignedShort() << 8) | in.readUnsignedByte();
209 case 8:
210 return in.readLong();
211 default:
212 throw new IOException("Corrupted VLong encoding");
213 }
214 default:
215 throw new RuntimeException("Internal error");
216 }
217 }
218
219 /**
220 * Write a String as a VInt n, followed by n Bytes as in Text format.
221 *
222 * @param out
223 * @param s
224 * @throws IOException
225 */
226 public static void writeString(DataOutput out, String s) throws IOException {
227 if (s != null) {
228 Text text = new Text(s);
229 byte[] buffer = text.getBytes();
230 int len = text.getLength();
231 writeVInt(out, len);
232 out.write(buffer, 0, len);
233 } else {
234 writeVInt(out, -1);
235 }
236 }
237
238 /**
239 * Read a String as a VInt n, followed by n Bytes in Text format.
240 *
241 * @param in
242 * The input stream.
243 * @return The string
244 * @throws IOException
245 */
246 public static String readString(DataInput in) throws IOException {
247 int length = readVInt(in);
248 if (length == -1)
249 return null;
250 byte[] buffer = new byte[length];
251 in.readFully(buffer);
252 return Text.decode(buffer);
253 }
254
255 /**
256 * A generic Version class. We suggest applications built on top of TFile use this class to maintain version information in their meta blocks.
257 *
258 * A version number consists of a major version and a minor version. The suggested usage of major and minor version number is to increment major version
259 * number when the new storage format is not backward compatible, and increment the minor version otherwise.
260 */
261 public static final class Version implements Comparable<Version> {
262 private final short major;
263 private final short minor;
264
265 /**
266 * Construct the Version object by reading from the input stream.
267 *
268 * @param in
269 * input stream
270 * @throws IOException
271 */
272 public Version(DataInput in) throws IOException {
273 major = in.readShort();
274 minor = in.readShort();
275 }
276
277 /**
278 * Constructor.
279 *
280 * @param major
281 * major version.
282 * @param minor
283 * minor version.
284 */
285 public Version(short major, short minor) {
286 this.major = major;
287 this.minor = minor;
288 }
289
290 /**
291 * Write the object to a DataOutput. The serialized format of the Version is major version followed by minor version, both as big-endian short integers.
292 *
293 * @param out
294 * The DataOutput object.
295 * @throws IOException
296 */
297 public void write(DataOutput out) throws IOException {
298 out.writeShort(major);
299 out.writeShort(minor);
300 }
301
302 /**
303 * Get the major version.
304 *
305 * @return Major version.
306 */
307 public int getMajor() {
308 return major;
309 }
310
311 /**
312 * Get the minor version.
313 *
314 * @return The minor version.
315 */
316 public int getMinor() {
317 return minor;
318 }
319
320 /**
321 * Get the size of the serialized Version object.
322 *
323 * @return serialized size of the version object.
324 */
325 public static int size() {
326 return (Short.SIZE + Short.SIZE) / Byte.SIZE;
327 }
328
329 /**
330 * Return a string representation of the version.
331 */
332 @Override
333 public String toString() {
334 return new StringBuilder("v").append(major).append(".").append(minor).toString();
335 }
336
337 /**
338 * Test compatibility.
339 *
340 * @param other
341 * The Version object to test compatibility with.
342 * @return true if both versions have the same major version number; false otherwise.
343 */
344 public boolean compatibleWith(Version other) {
345 return major == other.major;
346 }
347
348 /**
349 * Compare this version with another version.
350 */
351 @Override
352 public int compareTo(Version that) {
353 if (major != that.major) {
354 return major - that.major;
355 }
356 return minor - that.minor;
357 }
358
359 @Override
360 public boolean equals(Object other) {
361 if (this == other)
362 return true;
363 if (!(other instanceof Version))
364 return false;
365 return compareTo((Version) other) == 0;
366 }
367
368 @Override
369 public int hashCode() {
370 return (major << 16 + minor);
371 }
372 }
373
374 /**
375 * Lower bound binary search. Find the index to the first element in the list that compares greater than or equal to key.
376 *
377 * @param <T>
378 * Type of the input key.
379 * @param list
380 * The list
381 * @param key
382 * The input key.
383 * @param cmp
384 * Comparator for the key.
385 * @return The index to the desired element if it exists; or list.size() otherwise.
386 */
387 public static <T> int lowerBound(List<? extends T> list, T key, Comparator<? super T> cmp) {
388 int low = 0;
389 int high = list.size();
390
391 while (low < high) {
392 int mid = (low + high) >>> 1;
393 T midVal = list.get(mid);
394 int ret = cmp.compare(midVal, key);
395 if (ret < 0)
396 low = mid + 1;
397 else
398 high = mid;
399 }
400 return low;
401 }
402
403 /**
404 * Upper bound binary search. Find the index to the first element in the list that compares greater than the input key.
405 *
406 * @param <T>
407 * Type of the input key.
408 * @param list
409 * The list
410 * @param key
411 * The input key.
412 * @param cmp
413 * Comparator for the key.
414 * @return The index to the desired element if it exists; or list.size() otherwise.
415 */
416 public static <T> int upperBound(List<? extends T> list, T key, Comparator<? super T> cmp) {
417 int low = 0;
418 int high = list.size();
419
420 while (low < high) {
421 int mid = (low + high) >>> 1;
422 T midVal = list.get(mid);
423 int ret = cmp.compare(midVal, key);
424 if (ret <= 0)
425 low = mid + 1;
426 else
427 high = mid;
428 }
429 return low;
430 }
431
432 /**
433 * Lower bound binary search. Find the index to the first element in the list that compares greater than or equal to key.
434 *
435 * @param <T>
436 * Type of the input key.
437 * @param list
438 * The list
439 * @param key
440 * The input key.
441 * @return The index to the desired element if it exists; or list.size() otherwise.
442 */
443 public static <T> int lowerBound(List<? extends Comparable<? super T>> list, T key) {
444 int low = 0;
445 int high = list.size();
446
447 while (low < high) {
448 int mid = (low + high) >>> 1;
449 Comparable<? super T> midVal = list.get(mid);
450 int ret = midVal.compareTo(key);
451 if (ret < 0)
452 low = mid + 1;
453 else
454 high = mid;
455 }
456 return low;
457 }
458
459 /**
460 * Upper bound binary search. Find the index to the first element in the list that compares greater than the input key.
461 *
462 * @param <T>
463 * Type of the input key.
464 * @param list
465 * The list
466 * @param key
467 * The input key.
468 * @return The index to the desired element if it exists; or list.size() otherwise.
469 */
470 public static <T> int upperBound(List<? extends Comparable<? super T>> list, T key) {
471 int low = 0;
472 int high = list.size();
473
474 while (low < high) {
475 int mid = (low + high) >>> 1;
476 Comparable<? super T> midVal = list.get(mid);
477 int ret = midVal.compareTo(key);
478 if (ret <= 0)
479 low = mid + 1;
480 else
481 high = mid;
482 }
483 return low;
484 }
485 }