1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.ByteArrayOutputStream;
23 import java.io.DataInput;
24 import java.io.IOException;
25 import java.io.PrintStream;
26 import java.util.ArrayList;
27 import java.util.HashMap;
28 import java.util.Iterator;
29 import java.util.LinkedHashSet;
30 import java.util.List;
31 import java.util.Locale;
32 import java.util.Map;
33 import java.util.Set;
34 import java.util.SortedMap;
35
36 import org.apache.commons.cli.CommandLine;
37 import org.apache.commons.cli.CommandLineParser;
38 import org.apache.commons.cli.HelpFormatter;
39 import org.apache.commons.cli.Option;
40 import org.apache.commons.cli.OptionGroup;
41 import org.apache.commons.cli.Options;
42 import org.apache.commons.cli.ParseException;
43 import org.apache.commons.cli.PosixParser;
44 import org.apache.commons.logging.Log;
45 import org.apache.commons.logging.LogFactory;
46 import org.apache.hadoop.hbase.classification.InterfaceAudience;
47 import org.apache.hadoop.hbase.classification.InterfaceStability;
48 import org.apache.hadoop.conf.Configuration;
49 import org.apache.hadoop.conf.Configured;
50 import org.apache.hadoop.fs.FileSystem;
51 import org.apache.hadoop.fs.Path;
52 import org.apache.hadoop.hbase.Cell;
53 import org.apache.hadoop.hbase.CellComparator;
54 import org.apache.hadoop.hbase.CellUtil;
55 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
56 import org.apache.hadoop.hbase.HConstants;
57 import org.apache.hadoop.hbase.TableName;
58 import org.apache.hadoop.hbase.HBaseConfiguration;
59 import org.apache.hadoop.hbase.HRegionInfo;
60 import org.apache.hadoop.hbase.KeyValueUtil;
61 import org.apache.hadoop.hbase.Tag;
62 import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
63 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
64 import org.apache.hadoop.hbase.mob.MobUtils;
65 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
66 import org.apache.hadoop.hbase.util.BloomFilter;
67 import org.apache.hadoop.hbase.util.BloomFilterUtil;
68 import org.apache.hadoop.hbase.util.BloomFilterFactory;
69 import org.apache.hadoop.hbase.util.Bytes;
70 import org.apache.hadoop.hbase.util.FSUtils;
71 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
72 import org.apache.hadoop.hbase.util.Writables;
73 import org.apache.hadoop.util.Tool;
74 import org.apache.hadoop.util.ToolRunner;
75
76 import com.yammer.metrics.core.Histogram;
77 import com.yammer.metrics.core.Metric;
78 import com.yammer.metrics.core.MetricName;
79 import com.yammer.metrics.core.MetricPredicate;
80 import com.yammer.metrics.core.MetricsRegistry;
81 import com.yammer.metrics.reporting.ConsoleReporter;
82
83
84
85
86 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
87 @InterfaceStability.Evolving
88 public class HFilePrettyPrinter extends Configured implements Tool {
89
90 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
91
92 private Options options = new Options();
93
94 private boolean verbose;
95 private boolean printValue;
96 private boolean printKey;
97 private boolean shouldPrintMeta;
98 private boolean printBlockIndex;
99 private boolean printBlockHeaders;
100 private boolean printStats;
101 private boolean checkRow;
102 private boolean checkFamily;
103 private boolean isSeekToRow = false;
104 private boolean checkMobIntegrity = false;
105 private Map<String, List<Path>> mobFileLocations;
106 private static final int FOUND_MOB_FILES_CACHE_CAPACITY = 50;
107 private static final int MISSING_MOB_FILES_CACHE_CAPACITY = 20;
108
109
110
111
112 private byte[] row = null;
113
114 private List<Path> files = new ArrayList<Path>();
115 private int count;
116
117 private static final String FOUR_SPACES = " ";
118
119 public HFilePrettyPrinter() {
120 super();
121 init();
122 }
123
124 public HFilePrettyPrinter(Configuration conf) {
125 super(conf);
126 init();
127 }
128
129 private void init() {
130 options.addOption("v", "verbose", false,
131 "Verbose output; emits file and meta data delimiters");
132 options.addOption("p", "printkv", false, "Print key/value pairs");
133 options.addOption("e", "printkey", false, "Print keys");
134 options.addOption("m", "printmeta", false, "Print meta data of file");
135 options.addOption("b", "printblocks", false, "Print block index meta data");
136 options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
137 options.addOption("k", "checkrow", false,
138 "Enable row order check; looks for out-of-order keys");
139 options.addOption("a", "checkfamily", false, "Enable family check");
140 options.addOption("w", "seekToRow", true,
141 "Seek to this row and print all the kvs for this row only");
142 options.addOption("s", "stats", false, "Print statistics");
143 options.addOption("i", "checkMobIntegrity", false,
144 "Print all cells whose mob files are missing");
145
146 OptionGroup files = new OptionGroup();
147 files.addOption(new Option("f", "file", true,
148 "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
149 files.addOption(new Option("r", "region", true,
150 "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
151 options.addOptionGroup(files);
152 }
153
154 public boolean parseOptions(String args[]) throws ParseException,
155 IOException {
156 if (args.length == 0) {
157 HelpFormatter formatter = new HelpFormatter();
158 formatter.printHelp("HFile", options, true);
159 return false;
160 }
161 CommandLineParser parser = new PosixParser();
162 CommandLine cmd = parser.parse(options, args);
163
164 verbose = cmd.hasOption("v");
165 printValue = cmd.hasOption("p");
166 printKey = cmd.hasOption("e") || printValue;
167 shouldPrintMeta = cmd.hasOption("m");
168 printBlockIndex = cmd.hasOption("b");
169 printBlockHeaders = cmd.hasOption("h");
170 printStats = cmd.hasOption("s");
171 checkRow = cmd.hasOption("k");
172 checkFamily = cmd.hasOption("a");
173 checkMobIntegrity = cmd.hasOption("i");
174
175 if (cmd.hasOption("f")) {
176 files.add(new Path(cmd.getOptionValue("f")));
177 }
178
179 if (cmd.hasOption("w")) {
180 String key = cmd.getOptionValue("w");
181 if (key != null && key.length() != 0) {
182 row = Bytes.toBytesBinary(key);
183 isSeekToRow = true;
184 } else {
185 System.err.println("Invalid row is specified.");
186 System.exit(-1);
187 }
188 }
189
190 if (cmd.hasOption("r")) {
191 String regionName = cmd.getOptionValue("r");
192 byte[] rn = Bytes.toBytes(regionName);
193 byte[][] hri = HRegionInfo.parseRegionName(rn);
194 Path rootDir = FSUtils.getRootDir(getConf());
195 Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
196 String enc = HRegionInfo.encodeRegionName(rn);
197 Path regionDir = new Path(tableDir, enc);
198 if (verbose)
199 System.out.println("region dir -> " + regionDir);
200 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
201 regionDir);
202 if (verbose)
203 System.out.println("Number of region files found -> "
204 + regionFiles.size());
205 if (verbose) {
206 int i = 1;
207 for (Path p : regionFiles) {
208 if (verbose)
209 System.out.println("Found file[" + i++ + "] -> " + p);
210 }
211 }
212 files.addAll(regionFiles);
213 }
214
215 if(checkMobIntegrity) {
216 if (verbose) {
217 System.out.println("checkMobIntegrity is enabled");
218 }
219 mobFileLocations = new HashMap<String, List<Path>>();
220 }
221 return true;
222 }
223
224
225
226
227
228 @Override
229 public int run(String[] args) {
230 if (getConf() == null) {
231 throw new RuntimeException("A Configuration instance must be provided.");
232 }
233 try {
234 FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
235 if (!parseOptions(args))
236 return 1;
237 } catch (IOException ex) {
238 LOG.error("Error parsing command-line options", ex);
239 return 1;
240 } catch (ParseException ex) {
241 LOG.error("Error parsing command-line options", ex);
242 return 1;
243 }
244
245
246 for (Path fileName : files) {
247 try {
248 processFile(fileName);
249 } catch (IOException ex) {
250 LOG.error("Error reading " + fileName, ex);
251 System.exit(-2);
252 }
253 }
254
255 if (verbose || printKey) {
256 System.out.println("Scanned kv count -> " + count);
257 }
258
259 return 0;
260 }
261
262 private void processFile(Path file) throws IOException {
263 if (verbose)
264 System.out.println("Scanning -> " + file);
265 FileSystem fs = file.getFileSystem(getConf());
266 if (!fs.exists(file)) {
267 System.err.println("ERROR, file doesnt exist: " + file);
268 System.exit(-2);
269 }
270
271 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());
272
273 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
274
275 KeyValueStatsCollector fileStats = null;
276
277 if (verbose || printKey || checkRow || checkFamily || printStats || checkMobIntegrity) {
278
279 HFileScanner scanner = reader.getScanner(false, false, false);
280 fileStats = new KeyValueStatsCollector();
281 boolean shouldScanKeysValues = false;
282 if (this.isSeekToRow) {
283
284 shouldScanKeysValues =
285 (scanner.seekTo(KeyValueUtil.createFirstOnRow(this.row)) != -1);
286 } else {
287 shouldScanKeysValues = scanner.seekTo();
288 }
289 if (shouldScanKeysValues)
290 scanKeysValues(file, fileStats, scanner, row);
291 }
292
293
294 if (shouldPrintMeta) {
295 printMeta(reader, fileInfo);
296 }
297
298 if (printBlockIndex) {
299 System.out.println("Block Index:");
300 System.out.println(reader.getDataBlockIndexReader());
301 }
302
303 if (printBlockHeaders) {
304 System.out.println("Block Headers:");
305
306
307
308
309 FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
310 long fileSize = fs.getFileStatus(file).getLen();
311 FixedFileTrailer trailer =
312 FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
313 long offset = trailer.getFirstDataBlockOffset(),
314 max = trailer.getLastDataBlockOffset();
315 HFileBlock block;
316 while (offset <= max) {
317 block = reader.readBlock(offset, -1,
318
319 offset += block.getOnDiskSizeWithHeader();
320 System.out.println(block);
321 }
322 }
323
324 if (printStats) {
325 fileStats.finish();
326 System.out.println("Stats:\n" + fileStats);
327 }
328
329 reader.close();
330 }
331
332 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
333 HFileScanner scanner, byte[] row) throws IOException {
334 Cell pCell = null;
335 FileSystem fs = FileSystem.get(getConf());
336 Set<String> foundMobFiles = new LinkedHashSet<String>(FOUND_MOB_FILES_CACHE_CAPACITY);
337 Set<String> missingMobFiles = new LinkedHashSet<String>(MISSING_MOB_FILES_CACHE_CAPACITY);
338 do {
339 Cell cell = scanner.getCell();
340 if (row != null && row.length != 0) {
341 int result = CellComparator.COMPARATOR.compareRows(cell, row, 0, row.length);
342 if (result > 0) {
343 break;
344 } else if (result < 0) {
345 continue;
346 }
347 }
348
349 if (printStats) {
350 fileStats.collect(cell);
351 }
352
353 if (printKey) {
354 System.out.print("K: " + cell);
355 if (printValue) {
356 System.out.print(" V: "
357 + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(),
358 cell.getValueLength()));
359 int i = 0;
360 List<Tag> tags = Tag.asList(cell.getTagsArray(), cell.getTagsOffset(),
361 cell.getTagsLength());
362 for (Tag tag : tags) {
363 System.out.print(String.format(" T[%d]: %s", i++,
364 Bytes.toStringBinary(tag.getBuffer(), tag.getTagOffset(), tag.getTagLength())));
365 }
366 }
367 System.out.println();
368 }
369
370 if (checkRow && pCell != null) {
371 if (CellComparator.COMPARATOR.compareRows(pCell, cell) > 0) {
372 System.err.println("WARNING, previous row is greater then"
373 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
374 + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent -> "
375 + CellUtil.getCellKeyAsString(cell));
376 }
377 }
378
379 if (checkFamily) {
380 String fam = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(),
381 cell.getFamilyLength());
382 if (!file.toString().contains(fam)) {
383 System.err.println("WARNING, filename does not match kv family,"
384 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
385 + CellUtil.getCellKeyAsString(cell));
386 }
387 if (pCell != null && CellComparator.compareFamilies(pCell, cell) != 0) {
388 System.err.println("WARNING, previous kv has different family"
389 + " compared to current key\n\tfilename -> " + file
390 + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell)
391 + "\n\tcurrent -> " + CellUtil.getCellKeyAsString(cell));
392 }
393 }
394
395 if (checkMobIntegrity && MobUtils.isMobReferenceCell(cell)) {
396 Tag tnTag = MobUtils.getTableNameTag(cell);
397 if (tnTag == null) {
398 System.err.println("ERROR, wrong tag format in mob reference cell "
399 + CellUtil.getCellKeyAsString(cell));
400 } else if (!MobUtils.hasValidMobRefCellValue(cell)) {
401 System.err.println("ERROR, wrong value format in mob reference cell "
402 + CellUtil.getCellKeyAsString(cell));
403 } else {
404 TableName tn = TableName.valueOf(tnTag.getValue());
405 String mobFileName = MobUtils.getMobFileName(cell);
406 boolean exist = mobFileExists(fs, tn, mobFileName,
407 Bytes.toString(CellUtil.cloneFamily(cell)), foundMobFiles, missingMobFiles);
408 if (!exist) {
409
410 System.err.println("ERROR, the mob file [" + mobFileName
411 + "] is missing referenced by cell " + CellUtil.getCellKeyAsString(cell));
412 }
413 }
414 }
415 pCell = cell;
416 ++count;
417 } while (scanner.next());
418 }
419
420
421
422
423 private boolean mobFileExists(FileSystem fs, TableName tn, String mobFileName, String family,
424 Set<String> foundMobFiles, Set<String> missingMobFiles) throws IOException {
425 if (foundMobFiles.contains(mobFileName)) {
426 return true;
427 }
428 if (missingMobFiles.contains(mobFileName)) {
429 return false;
430 }
431 String tableName = tn.getNameAsString();
432 List<Path> locations = mobFileLocations.get(tableName);
433 if (locations == null) {
434 locations = new ArrayList<Path>(2);
435 locations.add(MobUtils.getMobFamilyPath(getConf(), tn, family));
436 locations.add(HFileArchiveUtil.getStoreArchivePath(getConf(), tn,
437 MobUtils.getMobRegionInfo(tn).getEncodedName(), family));
438 mobFileLocations.put(tn.getNameAsString(), locations);
439 }
440 boolean exist = false;
441 for (Path location : locations) {
442 Path mobFilePath = new Path(location, mobFileName);
443 if (fs.exists(mobFilePath)) {
444 exist = true;
445 break;
446 }
447 }
448 if (exist) {
449 evictMobFilesIfNecessary(foundMobFiles, FOUND_MOB_FILES_CACHE_CAPACITY);
450 foundMobFiles.add(mobFileName);
451 } else {
452 evictMobFilesIfNecessary(missingMobFiles, MISSING_MOB_FILES_CACHE_CAPACITY);
453 missingMobFiles.add(mobFileName);
454 }
455 return exist;
456 }
457
458
459
460
461 private void evictMobFilesIfNecessary(Set<String> mobFileNames, int limit) {
462 if (mobFileNames.size() < limit) {
463 return;
464 }
465 int index = 0;
466 int evict = limit / 2;
467 Iterator<String> fileNamesItr = mobFileNames.iterator();
468 while (index < evict && fileNamesItr.hasNext()) {
469 fileNamesItr.next();
470 fileNamesItr.remove();
471 index++;
472 }
473 }
474
475
476
477
478
479 private static String asSeparateLines(String keyValueStr) {
480 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
481 ",\n" + FOUR_SPACES + "$1");
482 }
483
484 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
485 throws IOException {
486 System.out.println("Block index size as per heapsize: "
487 + reader.indexSize());
488 System.out.println(asSeparateLines(reader.toString()));
489 System.out.println("Trailer:\n "
490 + asSeparateLines(reader.getTrailer().toString()));
491 System.out.println("Fileinfo:");
492 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
493 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
494 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
495 long seqid = Bytes.toLong(e.getValue());
496 System.out.println(seqid);
497 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
498 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
499 Writables.copyWritable(e.getValue(), timeRangeTracker);
500 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
501 + timeRangeTracker.getMaximumTimestamp());
502 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
503 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
504 System.out.println(Bytes.toInt(e.getValue()));
505 } else {
506 System.out.println(Bytes.toStringBinary(e.getValue()));
507 }
508 }
509
510 try {
511 System.out.println("Mid-key: " + (CellUtil.getCellKeyAsString(reader.midkey())));
512 } catch (Exception e) {
513 System.out.println ("Unable to retrieve the midkey");
514 }
515
516
517 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
518 BloomFilter bloomFilter = null;
519 if (bloomMeta != null)
520 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
521
522 System.out.println("Bloom filter:");
523 if (bloomFilter != null) {
524 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
525 BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
526 } else {
527 System.out.println(FOUR_SPACES + "Not present");
528 }
529
530
531 bloomMeta = reader.getDeleteBloomFilterMetadata();
532 bloomFilter = null;
533 if (bloomMeta != null)
534 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
535
536 System.out.println("Delete Family Bloom filter:");
537 if (bloomFilter != null) {
538 System.out.println(FOUR_SPACES
539 + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP,
540 "\n" + FOUR_SPACES));
541 } else {
542 System.out.println(FOUR_SPACES + "Not present");
543 }
544 }
545
546 private static class KeyValueStatsCollector {
547 private final MetricsRegistry metricsRegistry = new MetricsRegistry();
548 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
549 private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
550 Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
551 Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
552 Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
553 Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
554
555 long curRowBytes = 0;
556 long curRowCols = 0;
557
558 byte[] biggestRow = null;
559
560 private Cell prevCell = null;
561 private long maxRowBytes = 0;
562 private long curRowKeyLength;
563
564 public void collect(Cell cell) {
565 valLen.update(cell.getValueLength());
566 if (prevCell != null &&
567 CellComparator.COMPARATOR.compareRows(prevCell, cell) != 0) {
568
569 collectRow();
570 }
571 curRowBytes += KeyValueUtil.length(cell);
572 curRowKeyLength = KeyValueUtil.keyLength(cell);
573 curRowCols++;
574 prevCell = cell;
575 }
576
577 private void collectRow() {
578 rowSizeBytes.update(curRowBytes);
579 rowSizeCols.update(curRowCols);
580 keyLen.update(curRowKeyLength);
581
582 if (curRowBytes > maxRowBytes && prevCell != null) {
583 biggestRow = CellUtil.cloneRow(prevCell);
584 maxRowBytes = curRowBytes;
585 }
586
587 curRowBytes = 0;
588 curRowCols = 0;
589 }
590
591 public void finish() {
592 if (curRowCols > 0) {
593 collectRow();
594 }
595 }
596
597 @Override
598 public String toString() {
599 if (prevCell == null)
600 return "no data available for statistics";
601
602
603 simpleReporter.shutdown();
604 simpleReporter.run();
605 metricsRegistry.shutdown();
606
607 return
608 metricsOutput.toString() +
609 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
610 }
611 }
612
613 private static class SimpleReporter extends ConsoleReporter {
614 private final PrintStream out;
615
616 public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
617 super(metricsRegistry, out, MetricPredicate.ALL);
618 this.out = out;
619 }
620
621 @Override
622 public void run() {
623 for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
624 MetricPredicate.ALL).entrySet()) {
625 try {
626 for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
627 out.print(" " + subEntry.getKey().getName());
628 out.println(':');
629
630 subEntry.getValue().processWith(this, subEntry.getKey(), out);
631 }
632 } catch (Exception e) {
633 e.printStackTrace(out);
634 }
635 }
636 }
637
638 @Override
639 public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
640 super.processHistogram(name, histogram, stream);
641 stream.printf(Locale.getDefault(), " count = %d%n", histogram.count());
642 }
643 }
644
645 public static void main(String[] args) throws Exception {
646 Configuration conf = HBaseConfiguration.create();
647
648 conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
649 int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
650 System.exit(ret);
651 }
652 }