View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    * 
9    * http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  package org.apache.accumulo.core.file.rfile.bcfile;
18  
19  import java.io.IOException;
20  import java.io.PrintStream;
21  import java.util.Collection;
22  import java.util.Iterator;
23  import java.util.LinkedHashMap;
24  import java.util.Map;
25  import java.util.Set;
26  
27  import org.apache.accumulo.core.file.rfile.bcfile.BCFile.BlockRegion;
28  import org.apache.accumulo.core.file.rfile.bcfile.BCFile.MetaIndexEntry;
29  import org.apache.accumulo.core.file.rfile.bcfile.TFile.TFileIndexEntry;
30  import org.apache.accumulo.core.file.rfile.bcfile.Utils.Version;
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FSDataInputStream;
35  import org.apache.hadoop.fs.FileSystem;
36  import org.apache.hadoop.fs.Path;
37  import org.apache.hadoop.io.IOUtils;
38  
39  /**
40   * Dumping the information of a TFile.
41   */
42  class TFileDumper {
43    static final Log LOG = LogFactory.getLog(TFileDumper.class);
44    
45    private TFileDumper() {
46      // namespace object not constructable.
47    }
48    
49    private enum Align {
50      LEFT, CENTER, RIGHT, ZERO_PADDED;
51      static String format(String s, int width, Align align) {
52        if (s.length() >= width)
53          return s;
54        int room = width - s.length();
55        Align alignAdjusted = align;
56        if (room == 1) {
57          alignAdjusted = LEFT;
58        }
59        if (alignAdjusted == LEFT) {
60          return s + String.format("%" + room + "s", "");
61        }
62        if (alignAdjusted == RIGHT) {
63          return String.format("%" + room + "s", "") + s;
64        }
65        if (alignAdjusted == CENTER) {
66          int half = room / 2;
67          return String.format("%" + half + "s", "") + s + String.format("%" + (room - half) + "s", "");
68        }
69        throw new IllegalArgumentException("Unsupported alignment");
70      }
71      
72      static String format(long l, int width, Align align) {
73        if (align == ZERO_PADDED) {
74          return String.format("%0" + width + "d", l);
75        }
76        return format(Long.toString(l), width, align);
77      }
78      
79      static int calculateWidth(String caption, long max) {
80        return Math.max(caption.length(), Long.toString(max).length());
81      }
82    }
83    
84    /**
85     * Dump information about TFile.
86     * 
87     * @param file
88     *          Path string of the TFile
89     * @param out
90     *          PrintStream to output the information.
91     * @param conf
92     *          The configuration object.
93     * @throws IOException
94     */
95    static public void dumpInfo(String file, PrintStream out, Configuration conf) throws IOException {
96      final int maxKeySampleLen = 16;
97      Path path = new Path(file);
98      FileSystem fs = path.getFileSystem(conf);
99      long length = fs.getFileStatus(path).getLen();
100     FSDataInputStream fsdis = fs.open(path);
101     TFile.Reader reader = new TFile.Reader(fsdis, length, conf);
102     try {
103       LinkedHashMap<String,String> properties = new LinkedHashMap<String,String>();
104       int blockCnt = reader.readerBCF.getBlockCount();
105       int metaBlkCnt = reader.readerBCF.metaIndex.index.size();
106       properties.put("BCFile Version", reader.readerBCF.version.toString());
107       properties.put("TFile Version", reader.tfileMeta.version.toString());
108       properties.put("File Length", Long.toString(length));
109       properties.put("Data Compression", reader.readerBCF.getDefaultCompressionName());
110       properties.put("Record Count", Long.toString(reader.getEntryCount()));
111       properties.put("Sorted", Boolean.toString(reader.isSorted()));
112       if (reader.isSorted()) {
113         properties.put("Comparator", reader.getComparatorName());
114       }
115       properties.put("Data Block Count", Integer.toString(blockCnt));
116       long dataSize = 0, dataSizeUncompressed = 0;
117       if (blockCnt > 0) {
118         for (int i = 0; i < blockCnt; ++i) {
119           BlockRegion region = reader.readerBCF.dataIndex.getBlockRegionList().get(i);
120           dataSize += region.getCompressedSize();
121           dataSizeUncompressed += region.getRawSize();
122         }
123         properties.put("Data Block Bytes", Long.toString(dataSize));
124         if (reader.readerBCF.getDefaultCompressionName() != "none") {
125           properties.put("Data Block Uncompressed Bytes", Long.toString(dataSizeUncompressed));
126           properties.put("Data Block Compression Ratio", String.format("1:%.1f", (double) dataSizeUncompressed / dataSize));
127         }
128       }
129       
130       properties.put("Meta Block Count", Integer.toString(metaBlkCnt));
131       long metaSize = 0, metaSizeUncompressed = 0;
132       if (metaBlkCnt > 0) {
133         Collection<MetaIndexEntry> metaBlks = reader.readerBCF.metaIndex.index.values();
134         boolean calculateCompression = false;
135         for (Iterator<MetaIndexEntry> it = metaBlks.iterator(); it.hasNext();) {
136           MetaIndexEntry e = it.next();
137           metaSize += e.getRegion().getCompressedSize();
138           metaSizeUncompressed += e.getRegion().getRawSize();
139           if (e.getCompressionAlgorithm() != Compression.Algorithm.NONE) {
140             calculateCompression = true;
141           }
142         }
143         properties.put("Meta Block Bytes", Long.toString(metaSize));
144         if (calculateCompression) {
145           properties.put("Meta Block Uncompressed Bytes", Long.toString(metaSizeUncompressed));
146           properties.put("Meta Block Compression Ratio", String.format("1:%.1f", (double) metaSizeUncompressed / metaSize));
147         }
148       }
149       properties.put("Meta-Data Size Ratio", String.format("1:%.1f", (double) dataSize / metaSize));
150       long leftOverBytes = length - dataSize - metaSize;
151       long miscSize = BCFile.Magic.size() * 2 + Long.SIZE / Byte.SIZE + Version.size();
152       long metaIndexSize = leftOverBytes - miscSize;
153       properties.put("Meta Block Index Bytes", Long.toString(metaIndexSize));
154       properties.put("Headers Etc Bytes", Long.toString(miscSize));
155       // Now output the properties table.
156       int maxKeyLength = 0;
157       Set<Map.Entry<String,String>> entrySet = properties.entrySet();
158       for (Iterator<Map.Entry<String,String>> it = entrySet.iterator(); it.hasNext();) {
159         Map.Entry<String,String> e = it.next();
160         if (e.getKey().length() > maxKeyLength) {
161           maxKeyLength = e.getKey().length();
162         }
163       }
164       for (Iterator<Map.Entry<String,String>> it = entrySet.iterator(); it.hasNext();) {
165         Map.Entry<String,String> e = it.next();
166         out.printf("%s : %s%n", Align.format(e.getKey(), maxKeyLength, Align.LEFT), e.getValue());
167       }
168       out.println();
169       reader.checkTFileDataIndex();
170       if (blockCnt > 0) {
171         String blkID = "Data-Block";
172         int blkIDWidth = Align.calculateWidth(blkID, blockCnt);
173         int blkIDWidth2 = Align.calculateWidth("", blockCnt);
174         String offset = "Offset";
175         int offsetWidth = Align.calculateWidth(offset, length);
176         String blkLen = "Length";
177         int blkLenWidth = Align.calculateWidth(blkLen, dataSize / blockCnt * 10);
178         String rawSize = "Raw-Size";
179         int rawSizeWidth = Align.calculateWidth(rawSize, dataSizeUncompressed / blockCnt * 10);
180         String records = "Records";
181         int recordsWidth = Align.calculateWidth(records, reader.getEntryCount() / blockCnt * 10);
182         String endKey = "End-Key";
183         int endKeyWidth = Math.max(endKey.length(), maxKeySampleLen * 2 + 5);
184         
185         out.printf("%s %s %s %s %s %s%n", Align.format(blkID, blkIDWidth, Align.CENTER), Align.format(offset, offsetWidth, Align.CENTER),
186             Align.format(blkLen, blkLenWidth, Align.CENTER), Align.format(rawSize, rawSizeWidth, Align.CENTER),
187             Align.format(records, recordsWidth, Align.CENTER), Align.format(endKey, endKeyWidth, Align.LEFT));
188         
189         for (int i = 0; i < blockCnt; ++i) {
190           BlockRegion region = reader.readerBCF.dataIndex.getBlockRegionList().get(i);
191           TFileIndexEntry indexEntry = reader.tfileIndex.getEntry(i);
192           out.printf("%s %s %s %s %s ", Align.format(Align.format(i, blkIDWidth2, Align.ZERO_PADDED), blkIDWidth, Align.LEFT),
193               Align.format(region.getOffset(), offsetWidth, Align.LEFT), Align.format(region.getCompressedSize(), blkLenWidth, Align.LEFT),
194               Align.format(region.getRawSize(), rawSizeWidth, Align.LEFT), Align.format(indexEntry.kvEntries, recordsWidth, Align.LEFT));
195           byte[] key = indexEntry.key;
196           boolean asAscii = true;
197           int sampleLen = Math.min(maxKeySampleLen, key.length);
198           for (int j = 0; j < sampleLen; ++j) {
199             byte b = key[j];
200             if ((b < 32 && b != 9) || (b == 127)) {
201               asAscii = false;
202             }
203           }
204           if (!asAscii) {
205             out.print("0X");
206             for (int j = 0; j < sampleLen; ++j) {
207               byte b = key[i];
208               out.printf("%X", b);
209             }
210           } else {
211             out.print(new String(key, 0, sampleLen));
212           }
213           if (sampleLen < key.length) {
214             out.print("...");
215           }
216           out.println();
217         }
218       }
219       
220       out.println();
221       if (metaBlkCnt > 0) {
222         String name = "Meta-Block";
223         int maxNameLen = 0;
224         Set<Map.Entry<String,MetaIndexEntry>> metaBlkEntrySet = reader.readerBCF.metaIndex.index.entrySet();
225         for (Iterator<Map.Entry<String,MetaIndexEntry>> it = metaBlkEntrySet.iterator(); it.hasNext();) {
226           Map.Entry<String,MetaIndexEntry> e = it.next();
227           if (e.getKey().length() > maxNameLen) {
228             maxNameLen = e.getKey().length();
229           }
230         }
231         int nameWidth = Math.max(name.length(), maxNameLen);
232         String offset = "Offset";
233         int offsetWidth = Align.calculateWidth(offset, length);
234         String blkLen = "Length";
235         int blkLenWidth = Align.calculateWidth(blkLen, metaSize / metaBlkCnt * 10);
236         String rawSize = "Raw-Size";
237         int rawSizeWidth = Align.calculateWidth(rawSize, metaSizeUncompressed / metaBlkCnt * 10);
238         String compression = "Compression";
239         int compressionWidth = compression.length();
240         out.printf("%s %s %s %s %s%n", Align.format(name, nameWidth, Align.CENTER), Align.format(offset, offsetWidth, Align.CENTER),
241             Align.format(blkLen, blkLenWidth, Align.CENTER), Align.format(rawSize, rawSizeWidth, Align.CENTER),
242             Align.format(compression, compressionWidth, Align.LEFT));
243         
244         for (Iterator<Map.Entry<String,MetaIndexEntry>> it = metaBlkEntrySet.iterator(); it.hasNext();) {
245           Map.Entry<String,MetaIndexEntry> e = it.next();
246           String blkName = e.getValue().getMetaName();
247           BlockRegion region = e.getValue().getRegion();
248           String blkCompression = e.getValue().getCompressionAlgorithm().getName();
249           out.printf("%s %s %s %s %s%n", Align.format(blkName, nameWidth, Align.LEFT), Align.format(region.getOffset(), offsetWidth, Align.LEFT),
250               Align.format(region.getCompressedSize(), blkLenWidth, Align.LEFT), Align.format(region.getRawSize(), rawSizeWidth, Align.LEFT),
251               Align.format(blkCompression, compressionWidth, Align.LEFT));
252         }
253       }
254     } finally {
255       IOUtils.cleanup(LOG, reader, fsdis);
256     }
257   }
258 }