View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.fs.ChecksumException;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.fs.Path;
29  import org.apache.hadoop.hbase.util.ChecksumType;
30  import org.apache.hadoop.util.DataChecksum;
31  
32  /**
33   * Utility methods to compute and validate checksums.
34   */
35  @InterfaceAudience.Private
36  public class ChecksumUtil {
37    public static final Log LOG = LogFactory.getLog(ChecksumUtil.class);
38  
39    /** This is used to reserve space in a byte buffer */
40    private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
41  
42    /** 
43     * This is used by unit tests to make checksum failures throw an 
44     * exception instead of returning null. Returning a null value from 
45     * checksum validation will cause the higher layer to retry that 
46     * read with hdfs-level checksums. Instead, we would like checksum 
47     * failures to cause the entire unit test to fail.
48     */
49    private static boolean generateExceptions = false;
50  
51    /**
52     * Generates a checksum for all the data in indata. The checksum is
53     * written to outdata.
54     * @param indata input data stream
55     * @param startOffset starting offset in the indata stream from where to
56     *                    compute checkums from
57     * @param endOffset ending offset in the indata stream upto
58     *                   which checksums needs to be computed
59     * @param outdata the output buffer where checksum values are written
60     * @param outOffset the starting offset in the outdata where the
61     *                  checksum values are written
62     * @param checksumType type of checksum
63     * @param bytesPerChecksum number of bytes per checksum value
64     */
65    static void generateChecksums(byte[] indata, int startOffset, int endOffset,
66      byte[] outdata, int outOffset, ChecksumType checksumType,
67      int bytesPerChecksum) throws IOException {
68  
69      if (checksumType == ChecksumType.NULL) {
70        return; // No checksum for this block.
71      }
72  
73      DataChecksum checksum = DataChecksum.newDataChecksum(
74          checksumType.getDataChecksumType(), bytesPerChecksum);
75  
76      checksum.calculateChunkedSums(
77         ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
78         ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
79    }
80  
81    /**
82     * Validates that the data in the specified HFileBlock matches the
83     * checksum.  Generates the checksum for the data and
84     * then validate that it matches the value stored in the header.
85     * If there is a checksum mismatch, then return false. Otherwise
86     * return true.
87     * The header is extracted from the specified HFileBlock while the
88     * data-to-be-verified is extracted from 'data'.
89     */
90    static boolean validateBlockChecksum(Path path, HFileBlock block,
91      byte[] data, int hdrSize) throws IOException {
92  
93      // If this is an older version of the block that does not have
94      // checksums, then return false indicating that checksum verification
95      // did not succeed. Actually, this methiod should never be called
96      // when the minorVersion is 0, thus this is a defensive check for a
97      // cannot-happen case. Since this is a cannot-happen case, it is
98      // better to return false to indicate a checksum validation failure.
99      if (!block.getHFileContext().isUseHBaseChecksum()) {
100       return false;
101     }
102 
103     // Get a checksum object based on the type of checksum that is
104     // set in the HFileBlock header. A ChecksumType.NULL indicates that 
105     // the caller is not interested in validating checksums, so we
106     // always return true.
107     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
108     if (cktype == ChecksumType.NULL) {
109       return true; // No checksum validations needed for this block.
110     }
111 
112     // read in the stored value of the checksum size from the header.
113     int bytesPerChecksum = block.getBytesPerChecksum();
114 
115     DataChecksum dataChecksum = DataChecksum.newDataChecksum(
116         cktype.getDataChecksumType(), bytesPerChecksum);
117     assert dataChecksum != null;
118     int sizeWithHeader =  block.getOnDiskDataSizeWithHeader();
119     if (LOG.isTraceEnabled()) {
120       LOG.info("length of data = " + data.length
121           + " OnDiskDataSizeWithHeader = " + sizeWithHeader
122           + " checksum type = " + cktype.getName()
123           + " file =" + path.toString()
124           + " header size = " + hdrSize
125           + " bytesPerChecksum = " + bytesPerChecksum);
126     }
127     try {
128       dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
129           ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader),
130                           path.toString(), 0);
131     } catch (ChecksumException e) {
132       return false;
133     }
134     return true;  // checksum is valid
135   }
136 
137   /**
138    * Returns the number of bytes needed to store the checksums for
139    * a specified data size
140    * @param datasize number of bytes of data
141    * @param bytesPerChecksum number of bytes in a checksum chunk
142    * @return The number of bytes needed to store the checksum values
143    */
144   static long numBytes(long datasize, int bytesPerChecksum) {
145     return numChunks(datasize, bytesPerChecksum) * 
146                      HFileBlock.CHECKSUM_SIZE;
147   }
148 
149   /**
150    * Returns the number of checksum chunks needed to store the checksums for
151    * a specified data size
152    * @param datasize number of bytes of data
153    * @param bytesPerChecksum number of bytes in a checksum chunk
154    * @return The number of checksum chunks
155    */
156   static long numChunks(long datasize, int bytesPerChecksum) {
157     long numChunks = datasize/bytesPerChecksum;
158     if (datasize % bytesPerChecksum != 0) {
159       numChunks++;
160     }
161     return numChunks;
162   }
163 
164   /**
165    * Write dummy checksums to the end of the specified bytes array
166    * to reserve space for writing checksums later
167    * @param baos OutputStream to write dummy checkum values
168    * @param numBytes Number of bytes of data for which dummy checksums
169    *                 need to be generated
170    * @param bytesPerChecksum Number of bytes per checksum value
171    */
172   static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
173     int numBytes, int bytesPerChecksum) throws IOException {
174     long numChunks = numChunks(numBytes, bytesPerChecksum);
175     long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
176     while (bytesLeft > 0) {
177       long count = Math.min(bytesLeft, DUMMY_VALUE.length);
178       baos.write(DUMMY_VALUE, 0, (int)count);
179       bytesLeft -= count;
180     }
181   }
182 
183   /**
184    * Mechanism to throw an exception in case of hbase checksum
185    * failure. This is used by unit tests only.
186    * @param value Setting this to true will cause hbase checksum
187    *              verification failures to generate exceptions.
188    */
189   public static void generateExceptionForChecksumFailureForTest(boolean value) {
190     generateExceptions = value;
191   }
192 }
193