1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 package org.apache.hadoop.hbase.io.hfile; 19 20 import java.io.ByteArrayOutputStream; 21 import java.io.IOException; 22 import java.nio.ByteBuffer; 23 24 import org.apache.commons.logging.Log; 25 import org.apache.commons.logging.LogFactory; 26 import org.apache.hadoop.fs.ChecksumException; 27 import org.apache.hadoop.hbase.classification.InterfaceAudience; 28 import org.apache.hadoop.fs.Path; 29 import org.apache.hadoop.hbase.util.ChecksumType; 30 import org.apache.hadoop.util.DataChecksum; 31 32 /** 33 * Utility methods to compute and validate checksums. 34 */ 35 @InterfaceAudience.Private 36 public class ChecksumUtil { 37 public static final Log LOG = LogFactory.getLog(ChecksumUtil.class); 38 39 /** This is used to reserve space in a byte buffer */ 40 private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE]; 41 42 /** 43 * This is used by unit tests to make checksum failures throw an 44 * exception instead of returning null. Returning a null value from 45 * checksum validation will cause the higher layer to retry that 46 * read with hdfs-level checksums. Instead, we would like checksum 47 * failures to cause the entire unit test to fail. 48 */ 49 private static boolean generateExceptions = false; 50 51 /** 52 * Generates a checksum for all the data in indata. The checksum is 53 * written to outdata. 54 * @param indata input data stream 55 * @param startOffset starting offset in the indata stream from where to 56 * compute checkums from 57 * @param endOffset ending offset in the indata stream upto 58 * which checksums needs to be computed 59 * @param outdata the output buffer where checksum values are written 60 * @param outOffset the starting offset in the outdata where the 61 * checksum values are written 62 * @param checksumType type of checksum 63 * @param bytesPerChecksum number of bytes per checksum value 64 */ 65 static void generateChecksums(byte[] indata, int startOffset, int endOffset, 66 byte[] outdata, int outOffset, ChecksumType checksumType, 67 int bytesPerChecksum) throws IOException { 68 69 if (checksumType == ChecksumType.NULL) { 70 return; // No checksum for this block. 71 } 72 73 DataChecksum checksum = DataChecksum.newDataChecksum( 74 checksumType.getDataChecksumType(), bytesPerChecksum); 75 76 checksum.calculateChunkedSums( 77 ByteBuffer.wrap(indata, startOffset, endOffset - startOffset), 78 ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset)); 79 } 80 81 /** 82 * Validates that the data in the specified HFileBlock matches the 83 * checksum. Generates the checksum for the data and 84 * then validate that it matches the value stored in the header. 85 * If there is a checksum mismatch, then return false. Otherwise 86 * return true. 87 * The header is extracted from the specified HFileBlock while the 88 * data-to-be-verified is extracted from 'data'. 89 */ 90 static boolean validateBlockChecksum(Path path, HFileBlock block, 91 byte[] data, int hdrSize) throws IOException { 92 93 // If this is an older version of the block that does not have 94 // checksums, then return false indicating that checksum verification 95 // did not succeed. Actually, this methiod should never be called 96 // when the minorVersion is 0, thus this is a defensive check for a 97 // cannot-happen case. Since this is a cannot-happen case, it is 98 // better to return false to indicate a checksum validation failure. 99 if (!block.getHFileContext().isUseHBaseChecksum()) { 100 return false; 101 } 102 103 // Get a checksum object based on the type of checksum that is 104 // set in the HFileBlock header. A ChecksumType.NULL indicates that 105 // the caller is not interested in validating checksums, so we 106 // always return true. 107 ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType()); 108 if (cktype == ChecksumType.NULL) { 109 return true; // No checksum validations needed for this block. 110 } 111 112 // read in the stored value of the checksum size from the header. 113 int bytesPerChecksum = block.getBytesPerChecksum(); 114 115 DataChecksum dataChecksum = DataChecksum.newDataChecksum( 116 cktype.getDataChecksumType(), bytesPerChecksum); 117 assert dataChecksum != null; 118 int sizeWithHeader = block.getOnDiskDataSizeWithHeader(); 119 if (LOG.isTraceEnabled()) { 120 LOG.info("length of data = " + data.length 121 + " OnDiskDataSizeWithHeader = " + sizeWithHeader 122 + " checksum type = " + cktype.getName() 123 + " file =" + path.toString() 124 + " header size = " + hdrSize 125 + " bytesPerChecksum = " + bytesPerChecksum); 126 } 127 try { 128 dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader), 129 ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader), 130 path.toString(), 0); 131 } catch (ChecksumException e) { 132 return false; 133 } 134 return true; // checksum is valid 135 } 136 137 /** 138 * Returns the number of bytes needed to store the checksums for 139 * a specified data size 140 * @param datasize number of bytes of data 141 * @param bytesPerChecksum number of bytes in a checksum chunk 142 * @return The number of bytes needed to store the checksum values 143 */ 144 static long numBytes(long datasize, int bytesPerChecksum) { 145 return numChunks(datasize, bytesPerChecksum) * 146 HFileBlock.CHECKSUM_SIZE; 147 } 148 149 /** 150 * Returns the number of checksum chunks needed to store the checksums for 151 * a specified data size 152 * @param datasize number of bytes of data 153 * @param bytesPerChecksum number of bytes in a checksum chunk 154 * @return The number of checksum chunks 155 */ 156 static long numChunks(long datasize, int bytesPerChecksum) { 157 long numChunks = datasize/bytesPerChecksum; 158 if (datasize % bytesPerChecksum != 0) { 159 numChunks++; 160 } 161 return numChunks; 162 } 163 164 /** 165 * Write dummy checksums to the end of the specified bytes array 166 * to reserve space for writing checksums later 167 * @param baos OutputStream to write dummy checkum values 168 * @param numBytes Number of bytes of data for which dummy checksums 169 * need to be generated 170 * @param bytesPerChecksum Number of bytes per checksum value 171 */ 172 static void reserveSpaceForChecksums(ByteArrayOutputStream baos, 173 int numBytes, int bytesPerChecksum) throws IOException { 174 long numChunks = numChunks(numBytes, bytesPerChecksum); 175 long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE; 176 while (bytesLeft > 0) { 177 long count = Math.min(bytesLeft, DUMMY_VALUE.length); 178 baos.write(DUMMY_VALUE, 0, (int)count); 179 bytesLeft -= count; 180 } 181 } 182 183 /** 184 * Mechanism to throw an exception in case of hbase checksum 185 * failure. This is used by unit tests only. 186 * @param value Setting this to true will cause hbase checksum 187 * verification failures to generate exceptions. 188 */ 189 public static void generateExceptionForChecksumFailureForTest(boolean value) { 190 generateExceptions = value; 191 } 192 } 193