/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.zip.Checksum;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
/**
* Utility methods to compute and validate checksums.
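 *
 * <p>A minimal, illustrative sketch of the checksum-generation side (the
 * buffer sizes and the CRC32 checksum type below are chosen for illustration,
 * not taken from a real caller):
 * <pre>
 *   byte[] data = new byte[1024];   // bytes to be protected by checksums
 *   byte[] sums = new byte[(int) ChecksumUtil.numBytes(data.length, 512)];
 *   // writes one 4-byte CRC32 value per 512-byte chunk, i.e. two values here
 *   ChecksumUtil.generateChecksums(data, 0, data.length, sums, 0,
 *       ChecksumType.CRC32, 512);
 * </pre>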
*/
public class ChecksumUtil {
  /** Dummy bytes used to reserve space for checksum values before the real values are computed */
private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
  /**
   * This is used by unit tests to make checksum failures throw an
   * exception instead of merely returning false. A failed checksum
   * validation normally causes the higher layer to retry that
   * read with hdfs-level checksums. Instead, we would like checksum
   * failures to cause the entire unit test to fail.
   */
private static boolean generateExceptions = false;
  /**
   * Generates checksums for all the data in indata. The checksum values
   * are written to outdata.
   * @param indata input data buffer
   * @param startOffset starting offset in indata from which to
   *                    compute checksums
   * @param endOffset ending offset in indata up to which
   *                  checksums need to be computed
   * @param outdata the output buffer where checksum values are written
   * @param outOffset the starting offset in outdata at which the
   *                  checksum values are written
   * @param checksumType type of checksum
   * @param bytesPerChecksum number of bytes per checksum value
   */
static void generateChecksums(byte[] indata,
int startOffset, int endOffset,
byte[] outdata, int outOffset,
ChecksumType checksumType,
int bytesPerChecksum) throws IOException {
if (checksumType == ChecksumType.NULL) {
      return; // No checksums for this block.
}
Checksum checksum = checksumType.getChecksumObject();
int bytesLeft = endOffset - startOffset;
int chunkNum = 0;
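    // One checksum value is computed for every bytesPerChecksum bytes of
    // input (the last chunk may be shorter), and the values are written
    // consecutively into outdata as 4-byte ints.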
while (bytesLeft > 0) {
// generate the checksum for one chunk
checksum.reset();
int count = Math.min(bytesLeft, bytesPerChecksum);
checksum.update(indata, startOffset, count);
// write the checksum value to the output buffer.
int cksumValue = (int)checksum.getValue();
outOffset = Bytes.putInt(outdata, outOffset, cksumValue);
chunkNum++;
startOffset += count;
bytesLeft -= count;
}
}
  /**
   * Validates that the data in the specified HFileBlock matches its
   * checksums. Generates the checksums for the data and
   * then validates that they match the values stored after the data
   * in the block. If there is a checksum mismatch, returns false.
   * Otherwise returns true.
   * The header is extracted from the specified HFileBlock while the
   * data-to-be-verified is extracted from 'data'.
   * @param path path of the file the block was read from, used in log messages
   * @param block the HFileBlock that supplies the header and checksum parameters
   * @param data the on-disk bytes to verify, including header, data and checksums
   * @param hdrSize number of bytes in the block header
   * @return true if the checksums are valid, false otherwise
   */
static boolean validateBlockChecksum(Path path, HFileBlock block,
byte[] data, int hdrSize) throws IOException {
    // If this is an older version of the block that does not have
    // checksums, then return false indicating that checksum verification
    // did not succeed. Actually, this method should never be called
    // when the minorVersion is 0, so this is a defensive check for a
    // cannot-happen case. Since this is a cannot-happen case, it is
    // better to return false to indicate a checksum validation failure.
if (block.getMinorVersion() < HFileBlock.MINOR_VERSION_WITH_CHECKSUM) {
return false;
}
// Get a checksum object based on the type of checksum that is
// set in the HFileBlock header. A ChecksumType.NULL indicates that
// the caller is not interested in validating checksums, so we
// always return true.
ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
if (cktype == ChecksumType.NULL) {
      return true; // No checksum validation needed for this block.
}
Checksum checksumObject = cktype.getChecksumObject();
checksumObject.reset();
    // read the number of bytes covered by each checksum value from the header.
int bytesPerChecksum = block.getBytesPerChecksum();
    // bytesPerChecksum must be at least as large as the header size
if (bytesPerChecksum < hdrSize) {
String msg = "Unsupported value of bytesPerChecksum. " +
" Minimum is " + hdrSize +
" but the configured value is " + bytesPerChecksum;
HFile.LOG.warn(msg);
return false; // cannot happen case, unable to verify checksum
}
// Extract the header and compute checksum for the header.
ByteBuffer hdr = block.getBufferWithHeader();
checksumObject.update(hdr.array(), hdr.arrayOffset(), hdrSize);
int off = hdrSize;
int consumed = hdrSize;
int bytesLeft = block.getOnDiskDataSizeWithHeader() - off;
int cksumOffset = block.getOnDiskDataSizeWithHeader();
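    // The stored checksum values live after the data, starting at
    // onDiskDataSizeWithHeader, one 4-byte value per chunk. The first chunk
    // covers the header plus the first (bytesPerChecksum - hdrSize) bytes of
    // data, which is why 'consumed' starts out at hdrSize; subsequent chunks
    // each cover up to bytesPerChecksum bytes.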
// validate each chunk
while (bytesLeft > 0) {
int thisChunkSize = bytesPerChecksum - consumed;
int count = Math.min(bytesLeft, thisChunkSize);
checksumObject.update(data, off, count);
int storedChecksum = Bytes.toInt(data, cksumOffset);
if (storedChecksum != (int)checksumObject.getValue()) {
String msg = "File " + path +
" Stored checksum value of " + storedChecksum +
" at offset " + cksumOffset +
" does not match computed checksum " +
checksumObject.getValue() +
", total data size " + data.length +
" Checksum data range offset " + off + " len " + count +
HFileBlock.toStringHeader(block.getBufferReadOnly());
HFile.LOG.warn(msg);
if (generateExceptions) {
throw new IOException(msg); // this is only for unit tests
} else {
return false; // checksum validation failure
}
}
cksumOffset += HFileBlock.CHECKSUM_SIZE;
bytesLeft -= count;
off += count;
consumed = 0;
checksumObject.reset();
}
return true; // checksum is valid
}
/**
* Returns the number of bytes needed to store the checksums for
* a specified data size
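   * <p>For example (illustrative numbers), 1000 bytes of data with
   * bytesPerChecksum = 512 span two chunks and therefore need
   * 2 * HFileBlock.CHECKSUM_SIZE = 8 bytes of checksums.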
* @param datasize number of bytes of data
* @param bytesPerChecksum number of bytes in a checksum chunk
* @return The number of bytes needed to store the checksum values
*/
static long numBytes(long datasize, int bytesPerChecksum) {
return numChunks(datasize, bytesPerChecksum) *
HFileBlock.CHECKSUM_SIZE;
}
/**
* Returns the number of checksum chunks needed to store the checksums for
* a specified data size
* @param datasize number of bytes of data
* @param bytesPerChecksum number of bytes in a checksum chunk
* @return The number of checksum chunks
*/
static long numChunks(long datasize, int bytesPerChecksum) {
    long numChunks = datasize / bytesPerChecksum;
if (datasize % bytesPerChecksum != 0) {
numChunks++;
}
return numChunks;
}
  /**
   * Writes dummy checksum values to the specified output stream
   * to reserve space for writing the real checksums later.
   * @param baos output stream to write the dummy checksum values to
   * @param numBytes number of bytes of data for which dummy checksums
   *                 need to be generated
   * @param bytesPerChecksum number of bytes per checksum value
   */
static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
int numBytes, int bytesPerChecksum) throws IOException {
long numChunks = numChunks(numBytes, bytesPerChecksum);
long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
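    // Write the placeholder in slices of at most DUMMY_VALUE.length bytes;
    // the reserved bytes are expected to be overwritten with the real
    // checksum values later (see generateChecksums).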
while (bytesLeft > 0) {
long count = Math.min(bytesLeft, DUMMY_VALUE.length);
baos.write(DUMMY_VALUE, 0, (int)count);
bytesLeft -= count;
}
}
  /**
   * Enables or disables throwing an exception on hbase checksum
   * failure. This is used by unit tests only.
   * @param value setting this to true will cause hbase checksum
   *              verification failures to generate exceptions
   */
public static void generateExceptionForChecksumFailureForTest(boolean value) {
generateExceptions = value;
}
}