/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.util;
import java.util.Random;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.MD5Hash;
/**
* A generator of random keys and values for load testing. Keys are generated
* by converting numeric indexes to strings and prefixing them with an MD5
* hash. Values are generated by selecting value size in the configured range
* and generating a pseudo-random sequence of bytes seeded by key, column
* qualifier, and value size.
*/
public class LoadTestKVGenerator {
/** A random number generator for determining value size */
private Random randomForValueSize = new Random();
private final int minValueSize;
private final int maxValueSize;
public LoadTestKVGenerator(int minValueSize, int maxValueSize) {
if (minValueSize <= 0 || maxValueSize <= 0) {
throw new IllegalArgumentException("Invalid min/max value sizes: " +
minValueSize + ", " + maxValueSize);
}
this.minValueSize = minValueSize;
this.maxValueSize = maxValueSize;
}
/**
* Verifies that the given byte array is the same as what would be generated
* for the given seed strings (row/cf/column/...). We are assuming that the
* value size is correct, and only verify the actual bytes. However, if the
* min/max value sizes are set sufficiently high, an accidental match should be
* extremely improbable.
*/
public static boolean verify(byte[] value, byte[]... seedStrings) {
byte[] expectedData = getValueForRowColumn(value.length, seedStrings);
return Bytes.equals(expectedData, value);
}
/**
* Converts the given key to string, and prefixes it with the MD5 hash of
* the index's string representation.
*/
public static String md5PrefixedKey(long key) {
String stringKey = Long.toString(key);
String md5hash = MD5Hash.getMD5AsHex(Bytes.toBytes(stringKey));
// flip the key to randomize
return md5hash + "-" + stringKey;
}
/**
* Generates a value for the given key index and column qualifier. Size is
* selected randomly in the configured range. The generated value depends
* only on the combination of the strings passed (key/cf/column/...) and the selected
* value size. This allows to verify the actual value bytes when reading, as done
* in {#verify(byte[], byte[]...)}
* This method is as thread-safe as Random class. It appears that the worst bug ever
* found with the latter is that multiple threads will get some duplicate values, which
* we don't care about.
*/
public byte[] generateRandomSizeValue(byte[]... seedStrings) {
int dataSize = minValueSize;
if (minValueSize != maxValueSize) {
dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize));
}
return getValueForRowColumn(dataSize, seedStrings);
}
/**
* Generates random bytes of the given size for the given row and column
* qualifier. The random seed is fully determined by these parameters.
*/
private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) {
long seed = dataSize;
for (byte[] str : seedStrings) {
seed += Bytes.toString(str).hashCode();
}
Random seededRandom = new Random(seed);
byte[] randomBytes = new byte[dataSize];
seededRandom.nextBytes(randomBytes);
return randomBytes;
}
}