/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Random;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.junit.experimental.categories.Category;
/**
* Set of long-running tests to measure performance of HFile.
* <p>
* Copied from
* <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
* Remove after tfile is committed and use the tfile version of this class
* instead.</p>
*/
@Category(MediumTests.class)
public class TestHFilePerformance extends TestCase {
private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static String ROOT_DIR =
TEST_UTIL.getDataTestDir("TestHFilePerformance").toString();
private FileSystem fs;
private Configuration conf;
private long startTimeEpoch;
private long finishTimeEpoch;
private DateFormat formatter;
@Override
public void setUp() throws IOException {
conf = new Configuration();
fs = FileSystem.get(conf);
formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
}
public void startTime() {
startTimeEpoch = System.currentTimeMillis();
System.out.println(formatTime() + " Started timing.");
}
public void stopTime() {
finishTimeEpoch = System.currentTimeMillis();
System.out.println(formatTime() + " Stopped timing.");
}
public long getIntervalMillis() {
return finishTimeEpoch - startTimeEpoch;
}
public void printlnWithTimestamp(String message) {
System.out.println(formatTime() + " " + message);
}
  /*
   * Format an epoch timestamp in milliseconds as a date string.
   */
  public String formatTime(long millis){
    return formatter.format(millis);
}
public String formatTime(){
return formatTime(System.currentTimeMillis());
}
  private FSDataOutputStream createFSOutput(Path name) throws IOException {
    if (fs.exists(name)) {
      fs.delete(name, true);
    }
    return fs.create(name);
}
  //TODO have multiple ways of generating key/value, e.g. dictionary words, to
  // produce compressible sample data; see the DictionaryValueGenerator sketch
  // below. For now, 1 out of 3 values is random and keys are all random.
private static class KeyValueGenerator {
Random keyRandomizer;
Random valueRandomizer;
long randomValueRatio = 3; // 1 out of randomValueRatio generated values will be random.
long valueSequence = 0 ;
KeyValueGenerator() {
      keyRandomizer = new Random(0L); // fixed seed so runs are reproducible
      valueRandomizer = new Random(1L); // fixed seed, distinct from the key seed
}
// Key is always random now.
void getKey(byte[] key) {
keyRandomizer.nextBytes(key);
}
void getValue(byte[] value) {
if (valueSequence % randomValueRatio == 0)
valueRandomizer.nextBytes(value);
valueSequence++;
}
}
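  /*
   * A minimal sketch of the dictionary-backed generator the TODO above asks
   * for: values are assembled from a small fixed word list, so they compress
   * far better than random bytes. This class, its name, and its word list are
   * illustrative assumptions only; it is not wired into the benchmark.
   */
  private static class DictionaryValueGenerator {
    private static final byte[][] WORDS = {
      "apache".getBytes(), "hadoop".getBytes(), "hbase".getBytes(),
      "hfile".getBytes(), "performance".getBytes(), "benchmark".getBytes() };
    private final Random wordPicker = new Random(2L); // fixed seed, reproducible

    // Fill the value buffer with words drawn from the dictionary.
    void getValue(byte[] value) {
      int offset = 0;
      while (offset < value.length) {
        byte[] word = WORDS[wordPicker.nextInt(WORDS.length)];
        int len = Math.min(word.length, value.length - offset);
        System.arraycopy(word, 0, value, offset, len);
        offset += len;
      }
    }
  }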
  /**
   * Time writing a file of the given type.
   *
   * @param fileType "HFile" or "SequenceFile"
   * @param keyLength length in bytes of each generated key
   * @param valueLength length in bytes of each generated value
   * @param codecName "none", "lzo", "gz", "snappy"
   * @param rows number of rows to be written
   * @param writeMethod used for HFile only
   * @param minBlockSize used for HFile only
   * @throws IOException if the file cannot be written
   */
  //TODO writeMethod: implement multiple ways of writing, e.g. A) known length
  // (no chunking) and B) using a buffer and streaming (for many chunks).
public void timeWrite(String fileType, int keyLength, int valueLength,
String codecName, long rows, String writeMethod, int minBlockSize)
throws IOException {
System.out.println("File Type: " + fileType);
System.out.println("Writing " + fileType + " with codecName: " + codecName);
long totalBytesWritten = 0;
    // Separate key/value randomizers with fixed seeds, so the HFile and
    // SequenceFile runs write identical data.
byte[] key = new byte[keyLength];
byte[] value = new byte[valueLength];
KeyValueGenerator generator = new KeyValueGenerator();
startTime();
Path path = new Path(ROOT_DIR, fileType + ".Performance");
    System.out.println("Writing to: " + path);
FSDataOutputStream fout = createFSOutput(path);
if ("HFile".equals(fileType)){
System.out.println("HFile write method: ");
HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
.withOutputStream(fout)
.withBlockSize(minBlockSize)
.withCompression(codecName)
.create();
// Writing value in one shot.
for (long l=0; l<rows; l++ ) {
generator.getKey(key);
generator.getValue(value);
writer.append(key, value);
totalBytesWritten += key.length;
totalBytesWritten += value.length;
}
writer.close();
} else if ("SequenceFile".equals(fileType)){
      CompressionCodec codec = null;
      if ("gz".equals(codecName)) {
        codec = new GzipCodec();
      } else if (!"none".equals(codecName)) {
        throw new IOException("Codec not supported.");
      }
      SequenceFile.Writer writer;
      if (!"none".equals(codecName)) {
        writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
          BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
      } else {
        writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
          BytesWritable.class, SequenceFile.CompressionType.NONE, null);
      }
      BytesWritable keyBsw;
      BytesWritable valBsw;
      for (long l=0; l<rows; l++ ) {
        generator.getKey(key);
        keyBsw = new BytesWritable(key);
        totalBytesWritten += keyBsw.getLength();
        generator.getValue(value);
        valBsw = new BytesWritable(value);
        writer.append(keyBsw, valBsw);
        totalBytesWritten += valBsw.getLength();
      }
writer.close();
    } else {
      throw new IOException("File Type is not supported.");
    }
fout.close();
stopTime();
printlnWithTimestamp("Data written: ");
printlnWithTimestamp(" rate = " +
totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
printlnWithTimestamp(" total = " + totalBytesWritten + "B");
printlnWithTimestamp("File written: ");
printlnWithTimestamp(" rate = " +
fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
}
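  /*
   * A minimal sketch for the note printed at the end of testRunComparisons
   * about generating test data beforehand: pre-fill every value before the
   * timed loop so generation cost is excluded from the measurement. Memory
   * use is rows * valueLength bytes, so it only suits small row counts. The
   * method name is an illustrative assumption; nothing calls it yet.
   */
  private static byte[][] pregenerateValues(KeyValueGenerator generator,
      int rows, int valueLength) {
    byte[][] values = new byte[rows][valueLength];
    for (int i = 0; i < rows; i++) {
      generator.getValue(values[i]); // same generator, so data matches the live runs
    }
    return values;
  }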
  /**
   * Time reading a file of the given type.
   *
   * @param fileType "HFile" or "SequenceFile"
   * @param method read method; used for HFile only
   * @throws IOException if the file cannot be read
   */
  public void timeReading(String fileType, int keyLength, int valueLength,
      long rows, int method) throws IOException {
System.out.println("Reading file of type: " + fileType);
Path path = new Path(ROOT_DIR, fileType + ".Performance");
System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
long totalBytesRead = 0;
ByteBuffer val;
ByteBuffer key;
startTime();
FSDataInputStream fin = fs.open(path);
if ("HFile".equals(fileType)){
      // Reuse fin rather than opening a second stream that is never closed.
      HFile.Reader reader = HFile.createReaderFromStream(path, fin,
        fs.getFileStatus(path).getLen(), new CacheConfig(conf));
reader.loadFileInfo();
      switch (method) {
        // Only one read method is implemented so far; all cases scan sequentially.
        case 0:
        case 1:
        default:
{
HFileScanner scanner = reader.getScanner(false, false);
scanner.seekTo();
for (long l=0; l<rows; l++ ) {
key = scanner.getKey();
val = scanner.getValue();
totalBytesRead += key.limit() + val.limit();
scanner.next();
}
}
break;
}
reader.close();
} else if("SequenceFile".equals(fileType)){
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      if (reader.getCompressionCodec() != null) {
        printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
      } else {
        printlnWithTimestamp("Compression codec class: none");
      }
BytesWritable keyBsw = new BytesWritable();
BytesWritable valBsw = new BytesWritable();
      for (long l=0; l<rows; l++ ) {
        reader.next(keyBsw, valBsw);
        totalBytesRead += keyBsw.getLength() + valBsw.getLength();
      }
reader.close();
      //TODO add tests for other SequenceFile reading scenarios
} else {
throw new IOException("File Type not supported.");
}
//printlnWithTimestamp("Closing reader");
fin.close();
stopTime();
//printlnWithTimestamp("Finished close");
printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
printlnWithTimestamp("Data read: ");
printlnWithTimestamp(" rate = " +
totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
printlnWithTimestamp(" total = " + totalBytesRead + "B");
printlnWithTimestamp("File read: ");
printlnWithTimestamp(" rate = " +
fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
    //TODO uncomment this before the final commit so the test file is removed.
//fs.delete(path, true);
}
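  /*
   * A hedged sketch for the cache note printed at the end of
   * testRunComparisons: stream once through a large unrelated file between
   * timed reads so previously read data is evicted from the OS page cache.
   * The 64KB buffer and the idea of a dedicated flood file are assumptions,
   * not part of the original benchmark.
   */
  private void floodCache(Path largeFile) throws IOException {
    byte[] buf = new byte[64 * 1024];
    FSDataInputStream in = fs.open(largeFile);
    try {
      while (in.read(buf) >= 0) {
        // Discard the bytes; the read itself churns the cache.
      }
    } finally {
      in.close();
    }
  }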
public void testRunComparisons() throws IOException {
int keyLength = 100; // 100B
int valueLength = 5*1024; // 5KB
int minBlockSize = 10*1024*1024; // 10MB
int rows = 10000;
System.out.println("****************************** Sequence File *****************************");
timeWrite("SequenceFile", keyLength, valueLength, "none", rows, null, minBlockSize);
System.out.println("\n+++++++\n");
timeReading("SequenceFile", keyLength, valueLength, rows, -1);
System.out.println("");
System.out.println("----------------------");
System.out.println("");
    /* DISABLED LZO
    timeWrite("SequenceFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
    System.out.println("\n+++++++\n");
    timeReading("SequenceFile", keyLength, valueLength, rows, -1);
    System.out.println("");
    System.out.println("----------------------");
    System.out.println("");
    */
    // SequenceFile gzip needs the native hadoop libraries, so skip the run
    // when they are unavailable.
try {
timeWrite("SequenceFile", keyLength, valueLength, "gz", rows, null,
minBlockSize);
System.out.println("\n+++++++\n");
timeReading("SequenceFile", keyLength, valueLength, rows, -1);
} catch (IllegalArgumentException e) {
System.out.println("Skipping sequencefile gz: " + e.getMessage());
}
System.out.println("\n\n\n");
System.out.println("****************************** HFile *****************************");
timeWrite("HFile", keyLength, valueLength, "none", rows, null, minBlockSize);
System.out.println("\n+++++++\n");
timeReading("HFile", keyLength, valueLength, rows, 0 );
System.out.println("");
System.out.println("----------------------");
System.out.println("");
/* DISABLED LZO
timeWrite("HFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
System.out.println("\n+++++++\n");
timeReading("HFile", keyLength, valueLength, rows, 0 );
System.out.println("\n+++++++\n");
timeReading("HFile", keyLength, valueLength, rows, 1 );
System.out.println("\n+++++++\n");
timeReading("HFile", keyLength, valueLength, rows, 2 );
System.out.println("");
System.out.println("----------------------");
System.out.println("");
*/
timeWrite("HFile", keyLength, valueLength, "gz", rows, null, minBlockSize);
System.out.println("\n+++++++\n");
timeReading("HFile", keyLength, valueLength, rows, 0 );
System.out.println("\n\n\n\nNotes: ");
System.out.println(" * Timing includes open/closing of files.");
System.out.println(" * Timing includes reading both Key and Value");
System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
"dictionary with care for distributation of words is under development.");
System.out.println(" * Timing of write currently, includes random value/key generations. " +
"Which is the same for Sequence File and HFile. Another possibility is to generate " +
"test data beforehand");
System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
"ideas, for next step we do a large dummy read between benchmark read to dismantle " +
"caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
" the same method several times and flood cache every time and average it to get a" +
" better number.");
}
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}