/*
 * Copyright (c) 2014 Oculus Info Inc.
 * http://www.oculusinfo.com/
 *
 * Released under the MIT License.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package com.oculusinfo.binning.io.serialization;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.zip.GZIPOutputStream;
import java.util.zip.InflaterInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.avro.file.CodecFactory;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.junit.Ignore;
import org.junit.Test;
import org.xerial.snappy.SnappyOutputStream;

import com.oculusinfo.binning.TileIndex;
import com.oculusinfo.binning.io.serialization.impl.KryoSerializer;
import com.oculusinfo.binning.io.serialization.impl.KryoSerializer.Codec;
import com.oculusinfo.binning.io.serialization.impl.PrimitiveArrayAvroSerializer;
import com.oculusinfo.binning.util.TypeDescriptor;
import com.oculusinfo.factory.util.Pair;

/*
 * This is a set of tests to determine the relative speeds of various serializers and/or
 * compression codecs over tiles with multiple elements per bin.
 *
 * It is not a general test, and would never be run as part of a test suite, but it's
 * useful enough when looking at new serializers and codecs that I want it in the code
 * base for future use and reference.
 */
@Ignore
public class SeriesSerializationSpeedTests {
	// Build up a list of pairs of table names/serializers
	private static TypeDescriptor __serializerType =
		new TypeDescriptor(List.class, new TypeDescriptor(Double.class));
	private static TileSerializer<List<Double>> __kryoBZIP =
		new KryoSerializer<>(__serializerType, Codec.BZIP);
	private static TileSerializer<List<Double>> __kryoGZIP =
		new KryoSerializer<>(__serializerType, Codec.GZIP);
	private static TileSerializer<List<Double>> __kryoDEFLATE =
		new KryoSerializer<>(__serializerType, Codec.DEFLATE);
	private static TileSerializer<List<Double>> __avroBZIP =
		new PrimitiveArrayAvroSerializer<>(Double.class, CodecFactory.bzip2Codec());

	private static <S, T> Pair<S, T> p (S first, T second) {
		return new Pair<>(first, second);
	}

	private static List<Pair<String, TileSerializer<List<Double>>>> __tables =
		Arrays.asList(p("kryo-002-BZIP.julia.x.y.series", __kryoBZIP),
		              p("kryo-005-BZIP.julia.x.y.series", __kryoBZIP),
		              p("kryo-010-BZIP.julia.x.y.series", __kryoBZIP),
		              p("kryo-020-BZIP.julia.x.y.series", __kryoBZIP),
		              p("kryo-050-BZIP.julia.x.y.series", __kryoBZIP),
		              p("kryo-100-BZIP.julia.x.y.series", __kryoBZIP),
		              p("kryo-002-GZIP.julia.x.y.series", __kryoGZIP),
		              p("kryo-005-GZIP.julia.x.y.series", __kryoGZIP),
		              p("kryo-010-GZIP.julia.x.y.series", __kryoGZIP),
		              p("kryo-020-GZIP.julia.x.y.series", __kryoGZIP),
		              p("kryo-050-GZIP.julia.x.y.series", __kryoGZIP),
		              p("kryo-100-GZIP.julia.x.y.series", __kryoGZIP),
		              p("kryo-002-DEFLATE.julia.x.y.series", __kryoDEFLATE),
		              p("kryo-005-DEFLATE.julia.x.y.series", __kryoDEFLATE),
		              p("kryo-010-DEFLATE.julia.x.y.series", __kryoDEFLATE),
		              p("kryo-020-DEFLATE.julia.x.y.series", __kryoDEFLATE),
		              p("kryo-050-DEFLATE.julia.x.y.series", __kryoDEFLATE),
		              p("kryo-100-DEFLATE.julia.x.y.series", __kryoDEFLATE),
		              p("avro-002.julia.x.y.series", __avroBZIP),
		              p("avro-005.julia.x.y.series", __avroBZIP),
		              p("avro-010.julia.x.y.series", __avroBZIP),
		              p("avro-020.julia.x.y.series", __avroBZIP),
		              p("avro-050.julia.x.y.series", __avroBZIP),
		              p("avro-100.julia.x.y.series", __avroBZIP));

	@Test
	public void tileSpeedTests () throws Exception {
		runTests(8, new TimingTestRunner());
	}

	private void runTests (int n, TestRunner runner) throws Exception {
		Configuration config = HBaseConfiguration.create();
		config.set("hbase.zookeeper.quorum", "hadoop-s1.oculus.local");
		config.set("hbase.zookeeper.property.clientPort", "2181");
		config.set("hbase.master", "hadoop-s1.oculus.local:60000");
		config.set("hbase.client.keyvalue.maxsize", "0");
		Connection connection = ConnectionFactory.createConnection(config);

		List<String> rows = new ArrayList<>();
		for (int x=0; x<n; ++x) {
			for (int y=0; y<n; ++y) {
				int digits = (int) Math.floor(Math.log10(1 << 3))+1;
				rows.add(String.format("%02d,%0"+digits+"d,%0"+digits+"d", 3, x, y));
			}
		}
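		// Worked example (illustration, not in the original code): for level 3,
		// 1 << 3 = 8, so digits = floor(log10(8)) + 1 = 1, and the row id for
		// x=2, y=5 is "03,2,5" - a zero-padded "level,x,y" key, presumably matching
		// the keys under which the tiles were originally written.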
		byte[] EMPTY_BYTES = new byte[0];
		byte[] TILE_FAMILY_NAME = "tileData".getBytes();
		List<Get> gets = new ArrayList<Get>(rows.size());
		for (String rowId: rows) {
			Get get = new Get(rowId.getBytes());
			get.addColumn(TILE_FAMILY_NAME, EMPTY_BYTES);
			gets.add(get);
		}

		runner.initial();
		for (int i=0; i<__tables.size(); ++i) {
			String tableName = __tables.get(i).getFirst();
			TileSerializer<List<Double>> serializer = __tables.get(i).getSecond();
			Table table = connection.getTable(TableName.valueOf(tableName));

			int tiles = 0;
			runner.preTest();
			long startTime = System.currentTimeMillis();
			Result[] results = table.get(gets);
			for (Result result: results) {
				if (result.containsColumn(TILE_FAMILY_NAME, EMPTY_BYTES)) {
					byte[] rowValue = result.getValue(TILE_FAMILY_NAME, EMPTY_BYTES);
					runner.runTest(tableName, rowValue, serializer);
					tiles++;
				}
			}
			long endTime = System.currentTimeMillis();
			table.close();

			runner.postTest(tableName, endTime-startTime, tiles);
		}
		connection.close();
	}

	interface TestRunner {
		void initial ();
		void preTest ();
		void runTest (String tableName, byte[] rowValue, TileSerializer<List<Double>> serializer) throws Exception;
		void postTest (String tableName, long elapsedTime, int iterations);
	}

	class TimingTestRunner implements TestRunner {
		long parseTime;
		double totalSize;
		TileIndex deserializationIndex = new TileIndex(3, 0, 0);

		@Override
		public void initial () {
			System.out.println("Fetched all 64 tiles from level 3");
			System.out.println("table\ttiles\taverage size\ttotal time\tparse time\tfetch time\tparse time/tile\tfetch time/tile");
		}

		@Override
		public void preTest () {
			parseTime = 0L;
			totalSize = 0;
		}

		@Override
		public void runTest (String tableName, byte[] rowValue, TileSerializer<List<Double>> serializer) throws Exception {
			long st2 = System.currentTimeMillis();
			ByteArrayInputStream bais = new ByteArrayInputStream(rowValue);
			serializer.deserialize(deserializationIndex, bais);
			long et2 = System.currentTimeMillis();
			totalSize += rowValue.length;
			parseTime += (et2-st2);
		}

		@Override
		public void postTest (String tableName, long elapsedTime, int iterations) {
			double time = elapsedTime/1000.0;
			double pTime = parseTime/1000.0;
			double fTime = time-pTime;
			System.out.println(String.format("%s\t%d\t%.4f\t%.4fs\t%.4fs\t%.4fs\t%.4fs\t%.4fs",
			                                 tableName, iterations, totalSize/iterations,
			                                 time, pTime, fTime, pTime/iterations, fTime/iterations));
		}
	}

	class SizeTestRunner implements TestRunner {
		long rawSize;
		long deflateSize;
		long zipSize;
		long gzipSize;
		long gzip2Size;
		long bzipSize;
		long snappySize;

		@Override
		public void initial () {
			System.out.println("set\tn\traw\tdeflate\tzip\tgzip\tgzip commons\tbzip\tsnappy");
		}

		@Override
		public void preTest () {
			rawSize = 0L;
			deflateSize = 0L;
			zipSize = 0L;
			gzipSize = 0L;
			gzip2Size = 0L;
			bzipSize = 0L;
			snappySize = 0L;
		}

		@Override
		public void runTest (String tableName, byte[] rowValue, TileSerializer<List<Double>> serializer) throws Exception {
			// The stored value is treated as DEFLATE-compressed data: record its
			// stored size, inflate it to raw bytes, then recompress the raw bytes
			// with each codec and record the resulting sizes.
			deflateSize += rowValue.length;

			ByteArrayInputStream bais = new ByteArrayInputStream(rowValue);
			InflaterInputStream iis = new InflaterInputStream(bais);
			ByteArrayOutputStream baos = new ByteArrayOutputStream();
			int input;
			while ((input = iis.read()) >= 0) {
				baos.write(input);
			}
			iis.close();
			baos.close();
			byte[] raw = baos.toByteArray();
			rawSize += raw.length;

			// java.util.zip ZIP entry (the stream must be closed before reading the
			// buffer, or the compressed output is incomplete)
			baos = new ByteArrayOutputStream();
			ZipOutputStream zos = new ZipOutputStream(baos);
			zos.putNextEntry(new ZipEntry("abc"));
			zos.write(raw);
			zos.closeEntry();
			zos.close();
			zipSize += baos.toByteArray().length;

			// java.util.zip GZIP
			baos = new ByteArrayOutputStream();
			GZIPOutputStream gos = new GZIPOutputStream(baos);
			gos.write(raw);
			gos.close();
			gzipSize += baos.toByteArray().length;

			// commons-compress GZIP
			baos = new ByteArrayOutputStream();
			GzipCompressorOutputStream gos2 = new GzipCompressorOutputStream(baos);
			gos2.write(raw);
			gos2.close();
			gzip2Size += baos.toByteArray().length;

			// commons-compress BZip2
			baos = new ByteArrayOutputStream();
			BZip2CompressorOutputStream bos = new BZip2CompressorOutputStream(baos);
			bos.write(raw);
			bos.close();
			bzipSize += baos.toByteArray().length;

			// Snappy
			baos = new ByteArrayOutputStream();
			SnappyOutputStream sos = new SnappyOutputStream(baos);
			sos.write(raw);
			sos.close();
			snappySize += baos.toByteArray().length;
		}

		@Override
		public void postTest (String tableName, long elapsedTime, int iterations) {
			System.out.println(String.format("%s\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f",
			                                 tableName, iterations,
			                                 (((double) rawSize)/iterations),
			                                 (((double) deflateSize)/iterations),
			                                 (((double) zipSize)/iterations),
			                                 (((double) gzipSize)/iterations),
			                                 (((double) gzip2Size)/iterations),
			                                 (((double) bzipSize)/iterations),
			                                 (((double) snappySize)/iterations)));
		}
	}
}
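/*
 * Usage sketch (not in the original file): SizeTestRunner is defined above but is not
 * driven by any test; only TimingTestRunner is. To compare per-tile compressed sizes
 * across the codecs listed in its header, a hypothetical test such as the following
 * could be added inside SeriesSerializationSpeedTests, mirroring tileSpeedTests:
 *
 *     @Test
 *     public void tileSizeTests () throws Exception {
 *         runTests(8, new SizeTestRunner());
 *     }
 */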