/* * Copyright 2017 LinkedIn, Inc * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.linkedin.parseq; import java.lang.management.ManagementFactory; import java.lang.management.ThreadMXBean; import java.util.HashMap; import java.util.Map; import java.util.Optional; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.Exchanger; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import org.HdrHistogram.Base64CompressedHistogramSerializer; import org.HdrHistogram.Histogram; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.linkedin.parseq.batching.BatchingSupport; import com.linkedin.parseq.trace.ShallowTrace; /** * @author Jaroslaw Odzga (jodzga@linkedin.com) */ public abstract class AbstractBenchmark { public static final String BENCHMARK_TEST_RESULTS_LOG_PREFIX = "Benchmark test results -> "; private static final Logger LOG = LoggerFactory.getLogger(AbstractBenchmark.class); private final BatchingSupport _batchingSupport = new BatchingSupport(); private static final HistogramSerializer _histogramSerializer = new Base64CompressedHistogramSerializer(); private final ThreadMXBean threadBean = ManagementFactory.getThreadMXBean(); private final ConcurrentLinkedQueue<Thread> _parseqThreads = new ConcurrentLinkedQueue<>(); private final Map<Long, Long> threadCPU = new HashMap<>(); private final Map<Long, Long> threadUserCPU = new HashMap<>(); public void runExample(BenchmarkConfig config) throws Exception { final int numCores = Runtime.getRuntime().availableProcessors(); final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(numCores - 1, new ThreadFactory() { @Override public Thread newThread(Runnable r) { Thread t = new Thread(r); _parseqThreads.add(t); return t; } }); final EngineBuilder builder = new EngineBuilder().setTaskExecutor(scheduler).setTimerScheduler(scheduler); builder.setPlanDeactivationListener(_batchingSupport); builder.setEngineProperty(Engine.MAX_CONCURRENT_PLANS, config.CONCURRENCY_LEVEL); final Engine engine = builder.build(); try { doRunBenchmark(engine, config); } finally { engine.shutdown(); scheduler.shutdownNow(); } } abstract Task<?> createPlan(); private int N(BenchmarkConfig config) { if (config instanceof FullLoadBenchmarkConfig) { FullLoadBenchmarkConfig cfg = (FullLoadBenchmarkConfig)config; return cfg.N; } else if (config instanceof ConstantThroughputBenchmarkConfig) { ConstantThroughputBenchmarkConfig cfg = (ConstantThroughputBenchmarkConfig)config; return (int) (cfg.runtime * cfg.events); } else { throw new IllegalArgumentException(); } } private int warmUpN(BenchmarkConfig config) { if (config instanceof FullLoadBenchmarkConfig) { FullLoadBenchmarkConfig cfg = (FullLoadBenchmarkConfig)config; return cfg.WARMUP_ROUNDS; } else if (config instanceof ConstantThroughputBenchmarkConfig) { ConstantThroughputBenchmarkConfig cfg = (ConstantThroughputBenchmarkConfig)config; return (int) (cfg.warmupRime * cfg.events); } else { throw new IllegalArgumentException(); } } protected void doRunBenchmark(final Engine engine, BenchmarkConfig config) throws Exception { final int N = N(config); final int warmUpN = warmUpN(config); final Histogram planHistogram = createHistogram(); final Histogram taskHistogram = createHistogram(); LOG.info("Number of cores: " + Runtime.getRuntime().availableProcessors()); LOG.info("Configuration: " + config); Task<?> probe = createPlan(); engine.run(probe); probe.await(); final int numberOfTasks = probe.getTrace().getTraceMap().size(); LOG.info("Number of tasks per plan: " + numberOfTasks); final Exchanger<Optional<Task<?>>> exchanger = new Exchanger<>(); Thread histogramCollector = new Thread(() -> { try { Optional<Task<?>> t = exchanger.exchange(Optional.empty()); while (t.isPresent()) { Task<?> task = t.get(); task.await(); recordCompletionTimes(planHistogram, taskHistogram, task); t = exchanger.exchange(Optional.empty()); } } catch (Exception e) { e.printStackTrace(); } }); histogramCollector.start(); Task<?> t = null; LOG.info("Warming up using " + warmUpN + " plan execution"); System.out.print("Progress["); Stepper warmUpPercentage = new Stepper(0.1, warmUpN); for (int i = 0; i < warmUpN; i++) { t = createPlan(); config.runTask(engine, t); warmUpPercentage.isNewStep(i).ifPresent(pct -> { System.out.print("."); }); } System.out.println(".]"); grabCPUTimesBeforeTest(); LOG.info("Starting test of " + N + " plan executions"); System.out.print("Progress["); Stepper percentage = new Stepper(0.1, N); Stepper sampler = new Stepper(1 / (N * config.sampleRate), N); long start = System.nanoTime(); for (int i = 0; i < N; i++) { t = createPlan(); config.runTask(engine, t); final Task<?> task = t; sampler.isNewStep(i).ifPresent(s -> { try { exchanger.exchange(Optional.of(task)); } catch (Exception e) { e.printStackTrace(); } }); percentage.isNewStep(i).ifPresent(pct -> { System.out.print("."); }); } long end = System.nanoTime(); System.out.println(".]"); grabCPUTimesAfterTest(); exchanger.exchange(Optional.empty()); histogramCollector.join(); config.wrapUp(); LOG.info("----------------------------------------------------------------"); LOG.info("Histogram of task execution times on parseq threads in \u00B5s:"); taskHistogram.outputPercentileDistribution(System.out, 1000.0); LOG.info(BENCHMARK_TEST_RESULTS_LOG_PREFIX + "Histogram of task execution times on parseq threads in \u00B5s: " + _histogramSerializer.serialize(taskHistogram)); LOG.info("----------------------------------------------------------------"); LOG.info("Histogram of plan completion times in \u00B5s:"); planHistogram.outputPercentileDistribution(System.out, 1000.0); LOG.info(BENCHMARK_TEST_RESULTS_LOG_PREFIX + "Histogram of plan completion times in \u00B5s: " + _histogramSerializer.serialize(planHistogram)); LOG.info("----------------------------------------------------------------"); LOG.info("Throughput: " + String.format("%.3f", (N / ((double)(end - start) / 1000000000))) + " plans/s, " + String.format("%.3f", ((N * numberOfTasks) / ((double)(end - start) / 1000000000))) + " tasks/s"); } private void grabCPUTimesBeforeTest() { final boolean threadCPUTimeSupported = threadBean.isThreadCpuTimeSupported(); LOG.info("Thread CPU time measurment supported: " + threadCPUTimeSupported); if (threadCPUTimeSupported) { threadBean.setThreadCpuTimeEnabled(true); } //grab CPU times before test for (Thread thread: _parseqThreads) { long threadId = thread.getId(); long cpuTime = threadBean.getThreadCpuTime(threadId); if (cpuTime > -1) { threadCPU.put(threadId, cpuTime); } long cpuUserTime = threadBean.getThreadUserTime(threadId); if (cpuUserTime > -1) { threadUserCPU.put(threadId, cpuUserTime); } } } private long addTime(Map<Long, Long> before, long time, long total, long threadId, String name) { long beforeTime = before.get(threadId); if (beforeTime == -1) { if (time > -1) { LOG.warn(name + " time could not be captured before test but was captured after the test - bailing out..."); } //else CPU time measuring not supported } else { if (time > -1) { if (time < beforeTime) { LOG.warn(name + " Time captured before test is greater than the one captured after the test - bailing out..."); } else { //happy path total += time - beforeTime; } } else { LOG.warn(name + " Time could be captured before test but was not captured after the test - bailing out..."); } } return total; } private void grabCPUTimesAfterTest() { long totalCPUTime = 0; long totalUserTime = 0; for (Thread thread: _parseqThreads) { long threadId = thread.getId(); long cpuTime = threadBean.getThreadCpuTime(threadId); long cpuUserTime = threadBean.getThreadUserTime(threadId); if (!threadCPU.containsKey(threadId)) { LOG.warn("New ParSeq thread was added during test"); } else { totalCPUTime = addTime(threadCPU, cpuTime, totalCPUTime, threadId, "CPU"); totalUserTime = addTime(threadUserCPU, cpuUserTime, totalUserTime, threadId, "User"); } } if (totalCPUTime > 0) { LOG.info(BENCHMARK_TEST_RESULTS_LOG_PREFIX + "Total CPU time in ms: " + totalCPUTime / 1000000); LOG.info(BENCHMARK_TEST_RESULTS_LOG_PREFIX + "Total CPU User time in ms: " + totalUserTime / 1000000); } } private static Histogram createHistogram() { return new Histogram(1, 10000000000L, 3); } private void recordCompletionTimes(final Histogram planHistogram, Histogram taskHistogram, Task<?> task) { ShallowTrace st = task.getShallowTrace(); planHistogram.recordValue(st.getEndNanos() - st.getStartNanos()); task.getTrace().getTraceMap().values().forEach(shallowTrace -> { taskHistogram.recordValue(shallowTrace.getPendingNanos() - shallowTrace.getStartNanos()); }); } static class FullLoadBenchmarkConfig extends BenchmarkConfig { int WARMUP_ROUNDS = 100000; int N = 1000000; @Override public void runTask(Engine engine, Task<?> t) { engine.blockingRun(t); } @Override public String toString() { return "FullLoadBenchmarkConfig [WARMUP_ROUNDS=" + WARMUP_ROUNDS + ", ROUNDS=" + N +"]"; } @Override public void wrapUp() { } } static class ConstantThroughputBenchmarkConfig extends BenchmarkConfig { long warmupRime = 2*60; double events = 1000; TimeUnit perUnit = TimeUnit.SECONDS; long runtime = 6*60; final Histogram planExecutionAccuracy = createHistogram(); EventsArrival arrivalProcess; private long lastNano = 0; @Override public void runTask(Engine engine, Task<?> t) throws InterruptedException { initArrivalProcess(); if (lastNano == 0) { lastNano = System.nanoTime(); } long nextNano = lastNano + arrivalProcess.nanosToNextEvent(); long actualNano = waitUntil(nextNano); planExecutionAccuracy.recordValue(Math.abs(actualNano - nextNano)); engine.run(t); lastNano = nextNano; } private void initArrivalProcess() { if (arrivalProcess == null) { arrivalProcess = new PoissonEventsArrival(events, perUnit); } } @Override public String toString() { initArrivalProcess(); return "ConstantThroughputBenchmarkConfig [throughput=" + events + "/" + perUnit + ", warmup=" + warmupRime + " " + perUnit + ", runtime=" + runtime + " " + perUnit + ", arrivalProcess=" + arrivalProcess + "], " + super.toString(); } @Override public void wrapUp() { LOG.info("----------------------------------------------------------------"); LOG.info("Histogram of benchmark execution plan accuracy in \u00B5s:"); planExecutionAccuracy.outputPercentileDistribution(System.out, 1000.0); LOG.info(BENCHMARK_TEST_RESULTS_LOG_PREFIX + "Histogram of benchmark execution plan accuracy in \u00B5s: " + _histogramSerializer.serialize(planExecutionAccuracy)); } } abstract static class BenchmarkConfig { int CONCURRENCY_LEVEL = Runtime.getRuntime().availableProcessors() / 2 + 1; double sampleRate = 0.001; abstract public void runTask(Engine engine, Task<?> t) throws InterruptedException; abstract public void wrapUp(); @Override public String toString() { return "BenchmarkConfig [CONCURRENCY_LEVEL=" + CONCURRENCY_LEVEL + ", sampleRate=" + sampleRate + "]"; } } private static long waitUntil(long nextNano) throws InterruptedException { long current = System.nanoTime(); if ((nextNano - current) > 0) { return waitNano(nextNano, current); } else { return current; } } private static long waitNano(long nextNano, long current) throws InterruptedException { long waitTime = nextNano - current; long millis = (waitTime >> 20) - 1; //2^20ns = 1048576ns ~ 1ms if (millis < 0) { millis = 0; } if (millis > 0) { Thread.sleep(millis); return waitUntil(nextNano); } else { return busyWaitUntil(nextNano); } } private static long busyWaitUntil(long nextNano) { long counter = 0L; while (true) { counter += 1; if (counter % 1000 == 0) { long current = System.nanoTime(); if (current - nextNano >= 0) { return current; } } } } private static class Stepper { private final double countPerStep; private int currentStep = 0; public Stepper(double fractionPerStep, int N) { countPerStep = ((double)N) * fractionPerStep; } Optional<Integer> isNewStep(int i) { int step = (int) (i / countPerStep); if (currentStep != step) { currentStep = step; return Optional.of(step); } else { return Optional.empty(); } } } }