package edu.stanford.rsl.conrad.opencl; import ij.process.FloatProcessor; import java.nio.FloatBuffer; import java.nio.IntBuffer; import com.jogamp.opencl.CLBuffer; import com.jogamp.opencl.CLCommandQueue; import com.jogamp.opencl.CLDevice; import com.jogamp.opencl.CLKernel; import com.jogamp.opencl.CLMemory.Mem; import edu.stanford.rsl.conrad.utils.VisualizationUtil; /** * Performs rendering using a y buffer. This enables very fast simulation of volumetric rendering. * * @author akmaier * */ public class OpenCLYBufferRenderer extends OpenCLRenderer { CLBuffer<IntBuffer> yBuffer; CLBuffer<IntBuffer> xBuffer; CLBuffer<IntBuffer> yBufferPointer; CLBuffer<IntBuffer> xBufferPointer; CLBuffer<IntBuffer> pixelCount; CLBuffer<IntBuffer> xPixelCount; int yBufferSize; private boolean debug = false; public OpenCLYBufferRenderer(CLDevice device) { super(device); OpenCLUtil.initYXDraw(context); } public void init (int width, int height){ super.init(width, height); // assumption: we have about 500 hits per slice (on average) yBufferSize = height * 200; yBuffer = generateIntBuffer(yBufferSize, 3, Mem.READ_WRITE); xBuffer = generateIntBuffer(yBufferSize*width, 3, Mem.READ_WRITE); yBufferPointer = generateIntBuffer(1, 1, Mem.READ_WRITE); xBufferPointer = generateIntBuffer(1, 1, Mem.READ_WRITE); pixelCount = generateIntBuffer(height, 1, Mem.READ_WRITE); xPixelCount = generateIntBuffer(height*width, 1, Mem.READ_WRITE); } public void resetBuffers(){ yBufferPointer.getBuffer().put(0); yBufferPointer.getBuffer().rewind(); xBufferPointer.getBuffer().put(0); xBufferPointer.getBuffer().rewind(); device.createCommandQueue().putWriteBuffer(yBuffer, false) .putWriteBuffer(xBuffer, false) .putWriteBuffer(pixelCount, false) .putWriteBuffer(xPixelCount, false) .putWriteBuffer(yBufferPointer, false) .putWriteBuffer(xBufferPointer, false).finish(); } public int drawTriangles(CLBuffer<FloatBuffer> pointBuffer, CLBuffer<FloatBuffer> screenBuffer, int id){ int elementCount = pointBuffer.getBuffer().capacity()/3; // Length of arrays to process int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 32); // Local work size dimensions int globalWorkSize = OpenCLUtil.roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize CLKernel kernel = OpenCLUtil.getYXDrawInstance().createCLKernel("drawTrianglesYBufferLocal"); kernel.putArgs(pointBuffer, yBuffer, yBufferPointer, pixelCount) .putArg(width) .putArg(id) .putArg(elementCount); // asynchronous write of data to GPU device, // followed by blocking read to get the computed results back. device.createCommandQueue() .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize).finish(); // TODO: probably causes a memory leak on the OpenCL device device.createCommandQueue().putReadBuffer(yBufferPointer, true).finish(); int revan = yBufferPointer.getBuffer().get(); if (debug ) System.out.println("Final append buffer index: " + revan + " local group size: " +localWorkSize); yBufferPointer.getBuffer().rewind(); return revan; } public void readAndShowBuffer(int width, int height, CLBuffer<IntBuffer> screenBuffer, String title){ float [] array = new float [width*height]; for (int j = 0; j < height; j++){ for (int i = 0; i < width; i++){ array[(j*width)+i] = screenBuffer.getBuffer().get(); } } screenBuffer.getBuffer().rewind(); FloatProcessor test = new FloatProcessor(width, height, array, null); VisualizationUtil.showImageProcessor(test, title).show(); } public void drawScreen(CLBuffer<FloatBuffer> screen){ // draw to screen buffer: int elementCount = height; // Length of arrays to process int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 1); // Local work size dimensions int globalWorkSize = OpenCLUtil.roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize CLKernel kernel = OpenCLUtil.getYXDrawInstance().createCLKernel("drawYBufferXBuffer"); kernel.putArgs(yBuffer, pixelCount, xBuffer, xBufferPointer, xPixelCount) .putArg(width) .putArg(elementCount); device.createCommandQueue() .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize) .putReadBuffer(xPixelCount, true) .putReadBuffer(xBuffer, true) .finish(); readAndShowBuffer(width, height, xPixelCount, "xPixelCount"); kernel = OpenCLUtil.getYXDrawInstance().createCLKernel("drawXBufferScreen"); kernel.putArgs(screen, xBuffer, xPixelCount) .putArg(width) .putArg(elementCount); device.createCommandQueue() .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize).finish(); // TODO: probably causes a memory leak on the OpenCL device } public void drawSlice(CLBuffer<FloatBuffer> screen){ // draw to screen buffer: int elementCount = height; // Length of arrays to process int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 256); // Local work size dimensions int globalWorkSize = OpenCLUtil.roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize CLKernel kernel = OpenCLUtil.getYXDrawInstance().createCLKernel("drawYBufferScreen"); kernel.putArgs(screen, yBuffer, pixelCount) .putArg(width) .putArg(elementCount); System.out.println(width); CLCommandQueue clc = device.createCommandQueue(); clc.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize).finish(); kernel.release(); clc.release(); } } /* * Copyright (C) 2010-2014 Andreas Maier * CONRAD is developed as an Open Source project under the GNU General Public License (GPL). */